feat: improved AEC, keyboard shortcuts, dedup participants, dev-fast profile
Some checks failed
Build Release Binaries / build-amd64 (push) Failing after 3m40s
Some checks failed
Build Release Binaries / build-amd64 (push) Failing after 3m40s
AEC improvements: - Reduce echo tail from 100ms to 30ms (3.3x faster, suited for laptops) - Add double-talk detection: freeze adaptation when near-end speaks - Add residual echo suppression - Disable AEC by default in --android mode (macOS has built-in AEC) CLI features: - Keyboard shortcuts: m=mic mute, s=speaker mute, q=quit (raw terminal mode) - Dedup participants in RoomUpdate display (same fingerprint+alias shown once) - Add dev-fast profile (opt-level 2 with incremental compilation) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
181
Cargo.lock
generated
181
Cargo.lock
generated
@@ -291,6 +291,12 @@ dependencies = [
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "base16ct"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.22.1"
|
||||
@@ -461,6 +467,7 @@ dependencies = [
|
||||
"iana-time-zone",
|
||||
"js-sys",
|
||||
"num-traits",
|
||||
"serde",
|
||||
"wasm-bindgen",
|
||||
"windows-link",
|
||||
]
|
||||
@@ -621,6 +628,24 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crunchy"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
|
||||
|
||||
[[package]]
|
||||
name = "crypto-bigint"
|
||||
version = "0.5.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
"rand_core 0.6.4",
|
||||
"subtle",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crypto-common"
|
||||
version = "0.1.7"
|
||||
@@ -644,6 +669,7 @@ dependencies = [
|
||||
"digest",
|
||||
"fiat-crypto",
|
||||
"rustc_version",
|
||||
"serde",
|
||||
"subtle",
|
||||
"zeroize",
|
||||
]
|
||||
@@ -810,6 +836,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
|
||||
dependencies = [
|
||||
"block-buffer",
|
||||
"const-oid",
|
||||
"crypto-common",
|
||||
"subtle",
|
||||
]
|
||||
@@ -844,6 +871,21 @@ dependencies = [
|
||||
"rustfft",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ecdsa"
|
||||
version = "0.16.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca"
|
||||
dependencies = [
|
||||
"der",
|
||||
"digest",
|
||||
"elliptic-curve",
|
||||
"rfc6979",
|
||||
"serdect",
|
||||
"signature",
|
||||
"spki",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ed25519"
|
||||
version = "2.2.3"
|
||||
@@ -851,6 +893,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53"
|
||||
dependencies = [
|
||||
"pkcs8",
|
||||
"serde",
|
||||
"signature",
|
||||
]
|
||||
|
||||
@@ -875,6 +918,26 @@ version = "1.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||
|
||||
[[package]]
|
||||
name = "elliptic-curve"
|
||||
version = "0.13.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
|
||||
dependencies = [
|
||||
"base16ct",
|
||||
"crypto-bigint",
|
||||
"digest",
|
||||
"ff",
|
||||
"generic-array",
|
||||
"group",
|
||||
"pkcs8",
|
||||
"rand_core 0.6.4",
|
||||
"sec1",
|
||||
"serdect",
|
||||
"subtle",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.35"
|
||||
@@ -918,6 +981,16 @@ version = "2.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
||||
|
||||
[[package]]
|
||||
name = "ff"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393"
|
||||
dependencies = [
|
||||
"rand_core 0.6.4",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fiat-crypto"
|
||||
version = "0.2.9"
|
||||
@@ -1078,6 +1151,7 @@ checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
|
||||
dependencies = [
|
||||
"typenum",
|
||||
"version_check",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1137,6 +1211,17 @@ version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
|
||||
|
||||
[[package]]
|
||||
name = "group"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63"
|
||||
dependencies = [
|
||||
"ff",
|
||||
"rand_core 0.6.4",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "0.4.13"
|
||||
@@ -1620,6 +1705,21 @@ dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "k256"
|
||||
version = "0.13.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"ecdsa",
|
||||
"elliptic-curve",
|
||||
"once_cell",
|
||||
"serdect",
|
||||
"sha2",
|
||||
"signature",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.5.0"
|
||||
@@ -2383,6 +2483,16 @@ dependencies = [
|
||||
"web-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rfc6979"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2"
|
||||
dependencies = [
|
||||
"hmac",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.17.14"
|
||||
@@ -2561,6 +2671,21 @@ version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "sec1"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc"
|
||||
dependencies = [
|
||||
"base16ct",
|
||||
"der",
|
||||
"generic-array",
|
||||
"pkcs8",
|
||||
"serdect",
|
||||
"subtle",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "security-framework"
|
||||
version = "3.7.0"
|
||||
@@ -2665,6 +2790,16 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serdect"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a84f14a19e9a014bb9f4512488d9829a68e04ecabffb0f9904cd1ace94598177"
|
||||
dependencies = [
|
||||
"base16ct",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha1"
|
||||
version = "0.10.6"
|
||||
@@ -2718,6 +2853,7 @@ version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
|
||||
dependencies = [
|
||||
"digest",
|
||||
"rand_core 0.6.4",
|
||||
]
|
||||
|
||||
@@ -2931,6 +3067,15 @@ version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca"
|
||||
|
||||
[[package]]
|
||||
name = "tiny-keccak"
|
||||
version = "2.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
|
||||
dependencies = [
|
||||
"crunchy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinystr"
|
||||
version = "0.8.2"
|
||||
@@ -3350,6 +3495,18 @@ version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
|
||||
|
||||
[[package]]
|
||||
name = "uuid"
|
||||
version = "1.23.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9"
|
||||
dependencies = [
|
||||
"getrandom 0.4.2",
|
||||
"js-sys",
|
||||
"serde_core",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "valuable"
|
||||
version = "0.1.1"
|
||||
@@ -3389,7 +3546,28 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "warzone-protocol"
|
||||
version = "0.1.0"
|
||||
version = "0.0.38"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"bincode",
|
||||
"bip39",
|
||||
"chacha20poly1305",
|
||||
"chrono",
|
||||
"curve25519-dalek",
|
||||
"ed25519-dalek",
|
||||
"hex",
|
||||
"hkdf",
|
||||
"k256",
|
||||
"rand 0.8.5",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
"thiserror 2.0.18",
|
||||
"tiny-keccak",
|
||||
"uuid",
|
||||
"x25519-dalek",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
@@ -4035,6 +4213,7 @@ dependencies = [
|
||||
"bytes",
|
||||
"chrono",
|
||||
"cpal",
|
||||
"libc",
|
||||
"rustls",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
||||
@@ -54,6 +54,12 @@ wzp-crypto = { path = "crates/wzp-crypto" }
|
||||
wzp-transport = { path = "crates/wzp-transport" }
|
||||
wzp-client = { path = "crates/wzp-client" }
|
||||
|
||||
# Fast dev profile: optimized but with debug info and incremental compilation.
|
||||
# Use with: cargo run --profile dev-fast
|
||||
[profile.dev-fast]
|
||||
inherits = "dev"
|
||||
opt-level = 2
|
||||
|
||||
# Optimize heavy compute deps even in debug builds —
|
||||
# real-time audio needs < 20ms per frame, impossible unoptimized.
|
||||
[profile.dev.package.nnnoiseless]
|
||||
|
||||
@@ -23,6 +23,7 @@ serde_json = "1"
|
||||
chrono = "0.4"
|
||||
rustls = { version = "0.23", default-features = false, features = ["ring", "std"] }
|
||||
cpal = { version = "0.15", optional = true }
|
||||
libc = "0.2"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
|
||||
@@ -241,7 +241,7 @@ impl CallEncoder {
|
||||
block_id: 0,
|
||||
frame_in_block: 0,
|
||||
timestamp_ms: 0,
|
||||
aec: EchoCanceller::new(48000, 100), // 100 ms echo tail
|
||||
aec: EchoCanceller::new(48000, 30), // 30ms echo tail (laptop/phone)
|
||||
agc: AutoGainControl::new(),
|
||||
silence_detector: SilenceDetector::new(
|
||||
config.silence_threshold_rms,
|
||||
|
||||
@@ -375,7 +375,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
{
|
||||
let audio_opts = AudioOpts {
|
||||
no_denoise: cli.no_denoise || cli.direct_playout,
|
||||
no_aec: cli.no_aec,
|
||||
no_aec: cli.no_aec || cli.direct_playout, // AEC disabled by default — macOS has built-in AEC
|
||||
no_agc: cli.no_agc,
|
||||
no_fec: cli.no_fec,
|
||||
no_silence: cli.no_silence || cli.direct_playout,
|
||||
@@ -642,6 +642,41 @@ async fn run_file_mode(
|
||||
/// QUIC → recv task → jitter buffer → decode tick (20ms) → AudioRing → CPAL playback callback
|
||||
///
|
||||
/// All lock-free: CPAL callbacks use atomic ring buffers, no Mutex on the audio path.
|
||||
/// RAII guard for terminal raw mode. Restores on drop.
|
||||
struct RawModeGuard {
|
||||
orig: libc::termios,
|
||||
}
|
||||
|
||||
impl RawModeGuard {
|
||||
fn enter() -> Option<Self> {
|
||||
unsafe {
|
||||
let mut orig: libc::termios = std::mem::zeroed();
|
||||
if libc::tcgetattr(libc::STDIN_FILENO, &mut orig) != 0 {
|
||||
return None;
|
||||
}
|
||||
let mut raw = orig;
|
||||
// ICANON: character-at-a-time input
|
||||
// ECHO: don't echo typed characters
|
||||
// ISIG: let us handle Ctrl+C as a byte
|
||||
raw.c_lflag &= !(libc::ICANON | libc::ECHO | libc::ISIG);
|
||||
// IXON: disable Ctrl+S/Ctrl+Q flow control so we receive them
|
||||
raw.c_iflag &= !libc::IXON;
|
||||
raw.c_cc[libc::VMIN] = 1;
|
||||
raw.c_cc[libc::VTIME] = 0;
|
||||
libc::tcsetattr(libc::STDIN_FILENO, libc::TCSANOW, &raw);
|
||||
Some(Self { orig })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for RawModeGuard {
|
||||
fn drop(&mut self) {
|
||||
unsafe {
|
||||
libc::tcsetattr(libc::STDIN_FILENO, libc::TCSANOW, &self.orig);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct AudioOpts {
|
||||
no_denoise: bool,
|
||||
no_aec: bool,
|
||||
@@ -674,6 +709,8 @@ async fn run_live(
|
||||
let farend_ring = StdArc::new(AudioRing::new());
|
||||
|
||||
let running = StdArc::new(AtomicBool::new(true));
|
||||
let mic_muted = StdArc::new(AtomicBool::new(false));
|
||||
let spk_muted = StdArc::new(AtomicBool::new(false));
|
||||
|
||||
// --- Signal handler: set running=false on first Ctrl+C, force-quit on second ---
|
||||
let signal_running = running.clone();
|
||||
@@ -709,6 +746,7 @@ async fn run_live(
|
||||
// --- Send task: poll capture ring → encode → send via async ---
|
||||
let send_transport = transport.clone();
|
||||
let send_running = running.clone();
|
||||
let send_mic_muted = mic_muted.clone();
|
||||
let no_aec = opts.no_aec;
|
||||
let no_agc = opts.no_agc;
|
||||
let _no_fec = opts.no_fec;
|
||||
@@ -745,6 +783,11 @@ async fn run_live(
|
||||
continue;
|
||||
}
|
||||
|
||||
// Mic mute: zero out capture buffer (still encode + send silence to keep stream alive)
|
||||
if send_mic_muted.load(Ordering::Relaxed) {
|
||||
capture_buf.fill(0);
|
||||
}
|
||||
|
||||
// Feed AEC far-end reference: what was played through the speaker.
|
||||
// Must be called BEFORE encode_frame processes the mic signal.
|
||||
if !no_aec {
|
||||
@@ -781,6 +824,7 @@ async fn run_live(
|
||||
// --- Recv + playout ---
|
||||
let recv_transport = transport.clone();
|
||||
let recv_running = running.clone();
|
||||
let recv_spk_muted = spk_muted.clone();
|
||||
let direct_playout = opts.direct_playout;
|
||||
|
||||
// Direct playout: decode on recv, write straight to playout ring (like Android).
|
||||
@@ -826,13 +870,18 @@ async fn run_live(
|
||||
if !no_agc {
|
||||
playout_agc.process_frame(&mut pcm_buf[..n]);
|
||||
}
|
||||
playout_ring.write(&pcm_buf[..n]);
|
||||
// Feed far-end ring for AEC
|
||||
// Always feed AEC (even when speaker muted)
|
||||
farend_ring.write(&pcm_buf[..n]);
|
||||
// Speaker mute: don't write to playout ring
|
||||
if !recv_spk_muted.load(Ordering::Relaxed) {
|
||||
playout_ring.write(&pcm_buf[..n]);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
if let Ok(n) = dec.decode_lost(&mut pcm_buf) {
|
||||
playout_ring.write(&pcm_buf[..n]);
|
||||
if !recv_spk_muted.load(Ordering::Relaxed) {
|
||||
playout_ring.write(&pcm_buf[..n]);
|
||||
}
|
||||
}
|
||||
if packets_received < 10 {
|
||||
warn!("decode error: {e}");
|
||||
@@ -924,9 +973,15 @@ async fn run_live(
|
||||
)
|
||||
.await;
|
||||
match result {
|
||||
Ok(Ok(Some(wzp_proto::SignalMessage::RoomUpdate { count, participants }))) => {
|
||||
info!(count, "room update");
|
||||
for p in &participants {
|
||||
Ok(Ok(Some(wzp_proto::SignalMessage::RoomUpdate { participants, .. }))) => {
|
||||
// Dedup by (fingerprint, alias) — same peer may appear multiple times
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
let unique: Vec<_> = participants
|
||||
.iter()
|
||||
.filter(|p| seen.insert((&p.fingerprint, &p.alias)))
|
||||
.collect();
|
||||
info!(count = unique.len(), "room update");
|
||||
for p in &unique {
|
||||
let name = p
|
||||
.alias
|
||||
.as_deref()
|
||||
@@ -955,12 +1010,68 @@ async fn run_live(
|
||||
}
|
||||
};
|
||||
|
||||
// --- Keyboard task: Ctrl+M = toggle mic mute, Ctrl+S = toggle speaker mute ---
|
||||
let kb_running = running.clone();
|
||||
let kb_mic = mic_muted.clone();
|
||||
let kb_spk = spk_muted.clone();
|
||||
let keyboard_task = async move {
|
||||
use tokio::io::AsyncReadExt;
|
||||
|
||||
// Put terminal in raw mode so we get individual keypresses
|
||||
let _raw_guard = RawModeGuard::enter();
|
||||
|
||||
let mut stdin = tokio::io::stdin();
|
||||
let mut buf = [0u8; 1];
|
||||
loop {
|
||||
if !kb_running.load(Ordering::Relaxed) {
|
||||
break;
|
||||
}
|
||||
match tokio::time::timeout(
|
||||
std::time::Duration::from_millis(200),
|
||||
stdin.read(&mut buf),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(1)) => match buf[0] {
|
||||
b'm' | b'M' | 0x0D => {
|
||||
// 'm' or Ctrl+M
|
||||
let was = kb_mic.fetch_xor(true, Ordering::SeqCst);
|
||||
let state = if !was { "MUTED" } else { "unmuted" };
|
||||
eprintln!("\r[mic {state}]");
|
||||
}
|
||||
b's' | b'S' | 0x13 => {
|
||||
// 's' or Ctrl+S
|
||||
let was = kb_spk.fetch_xor(true, Ordering::SeqCst);
|
||||
let state = if !was { "MUTED" } else { "unmuted" };
|
||||
eprintln!("\r[speaker {state}]");
|
||||
}
|
||||
0x03 => {
|
||||
// Ctrl+C
|
||||
eprintln!();
|
||||
info!("Ctrl+C received, shutting down...");
|
||||
kb_running.store(false, Ordering::SeqCst);
|
||||
break;
|
||||
}
|
||||
b'q' | b'Q' => {
|
||||
eprintln!("\r[quit]");
|
||||
kb_running.store(false, Ordering::SeqCst);
|
||||
break;
|
||||
}
|
||||
_ => {}
|
||||
},
|
||||
Ok(Ok(_)) | Ok(Err(_)) => break,
|
||||
Err(_) => {} // timeout
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// --- Run all tasks, exit when any finishes (or running flag cleared by Ctrl+C) ---
|
||||
tokio::select! {
|
||||
_ = send_task => info!("send task ended"),
|
||||
_ = recv_task => info!("recv task ended"),
|
||||
_ = playout_task => info!("playout task ended"),
|
||||
_ = signal_task => info!("signal task ended"),
|
||||
_ = keyboard_task => info!("keyboard task ended"),
|
||||
}
|
||||
|
||||
running.store(false, Ordering::SeqCst);
|
||||
|
||||
@@ -1,25 +1,40 @@
|
||||
//! Acoustic Echo Cancellation using NLMS adaptive filter.
|
||||
//! Processes 480-sample (10ms) sub-frames at 48kHz.
|
||||
//!
|
||||
//! Improvements over naive NLMS:
|
||||
//! - Double-talk detection: freezes adaptation when near-end speech dominates,
|
||||
//! preventing the filter from cancelling the local speaker's voice.
|
||||
//! - Short default tail (30ms) tuned for laptops/phones where speaker and mic
|
||||
//! are close together.
|
||||
//! - Residual suppression: attenuates output when echo estimate is confident.
|
||||
|
||||
/// NLMS (Normalized Least Mean Squares) adaptive filter echo canceller.
|
||||
///
|
||||
/// Removes acoustic echo by modelling the echo path between the far-end
|
||||
/// (speaker) signal and the near-end (microphone) signal, then subtracting
|
||||
/// the estimated echo from the near-end in real time.
|
||||
/// NLMS (Normalized Least Mean Squares) adaptive filter echo canceller
|
||||
/// with double-talk detection.
|
||||
pub struct EchoCanceller {
|
||||
filter_coeffs: Vec<f32>,
|
||||
filter_len: usize,
|
||||
far_end_buf: Vec<f32>,
|
||||
far_end_pos: usize,
|
||||
/// NLMS step size (adaptation rate).
|
||||
mu: f32,
|
||||
enabled: bool,
|
||||
/// Running far-end power estimate (for double-talk detection).
|
||||
far_power_avg: f32,
|
||||
/// Running near-end power estimate (for double-talk detection).
|
||||
near_power_avg: f32,
|
||||
/// Smoothing factor for power estimates.
|
||||
power_alpha: f32,
|
||||
/// Double-talk threshold: if near/far power ratio exceeds this,
|
||||
/// freeze adaptation to protect near-end speech.
|
||||
dt_threshold: f32,
|
||||
/// Residual echo suppression factor (0.0 = none, 1.0 = full).
|
||||
suppress: f32,
|
||||
}
|
||||
|
||||
impl EchoCanceller {
|
||||
/// Create a new echo canceller.
|
||||
///
|
||||
/// * `sample_rate` — typically 48000
|
||||
/// * `filter_ms` — echo-tail length in milliseconds (e.g. 100 for 100 ms)
|
||||
/// * `filter_ms` — echo-tail length in milliseconds (30ms recommended for laptops)
|
||||
pub fn new(sample_rate: u32, filter_ms: u32) -> Self {
|
||||
let filter_len = (sample_rate as usize) * (filter_ms as usize) / 1000;
|
||||
Self {
|
||||
@@ -27,8 +42,13 @@ impl EchoCanceller {
|
||||
filter_len,
|
||||
far_end_buf: vec![0.0f32; filter_len],
|
||||
far_end_pos: 0,
|
||||
mu: 0.01,
|
||||
mu: 0.005,
|
||||
enabled: true,
|
||||
far_power_avg: 0.0,
|
||||
near_power_avg: 0.0,
|
||||
power_alpha: 0.01,
|
||||
dt_threshold: 4.0,
|
||||
suppress: 0.6,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,9 +65,7 @@ impl EchoCanceller {
|
||||
|
||||
/// Process a near-end (microphone) frame, removing the estimated echo.
|
||||
///
|
||||
/// Returns the echo-return-loss enhancement (ERLE) as a ratio: the RMS of
|
||||
/// the original near-end divided by the RMS of the residual. Values > 1.0
|
||||
/// mean echo was reduced.
|
||||
/// Returns the echo-return-loss enhancement (ERLE) as a ratio.
|
||||
pub fn process_frame(&mut self, nearend: &mut [i16]) -> f32 {
|
||||
if !self.enabled {
|
||||
return 1.0;
|
||||
@@ -56,34 +74,44 @@ impl EchoCanceller {
|
||||
let n = nearend.len();
|
||||
let fl = self.filter_len;
|
||||
|
||||
// Compute frame-level power for double-talk detection.
|
||||
let near_power: f32 = nearend.iter().map(|&s| {
|
||||
let f = s as f32;
|
||||
f * f
|
||||
}).sum::<f32>() / n as f32;
|
||||
|
||||
let far_start = (self.far_end_pos + fl * ((n / fl) + 1) - n) % fl;
|
||||
let far_power: f32 = (0..n).map(|i| {
|
||||
let fe = self.far_end_buf[(far_start + i) % fl];
|
||||
fe * fe
|
||||
}).sum::<f32>() / n as f32;
|
||||
|
||||
// Smooth power estimates
|
||||
self.far_power_avg += self.power_alpha * (far_power - self.far_power_avg);
|
||||
self.near_power_avg += self.power_alpha * (near_power - self.near_power_avg);
|
||||
|
||||
// Double-talk detection: if near-end is much louder than far-end,
|
||||
// the local speaker is active — freeze adaptation.
|
||||
let adapt = if self.far_power_avg < 1.0 {
|
||||
// No far-end signal — nothing to cancel, skip adaptation
|
||||
false
|
||||
} else {
|
||||
let ratio = self.near_power_avg / (self.far_power_avg + 1.0);
|
||||
ratio < self.dt_threshold
|
||||
};
|
||||
|
||||
let mut sum_near_sq: f64 = 0.0;
|
||||
let mut sum_err_sq: f64 = 0.0;
|
||||
|
||||
for i in 0..n {
|
||||
let near_f = nearend[i] as f32;
|
||||
|
||||
// --- estimate echo as dot(coeffs, farend_window) ---
|
||||
// The far-end window for this sample starts at
|
||||
// (far_end_pos - 1 - i) mod filter_len (most recent)
|
||||
// and goes back filter_len samples.
|
||||
// Estimate echo: dot(coeffs, farend_window)
|
||||
let base = (self.far_end_pos + fl * ((n / fl) + 2) + i - n) % fl;
|
||||
|
||||
let mut echo_est: f32 = 0.0;
|
||||
let mut power: f32 = 0.0;
|
||||
|
||||
// Position of the most-recent far-end sample for this near-end sample.
|
||||
// far_end_pos points to the *next write* position, so the most-recent
|
||||
// sample written is at far_end_pos - 1. We have already called
|
||||
// feed_farend for this block, so the relevant samples are the last
|
||||
// filter_len entries ending just before the current write position,
|
||||
// offset by how far we are into this near-end frame.
|
||||
//
|
||||
// For sample i of the near-end frame, the corresponding far-end
|
||||
// "now" is far_end_pos - n + i (wrapping).
|
||||
// far_end_pos points to next-write, so most recent sample is at
|
||||
// far_end_pos - 1. For the i-th near-end sample we want the
|
||||
// far-end "now" to be at (far_end_pos - n + i). We add fl
|
||||
// repeatedly to avoid underflow on the usize subtraction.
|
||||
let base = (self.far_end_pos + fl * ((n / fl) + 2) + i - n) % fl;
|
||||
|
||||
for k in 0..fl {
|
||||
let fe_idx = (base + fl - k) % fl;
|
||||
let fe = self.far_end_buf[fe_idx];
|
||||
@@ -93,27 +121,40 @@ impl EchoCanceller {
|
||||
|
||||
let error = near_f - echo_est;
|
||||
|
||||
// --- NLMS coefficient update ---
|
||||
let norm = power + 1.0; // +1 regularisation to avoid div-by-zero
|
||||
let step = self.mu * error / norm;
|
||||
// NLMS coefficient update — only when not in double-talk
|
||||
if adapt && power > 1.0 {
|
||||
let norm = power + 1.0;
|
||||
let step = self.mu * error / norm;
|
||||
|
||||
for k in 0..fl {
|
||||
let fe_idx = (base + fl - k) % fl;
|
||||
let fe = self.far_end_buf[fe_idx];
|
||||
self.filter_coeffs[k] += step * fe;
|
||||
for k in 0..fl {
|
||||
let fe_idx = (base + fl - k) % fl;
|
||||
let fe = self.far_end_buf[fe_idx];
|
||||
self.filter_coeffs[k] += step * fe;
|
||||
}
|
||||
}
|
||||
|
||||
// Clamp output
|
||||
let out = error.max(-32768.0).min(32767.0);
|
||||
// Residual echo suppression: when far-end is active, attenuate
|
||||
// the residual to reduce perceptible echo.
|
||||
let out = if self.far_power_avg > 100.0 && !adapt {
|
||||
// Double-talk: pass through near-end with minimal suppression
|
||||
error
|
||||
} else if self.far_power_avg > 100.0 {
|
||||
// Far-end active, not double-talk: apply suppression
|
||||
error * (1.0 - self.suppress * (echo_est.abs() / (near_f.abs() + 1.0)).min(1.0))
|
||||
} else {
|
||||
// No far-end: pass through
|
||||
error
|
||||
};
|
||||
|
||||
let out = out.max(-32768.0).min(32767.0);
|
||||
nearend[i] = out as i16;
|
||||
|
||||
sum_near_sq += (near_f as f64) * (near_f as f64);
|
||||
sum_err_sq += (out as f64) * (out as f64);
|
||||
}
|
||||
|
||||
// ERLE ratio
|
||||
if sum_err_sq < 1.0 {
|
||||
return 100.0; // near-perfect cancellation
|
||||
return 100.0;
|
||||
}
|
||||
(sum_near_sq / sum_err_sq).sqrt() as f32
|
||||
}
|
||||
@@ -129,12 +170,12 @@ impl EchoCanceller {
|
||||
}
|
||||
|
||||
/// Reset the adaptive filter to its initial state.
|
||||
///
|
||||
/// Zeroes out all filter coefficients and the far-end circular buffer.
|
||||
pub fn reset(&mut self) {
|
||||
self.filter_coeffs.iter_mut().for_each(|c| *c = 0.0);
|
||||
self.far_end_buf.iter_mut().for_each(|s| *s = 0.0);
|
||||
self.far_end_pos = 0;
|
||||
self.far_power_avg = 0.0;
|
||||
self.near_power_avg = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -144,15 +185,15 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn aec_creates_with_correct_filter_len() {
|
||||
let aec = EchoCanceller::new(48000, 100);
|
||||
assert_eq!(aec.filter_len, 4800);
|
||||
assert_eq!(aec.filter_coeffs.len(), 4800);
|
||||
assert_eq!(aec.far_end_buf.len(), 4800);
|
||||
let aec = EchoCanceller::new(48000, 30);
|
||||
assert_eq!(aec.filter_len, 1440);
|
||||
assert_eq!(aec.filter_coeffs.len(), 1440);
|
||||
assert_eq!(aec.far_end_buf.len(), 1440);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn aec_passthrough_when_disabled() {
|
||||
let mut aec = EchoCanceller::new(48000, 100);
|
||||
let mut aec = EchoCanceller::new(48000, 30);
|
||||
aec.set_enabled(false);
|
||||
assert!(!aec.is_enabled());
|
||||
|
||||
@@ -165,7 +206,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn aec_reset_zeroes_state() {
|
||||
let mut aec = EchoCanceller::new(48000, 10); // short for test speed
|
||||
let mut aec = EchoCanceller::new(48000, 10);
|
||||
let farend: Vec<i16> = (0..480).map(|i| ((i * 37) % 1000) as i16).collect();
|
||||
aec.feed_farend(&farend);
|
||||
|
||||
@@ -178,13 +219,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn aec_reduces_echo_of_known_signal() {
|
||||
// Use a small filter for speed. Feed a known far-end signal, then
|
||||
// present the *same* signal as near-end (perfect echo, no room).
|
||||
// After adaptation the output energy should drop.
|
||||
let filter_ms = 5; // 240 taps at 48 kHz
|
||||
let filter_ms = 5;
|
||||
let mut aec = EchoCanceller::new(48000, filter_ms);
|
||||
|
||||
// Generate a simple repeating pattern.
|
||||
let frame_len = 480usize;
|
||||
let make_frame = |offset: usize| -> Vec<i16> {
|
||||
(0..frame_len)
|
||||
@@ -195,18 +232,15 @@ mod tests {
|
||||
.collect()
|
||||
};
|
||||
|
||||
// Warm up the adaptive filter with several frames.
|
||||
let mut last_erle = 1.0f32;
|
||||
for frame_idx in 0..40 {
|
||||
let farend = make_frame(frame_idx * frame_len);
|
||||
aec.feed_farend(&farend);
|
||||
|
||||
// Near-end = exact copy of far-end (pure echo).
|
||||
let mut nearend = farend.clone();
|
||||
last_erle = aec.process_frame(&mut nearend);
|
||||
}
|
||||
|
||||
// After 40 frames the ERLE should be meaningfully > 1.
|
||||
assert!(
|
||||
last_erle > 1.0,
|
||||
"expected ERLE > 1.0 after adaptation, got {last_erle}"
|
||||
@@ -216,13 +250,41 @@ mod tests {
|
||||
#[test]
|
||||
fn aec_silence_passthrough() {
|
||||
let mut aec = EchoCanceller::new(48000, 10);
|
||||
// Feed silence far-end
|
||||
aec.feed_farend(&vec![0i16; 480]);
|
||||
// Near-end is silence too
|
||||
let mut frame = vec![0i16; 480];
|
||||
let erle = aec.process_frame(&mut frame);
|
||||
assert!(erle >= 1.0);
|
||||
// Output should still be silence
|
||||
assert!(frame.iter().all(|&s| s == 0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn aec_preserves_nearend_during_doubletalk() {
|
||||
// When only near-end is active (no far-end), output should
|
||||
// closely match input — the AEC should not suppress speech.
|
||||
let mut aec = EchoCanceller::new(48000, 30);
|
||||
|
||||
let frame_len = 960;
|
||||
let nearend_signal: Vec<i16> = (0..frame_len)
|
||||
.map(|i| {
|
||||
let t = i as f64 / 48000.0;
|
||||
(10000.0 * (2.0 * std::f64::consts::PI * 440.0 * t).sin()) as i16
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Feed silence as far-end
|
||||
aec.feed_farend(&vec![0i16; frame_len]);
|
||||
|
||||
let mut frame = nearend_signal.clone();
|
||||
aec.process_frame(&mut frame);
|
||||
|
||||
// Output energy should be close to input energy (not suppressed)
|
||||
let input_energy: f64 = nearend_signal.iter().map(|&s| (s as f64).powi(2)).sum();
|
||||
let output_energy: f64 = frame.iter().map(|&s| (s as f64).powi(2)).sum();
|
||||
let ratio = output_energy / input_energy;
|
||||
|
||||
assert!(
|
||||
ratio > 0.8,
|
||||
"near-end speech should be preserved, energy ratio = {ratio:.3}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user