feat: macOS VoiceProcessingIO for hardware AEC + delay-compensated NLMS
Some checks failed
Build Release Binaries / build-amd64 (push) Failing after 3m33s
Some checks failed
Build Release Binaries / build-amd64 (push) Failing after 3m33s
- Add --os-aec flag: uses Apple VoiceProcessingIO audio unit for hardware echo cancellation (same engine as FaceTime) - New vpio feature + audio_vpio.rs: combined capture+playback via VPIO - Improved software AEC: delay-compensated leaky NLMS with Geigel DTD (60ms tail, 40ms delay, configurable via --aec-delay) - Add --aec-delay flag for tuning software AEC delay compensation - Add dev-fast Cargo profile (opt-level 2 with incremental compilation) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -4212,6 +4212,7 @@ dependencies = [
|
|||||||
"async-trait",
|
"async-trait",
|
||||||
"bytes",
|
"bytes",
|
||||||
"chrono",
|
"chrono",
|
||||||
|
"coreaudio-rs",
|
||||||
"cpal",
|
"cpal",
|
||||||
"libc",
|
"libc",
|
||||||
"rustls",
|
"rustls",
|
||||||
|
|||||||
@@ -23,11 +23,13 @@ serde_json = "1"
|
|||||||
chrono = "0.4"
|
chrono = "0.4"
|
||||||
rustls = { version = "0.23", default-features = false, features = ["ring", "std"] }
|
rustls = { version = "0.23", default-features = false, features = ["ring", "std"] }
|
||||||
cpal = { version = "0.15", optional = true }
|
cpal = { version = "0.15", optional = true }
|
||||||
|
coreaudio-rs = { version = "0.11", optional = true }
|
||||||
libc = "0.2"
|
libc = "0.2"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = []
|
default = []
|
||||||
audio = ["cpal"]
|
audio = ["cpal"]
|
||||||
|
vpio = ["coreaudio-rs"]
|
||||||
|
|
||||||
[[bin]]
|
[[bin]]
|
||||||
name = "wzp-client"
|
name = "wzp-client"
|
||||||
|
|||||||
179
crates/wzp-client/src/audio_vpio.rs
Normal file
179
crates/wzp-client/src/audio_vpio.rs
Normal file
@@ -0,0 +1,179 @@
|
|||||||
|
//! macOS Voice Processing I/O — uses Apple's VoiceProcessingIO audio unit
|
||||||
|
//! for hardware-accelerated echo cancellation, AGC, and noise suppression.
|
||||||
|
//!
|
||||||
|
//! VoiceProcessingIO is a combined input+output unit that knows what's going
|
||||||
|
//! to the speaker, so it can cancel the echo from the mic signal internally.
|
||||||
|
//! This is the same engine FaceTime and other Apple apps use.
|
||||||
|
|
||||||
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use anyhow::Context;
|
||||||
|
use coreaudio::audio_unit::audio_format::LinearPcmFlags;
|
||||||
|
use coreaudio::audio_unit::render_callback::{self, data};
|
||||||
|
use coreaudio::audio_unit::{AudioUnit, Element, IOType, SampleFormat, Scope, StreamFormat};
|
||||||
|
use coreaudio::sys;
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
use crate::audio_ring::AudioRing;
|
||||||
|
|
||||||
|
/// Number of samples per 20 ms frame at 48 kHz mono.
|
||||||
|
pub const FRAME_SAMPLES: usize = 960;
|
||||||
|
|
||||||
|
/// Combined capture + playback via macOS VoiceProcessingIO.
|
||||||
|
///
|
||||||
|
/// The OS handles AEC internally — no manual far-end feeding needed.
|
||||||
|
pub struct VpioAudio {
|
||||||
|
capture_ring: Arc<AudioRing>,
|
||||||
|
playout_ring: Arc<AudioRing>,
|
||||||
|
_audio_unit: AudioUnit,
|
||||||
|
running: Arc<AtomicBool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl VpioAudio {
|
||||||
|
/// Start VoiceProcessingIO with AEC enabled.
|
||||||
|
pub fn start() -> Result<Self, anyhow::Error> {
|
||||||
|
let capture_ring = Arc::new(AudioRing::new());
|
||||||
|
let playout_ring = Arc::new(AudioRing::new());
|
||||||
|
let running = Arc::new(AtomicBool::new(true));
|
||||||
|
|
||||||
|
let mut au = AudioUnit::new(IOType::VoiceProcessingIO)
|
||||||
|
.context("failed to create VoiceProcessingIO audio unit")?;
|
||||||
|
|
||||||
|
// Must uninitialize before configuring properties.
|
||||||
|
au.uninitialize()
|
||||||
|
.context("failed to uninitialize VPIO for configuration")?;
|
||||||
|
|
||||||
|
// Enable input (mic) on Element::Input (bus 1).
|
||||||
|
let enable: u32 = 1;
|
||||||
|
au.set_property(
|
||||||
|
sys::kAudioOutputUnitProperty_EnableIO,
|
||||||
|
Scope::Input,
|
||||||
|
Element::Input,
|
||||||
|
Some(&enable),
|
||||||
|
)
|
||||||
|
.context("failed to enable VPIO input")?;
|
||||||
|
|
||||||
|
// Output (speaker) is enabled by default on VPIO, but be explicit.
|
||||||
|
au.set_property(
|
||||||
|
sys::kAudioOutputUnitProperty_EnableIO,
|
||||||
|
Scope::Output,
|
||||||
|
Element::Output,
|
||||||
|
Some(&enable),
|
||||||
|
)
|
||||||
|
.context("failed to enable VPIO output")?;
|
||||||
|
|
||||||
|
// Configure stream format: 48kHz mono f32 non-interleaved
|
||||||
|
let stream_format = StreamFormat {
|
||||||
|
sample_rate: 48_000.0,
|
||||||
|
sample_format: SampleFormat::F32,
|
||||||
|
flags: LinearPcmFlags::IS_FLOAT
|
||||||
|
| LinearPcmFlags::IS_PACKED
|
||||||
|
| LinearPcmFlags::IS_NON_INTERLEAVED,
|
||||||
|
channels: 1,
|
||||||
|
};
|
||||||
|
|
||||||
|
let asbd = stream_format.to_asbd();
|
||||||
|
|
||||||
|
// Input: set format on Output scope of Input element
|
||||||
|
// (= the format the AU delivers to us from the mic)
|
||||||
|
au.set_property(
|
||||||
|
sys::kAudioUnitProperty_StreamFormat,
|
||||||
|
Scope::Output,
|
||||||
|
Element::Input,
|
||||||
|
Some(&asbd),
|
||||||
|
)
|
||||||
|
.context("failed to set input stream format")?;
|
||||||
|
|
||||||
|
// Output: set format on Input scope of Output element
|
||||||
|
// (= the format we feed to the AU for the speaker)
|
||||||
|
au.set_property(
|
||||||
|
sys::kAudioUnitProperty_StreamFormat,
|
||||||
|
Scope::Input,
|
||||||
|
Element::Output,
|
||||||
|
Some(&asbd),
|
||||||
|
)
|
||||||
|
.context("failed to set output stream format")?;
|
||||||
|
|
||||||
|
// Set up input callback (mic capture with AEC applied)
|
||||||
|
let cap_ring = capture_ring.clone();
|
||||||
|
let cap_running = running.clone();
|
||||||
|
let logged = Arc::new(AtomicBool::new(false));
|
||||||
|
au.set_input_callback(
|
||||||
|
move |args: render_callback::Args<data::NonInterleaved<f32>>| {
|
||||||
|
if !cap_running.load(Ordering::Relaxed) {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
let mut buffers = args.data.channels();
|
||||||
|
if let Some(ch) = buffers.next() {
|
||||||
|
if !logged.swap(true, Ordering::Relaxed) {
|
||||||
|
eprintln!("[vpio] capture callback: {} f32 samples", ch.len());
|
||||||
|
}
|
||||||
|
let mut tmp = [0i16; FRAME_SAMPLES];
|
||||||
|
for chunk in ch.chunks(FRAME_SAMPLES) {
|
||||||
|
let n = chunk.len();
|
||||||
|
for i in 0..n {
|
||||||
|
tmp[i] = (chunk[i].clamp(-1.0, 1.0) * i16::MAX as f32) as i16;
|
||||||
|
}
|
||||||
|
cap_ring.write(&tmp[..n]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.context("failed to set input callback")?;
|
||||||
|
|
||||||
|
// Set up output callback (speaker playback — AEC uses this as reference)
|
||||||
|
let play_ring = playout_ring.clone();
|
||||||
|
au.set_render_callback(
|
||||||
|
move |mut args: render_callback::Args<data::NonInterleaved<f32>>| {
|
||||||
|
let mut buffers = args.data.channels_mut();
|
||||||
|
if let Some(ch) = buffers.next() {
|
||||||
|
let mut tmp = [0i16; FRAME_SAMPLES];
|
||||||
|
for chunk in ch.chunks_mut(FRAME_SAMPLES) {
|
||||||
|
let n = chunk.len();
|
||||||
|
let read = play_ring.read(&mut tmp[..n]);
|
||||||
|
for i in 0..read {
|
||||||
|
chunk[i] = tmp[i] as f32 / i16::MAX as f32;
|
||||||
|
}
|
||||||
|
for i in read..n {
|
||||||
|
chunk[i] = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.context("failed to set render callback")?;
|
||||||
|
|
||||||
|
au.initialize().context("failed to initialize VoiceProcessingIO")?;
|
||||||
|
au.start().context("failed to start VoiceProcessingIO")?;
|
||||||
|
|
||||||
|
info!("VoiceProcessingIO started (OS-level AEC enabled)");
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
capture_ring,
|
||||||
|
playout_ring,
|
||||||
|
_audio_unit: au,
|
||||||
|
running,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn capture_ring(&self) -> &Arc<AudioRing> {
|
||||||
|
&self.capture_ring
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn playout_ring(&self) -> &Arc<AudioRing> {
|
||||||
|
&self.playout_ring
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn stop(&self) {
|
||||||
|
self.running.store(false, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for VpioAudio {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
self.stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -42,6 +42,9 @@ pub struct CallConfig {
|
|||||||
/// When enabled, only every 50th frame carries a full 12-byte MediaHeader;
|
/// When enabled, only every 50th frame carries a full 12-byte MediaHeader;
|
||||||
/// intermediate frames use a compact 4-byte MiniHeader.
|
/// intermediate frames use a compact 4-byte MiniHeader.
|
||||||
pub mini_frames_enabled: bool,
|
pub mini_frames_enabled: bool,
|
||||||
|
/// AEC far-end delay compensation in milliseconds (default: 40).
|
||||||
|
/// Compensates for the round-trip audio latency from playout to mic capture.
|
||||||
|
pub aec_delay_ms: u32,
|
||||||
/// Enable adaptive jitter buffer (default: true).
|
/// Enable adaptive jitter buffer (default: true).
|
||||||
///
|
///
|
||||||
/// When true, the jitter buffer target depth is automatically adjusted
|
/// When true, the jitter buffer target depth is automatically adjusted
|
||||||
@@ -63,6 +66,7 @@ impl Default for CallConfig {
|
|||||||
noise_suppression: true,
|
noise_suppression: true,
|
||||||
mini_frames_enabled: true,
|
mini_frames_enabled: true,
|
||||||
adaptive_jitter: true,
|
adaptive_jitter: true,
|
||||||
|
aec_delay_ms: 40,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -241,7 +245,7 @@ impl CallEncoder {
|
|||||||
block_id: 0,
|
block_id: 0,
|
||||||
frame_in_block: 0,
|
frame_in_block: 0,
|
||||||
timestamp_ms: 0,
|
timestamp_ms: 0,
|
||||||
aec: EchoCanceller::new(48000, 30), // 30ms echo tail (laptop/phone)
|
aec: EchoCanceller::with_delay(48000, 60, config.aec_delay_ms),
|
||||||
agc: AutoGainControl::new(),
|
agc: AutoGainControl::new(),
|
||||||
silence_detector: SilenceDetector::new(
|
silence_detector: SilenceDetector::new(
|
||||||
config.silence_threshold_rms,
|
config.silence_threshold_rms,
|
||||||
|
|||||||
@@ -53,6 +53,8 @@ struct CliArgs {
|
|||||||
no_fec: bool,
|
no_fec: bool,
|
||||||
no_silence: bool,
|
no_silence: bool,
|
||||||
direct_playout: bool,
|
direct_playout: bool,
|
||||||
|
aec_delay_ms: Option<u32>,
|
||||||
|
os_aec: bool,
|
||||||
token: Option<String>,
|
token: Option<String>,
|
||||||
_metrics_file: Option<String>,
|
_metrics_file: Option<String>,
|
||||||
}
|
}
|
||||||
@@ -130,6 +132,8 @@ fn parse_args() -> CliArgs {
|
|||||||
let mut no_fec = false;
|
let mut no_fec = false;
|
||||||
let mut no_silence = false;
|
let mut no_silence = false;
|
||||||
let mut direct_playout = false;
|
let mut direct_playout = false;
|
||||||
|
let mut aec_delay_ms = None;
|
||||||
|
let mut os_aec = false;
|
||||||
let mut token = None;
|
let mut token = None;
|
||||||
let mut metrics_file = None;
|
let mut metrics_file = None;
|
||||||
let mut relay_str = None;
|
let mut relay_str = None;
|
||||||
@@ -181,6 +185,16 @@ fn parse_args() -> CliArgs {
|
|||||||
"--no-fec" => no_fec = true,
|
"--no-fec" => no_fec = true,
|
||||||
"--no-silence" => no_silence = true,
|
"--no-silence" => no_silence = true,
|
||||||
"--direct-playout" | "--android" => direct_playout = true,
|
"--direct-playout" | "--android" => direct_playout = true,
|
||||||
|
"--os-aec" => os_aec = true,
|
||||||
|
"--aec-delay" => {
|
||||||
|
i += 1;
|
||||||
|
aec_delay_ms = Some(
|
||||||
|
args.get(i)
|
||||||
|
.expect("--aec-delay requires milliseconds")
|
||||||
|
.parse()
|
||||||
|
.expect("--aec-delay value must be a number"),
|
||||||
|
);
|
||||||
|
}
|
||||||
"--alias" => {
|
"--alias" => {
|
||||||
i += 1;
|
i += 1;
|
||||||
alias = Some(args.get(i).expect("--alias requires a name").to_string());
|
alias = Some(args.get(i).expect("--alias requires a name").to_string());
|
||||||
@@ -246,7 +260,9 @@ fn parse_args() -> CliArgs {
|
|||||||
eprintln!(" --no-fec Disable forward error correction");
|
eprintln!(" --no-fec Disable forward error correction");
|
||||||
eprintln!(" --no-silence Disable silence suppression");
|
eprintln!(" --no-silence Disable silence suppression");
|
||||||
eprintln!(" --direct-playout Bypass jitter buffer (decode on recv, like Android)");
|
eprintln!(" --direct-playout Bypass jitter buffer (decode on recv, like Android)");
|
||||||
eprintln!(" --android Alias for --no-denoise --no-aec --no-silence --direct-playout");
|
eprintln!(" --aec-delay <ms> AEC far-end delay compensation (default: 40ms)");
|
||||||
|
eprintln!(" --os-aec Use macOS VoiceProcessingIO for hardware AEC (requires --vpio feature)");
|
||||||
|
eprintln!(" --android Alias for --no-denoise --no-silence --direct-playout");
|
||||||
eprintln!(" --token <token> featherChat bearer token for relay auth");
|
eprintln!(" --token <token> featherChat bearer token for relay auth");
|
||||||
eprintln!(" --metrics-file <path> Write JSONL telemetry to file (1 line/sec)");
|
eprintln!(" --metrics-file <path> Write JSONL telemetry to file (1 line/sec)");
|
||||||
eprintln!(" (48kHz mono s16le, play with ffplay -f s16le -ar 48000 -ch_layout mono file.raw)");
|
eprintln!(" (48kHz mono s16le, play with ffplay -f s16le -ar 48000 -ch_layout mono file.raw)");
|
||||||
@@ -292,6 +308,8 @@ fn parse_args() -> CliArgs {
|
|||||||
no_fec,
|
no_fec,
|
||||||
no_silence,
|
no_silence,
|
||||||
direct_playout,
|
direct_playout,
|
||||||
|
aec_delay_ms,
|
||||||
|
os_aec,
|
||||||
token,
|
token,
|
||||||
_metrics_file: metrics_file,
|
_metrics_file: metrics_file,
|
||||||
}
|
}
|
||||||
@@ -375,11 +393,13 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
{
|
{
|
||||||
let audio_opts = AudioOpts {
|
let audio_opts = AudioOpts {
|
||||||
no_denoise: cli.no_denoise || cli.direct_playout,
|
no_denoise: cli.no_denoise || cli.direct_playout,
|
||||||
no_aec: cli.no_aec || cli.direct_playout, // AEC disabled by default — macOS has built-in AEC
|
no_aec: cli.no_aec,
|
||||||
no_agc: cli.no_agc,
|
no_agc: cli.no_agc,
|
||||||
no_fec: cli.no_fec,
|
no_fec: cli.no_fec,
|
||||||
no_silence: cli.no_silence || cli.direct_playout,
|
no_silence: cli.no_silence || cli.direct_playout,
|
||||||
direct_playout: cli.direct_playout,
|
direct_playout: cli.direct_playout,
|
||||||
|
aec_delay_ms: cli.aec_delay_ms,
|
||||||
|
os_aec: cli.os_aec,
|
||||||
};
|
};
|
||||||
return run_live(transport, audio_opts).await;
|
return run_live(transport, audio_opts).await;
|
||||||
}
|
}
|
||||||
@@ -684,6 +704,8 @@ struct AudioOpts {
|
|||||||
no_fec: bool,
|
no_fec: bool,
|
||||||
no_silence: bool,
|
no_silence: bool,
|
||||||
direct_playout: bool,
|
direct_playout: bool,
|
||||||
|
aec_delay_ms: Option<u32>,
|
||||||
|
os_aec: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "audio")]
|
#[cfg(feature = "audio")]
|
||||||
@@ -697,15 +719,32 @@ async fn run_live(
|
|||||||
use wzp_client::audio_ring::AudioRing;
|
use wzp_client::audio_ring::AudioRing;
|
||||||
use wzp_client::call::JitterTelemetry;
|
use wzp_client::call::JitterTelemetry;
|
||||||
|
|
||||||
|
// Audio I/O: either VPIO (OS-level AEC) or separate CPAL streams.
|
||||||
|
#[cfg(feature = "vpio")]
|
||||||
|
let vpio;
|
||||||
|
let (capture_ring, playout_ring) = if opts.os_aec {
|
||||||
|
#[cfg(feature = "vpio")]
|
||||||
|
{
|
||||||
|
vpio = wzp_client::audio_vpio::VpioAudio::start()?;
|
||||||
|
(vpio.capture_ring().clone(), vpio.playout_ring().clone())
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "vpio"))]
|
||||||
|
{
|
||||||
|
anyhow::bail!("--os-aec requires the 'vpio' feature (build with: cargo build --features audio,vpio)");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
let capture = AudioCapture::start()?;
|
let capture = AudioCapture::start()?;
|
||||||
let playback = AudioPlayback::start()?;
|
let playback = AudioPlayback::start()?;
|
||||||
info!("audio I/O started (lock-free ring buffers) — press Ctrl+C to stop");
|
let cr = capture.ring().clone();
|
||||||
|
let pr = playback.ring().clone();
|
||||||
|
// Keep handles alive (streams stop when dropped)
|
||||||
|
std::mem::forget(capture);
|
||||||
|
std::mem::forget(playback);
|
||||||
|
(cr, pr)
|
||||||
|
};
|
||||||
|
info!(os_aec = opts.os_aec, "audio I/O started — press Ctrl+C to stop");
|
||||||
|
|
||||||
let capture_ring = capture.ring().clone();
|
// Far-end reference ring (only used when NOT using OS AEC).
|
||||||
let playout_ring = playback.ring().clone();
|
|
||||||
|
|
||||||
// Far-end reference ring: recv task writes decoded audio here,
|
|
||||||
// send task reads it to feed the AEC echo canceller.
|
|
||||||
let farend_ring = StdArc::new(AudioRing::new());
|
let farend_ring = StdArc::new(AudioRing::new());
|
||||||
|
|
||||||
let running = StdArc::new(AtomicBool::new(true));
|
let running = StdArc::new(AtomicBool::new(true));
|
||||||
@@ -728,6 +767,7 @@ async fn run_live(
|
|||||||
let config = CallConfig {
|
let config = CallConfig {
|
||||||
noise_suppression: !opts.no_denoise,
|
noise_suppression: !opts.no_denoise,
|
||||||
suppression_enabled: !opts.no_silence,
|
suppression_enabled: !opts.no_silence,
|
||||||
|
aec_delay_ms: opts.aec_delay_ms.unwrap_or(40),
|
||||||
..CallConfig::default()
|
..CallConfig::default()
|
||||||
};
|
};
|
||||||
{
|
{
|
||||||
@@ -747,7 +787,7 @@ async fn run_live(
|
|||||||
let send_transport = transport.clone();
|
let send_transport = transport.clone();
|
||||||
let send_running = running.clone();
|
let send_running = running.clone();
|
||||||
let send_mic_muted = mic_muted.clone();
|
let send_mic_muted = mic_muted.clone();
|
||||||
let no_aec = opts.no_aec;
|
let no_aec = opts.no_aec || opts.os_aec; // OS AEC replaces software AEC
|
||||||
let no_agc = opts.no_agc;
|
let no_agc = opts.no_agc;
|
||||||
let _no_fec = opts.no_fec;
|
let _no_fec = opts.no_fec;
|
||||||
let send_farend = farend_ring.clone();
|
let send_farend = farend_ring.clone();
|
||||||
@@ -1075,8 +1115,7 @@ async fn run_live(
|
|||||||
}
|
}
|
||||||
|
|
||||||
running.store(false, Ordering::SeqCst);
|
running.store(false, Ordering::SeqCst);
|
||||||
capture.stop();
|
// Audio streams stop when their handles are dropped (via mem::forget above or VPIO drop).
|
||||||
playback.stop();
|
|
||||||
|
|
||||||
// Give transport 2s to close gracefully, then bail
|
// Give transport 2s to close gracefully, then bail
|
||||||
match tokio::time::timeout(std::time::Duration::from_secs(2), transport.close()).await {
|
match tokio::time::timeout(std::time::Duration::from_secs(2), transport.close()).await {
|
||||||
|
|||||||
@@ -10,6 +10,8 @@
|
|||||||
pub mod audio_io;
|
pub mod audio_io;
|
||||||
#[cfg(feature = "audio")]
|
#[cfg(feature = "audio")]
|
||||||
pub mod audio_ring;
|
pub mod audio_ring;
|
||||||
|
#[cfg(feature = "vpio")]
|
||||||
|
pub mod audio_vpio;
|
||||||
pub mod bench;
|
pub mod bench;
|
||||||
pub mod call;
|
pub mod call;
|
||||||
pub mod drift_test;
|
pub mod drift_test;
|
||||||
|
|||||||
@@ -1,71 +1,127 @@
|
|||||||
//! Acoustic Echo Cancellation using NLMS adaptive filter.
|
//! Acoustic Echo Cancellation — delay-compensated leaky NLMS with
|
||||||
|
//! Geigel double-talk detection.
|
||||||
//!
|
//!
|
||||||
//! Improvements over naive NLMS:
|
//! Key insight: on a laptop, the round-trip audio latency (playout → speaker
|
||||||
//! - Double-talk detection: freezes adaptation when near-end speech dominates,
|
//! → air → mic → capture) is 30–50ms. The far-end reference must be delayed
|
||||||
//! preventing the filter from cancelling the local speaker's voice.
|
//! by this amount so the adaptive filter models the *echo path*, not the
|
||||||
//! - Short default tail (30ms) tuned for laptops/phones where speaker and mic
|
//! *system delay + echo path*.
|
||||||
//! are close together.
|
//!
|
||||||
//! - Residual suppression: attenuates output when echo estimate is confident.
|
//! The leaky coefficient decay prevents the filter from diverging when the
|
||||||
|
//! echo path changes (e.g. hand near laptop) or when the delay estimate
|
||||||
|
//! is slightly off.
|
||||||
|
|
||||||
/// NLMS (Normalized Least Mean Squares) adaptive filter echo canceller
|
/// Delay-compensated leaky NLMS echo canceller with Geigel DTD.
|
||||||
/// with double-talk detection.
|
|
||||||
pub struct EchoCanceller {
|
pub struct EchoCanceller {
|
||||||
filter_coeffs: Vec<f32>,
|
// --- Adaptive filter ---
|
||||||
|
filter: Vec<f32>,
|
||||||
filter_len: usize,
|
filter_len: usize,
|
||||||
far_end_buf: Vec<f32>,
|
/// Circular buffer of far-end reference samples (after delay).
|
||||||
far_end_pos: usize,
|
far_buf: Vec<f32>,
|
||||||
/// NLMS step size (adaptation rate).
|
far_pos: usize,
|
||||||
|
/// NLMS step size.
|
||||||
mu: f32,
|
mu: f32,
|
||||||
|
/// Leakage factor: coefficients are multiplied by (1 - leak) each frame.
|
||||||
|
/// Prevents unbounded growth / divergence. 0.0001 is gentle.
|
||||||
|
leak: f32,
|
||||||
enabled: bool,
|
enabled: bool,
|
||||||
/// Running far-end power estimate (for double-talk detection).
|
|
||||||
far_power_avg: f32,
|
// --- Delay buffer ---
|
||||||
/// Running near-end power estimate (for double-talk detection).
|
/// Raw far-end samples before delay compensation.
|
||||||
near_power_avg: f32,
|
delay_ring: Vec<f32>,
|
||||||
/// Smoothing factor for power estimates.
|
delay_write: usize,
|
||||||
power_alpha: f32,
|
delay_read: usize,
|
||||||
/// Double-talk threshold: if near/far power ratio exceeds this,
|
/// Delay in samples (e.g. 1920 = 40ms at 48kHz).
|
||||||
/// freeze adaptation to protect near-end speech.
|
delay_samples: usize,
|
||||||
dt_threshold: f32,
|
/// Capacity of the delay ring.
|
||||||
/// Residual echo suppression factor (0.0 = none, 1.0 = full).
|
delay_cap: usize,
|
||||||
suppress: f32,
|
|
||||||
|
// --- Double-talk detection (Geigel) ---
|
||||||
|
/// Peak far-end level over the last filter_len samples.
|
||||||
|
far_peak: f32,
|
||||||
|
/// Geigel threshold: if |near| > threshold * far_peak, assume double-talk.
|
||||||
|
geigel_threshold: f32,
|
||||||
|
/// Holdover counter: keep DTD active for a few frames after detection.
|
||||||
|
dtd_holdover: u32,
|
||||||
|
dtd_hold_frames: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EchoCanceller {
|
impl EchoCanceller {
|
||||||
/// Create a new echo canceller.
|
/// Create a new echo canceller.
|
||||||
///
|
///
|
||||||
/// * `sample_rate` — typically 48000
|
/// * `sample_rate` — typically 48000
|
||||||
/// * `filter_ms` — echo-tail length in milliseconds (30ms recommended for laptops)
|
/// * `filter_ms` — echo-tail length in milliseconds (60ms recommended)
|
||||||
|
/// * `delay_ms` — far-end delay compensation in milliseconds (40ms for laptops)
|
||||||
pub fn new(sample_rate: u32, filter_ms: u32) -> Self {
|
pub fn new(sample_rate: u32, filter_ms: u32) -> Self {
|
||||||
|
Self::with_delay(sample_rate, filter_ms, 40)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_delay(sample_rate: u32, filter_ms: u32, delay_ms: u32) -> Self {
|
||||||
let filter_len = (sample_rate as usize) * (filter_ms as usize) / 1000;
|
let filter_len = (sample_rate as usize) * (filter_ms as usize) / 1000;
|
||||||
|
let delay_samples = (sample_rate as usize) * (delay_ms as usize) / 1000;
|
||||||
|
// Delay ring must hold at least delay_samples + one frame (960) of headroom.
|
||||||
|
let delay_cap = delay_samples + (sample_rate as usize / 10); // +100ms headroom
|
||||||
Self {
|
Self {
|
||||||
filter_coeffs: vec![0.0f32; filter_len],
|
filter: vec![0.0; filter_len],
|
||||||
filter_len,
|
filter_len,
|
||||||
far_end_buf: vec![0.0f32; filter_len],
|
far_buf: vec![0.0; filter_len],
|
||||||
far_end_pos: 0,
|
far_pos: 0,
|
||||||
mu: 0.005,
|
mu: 0.01,
|
||||||
|
leak: 0.0001,
|
||||||
enabled: true,
|
enabled: true,
|
||||||
far_power_avg: 0.0,
|
|
||||||
near_power_avg: 0.0,
|
delay_ring: vec![0.0; delay_cap],
|
||||||
power_alpha: 0.01,
|
delay_write: 0,
|
||||||
dt_threshold: 4.0,
|
delay_read: 0,
|
||||||
suppress: 0.6,
|
delay_samples,
|
||||||
|
delay_cap,
|
||||||
|
|
||||||
|
far_peak: 0.0,
|
||||||
|
geigel_threshold: 0.7,
|
||||||
|
dtd_holdover: 0,
|
||||||
|
dtd_hold_frames: 5,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Feed far-end (speaker/playback) samples into the circular buffer.
|
/// Feed far-end (speaker) samples. These go into the delay buffer first;
|
||||||
///
|
/// once enough samples have accumulated, they are released to the filter's
|
||||||
/// Must be called with the audio that was played out through the speaker
|
/// circular buffer with the correct delay offset.
|
||||||
/// *before* the corresponding near-end frame is processed.
|
|
||||||
pub fn feed_farend(&mut self, farend: &[i16]) {
|
pub fn feed_farend(&mut self, farend: &[i16]) {
|
||||||
|
// Write raw samples into the delay ring.
|
||||||
for &s in farend {
|
for &s in farend {
|
||||||
self.far_end_buf[self.far_end_pos] = s as f32;
|
self.delay_ring[self.delay_write % self.delay_cap] = s as f32;
|
||||||
self.far_end_pos = (self.far_end_pos + 1) % self.filter_len;
|
self.delay_write += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Release delayed samples to the filter's far-end buffer.
|
||||||
|
while self.delay_available() >= 1 {
|
||||||
|
let sample = self.delay_ring[self.delay_read % self.delay_cap];
|
||||||
|
self.delay_read += 1;
|
||||||
|
|
||||||
|
self.far_buf[self.far_pos] = sample;
|
||||||
|
self.far_pos = (self.far_pos + 1) % self.filter_len;
|
||||||
|
|
||||||
|
// Track peak far-end level for Geigel DTD.
|
||||||
|
let abs_s = sample.abs();
|
||||||
|
if abs_s > self.far_peak {
|
||||||
|
self.far_peak = abs_s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decay far_peak slowly (avoids stale peak from a loud burst long ago).
|
||||||
|
self.far_peak *= 0.9995;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Number of delayed samples available to release.
|
||||||
|
fn delay_available(&self) -> usize {
|
||||||
|
let buffered = self.delay_write - self.delay_read;
|
||||||
|
if buffered > self.delay_samples {
|
||||||
|
buffered - self.delay_samples
|
||||||
|
} else {
|
||||||
|
0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Process a near-end (microphone) frame, removing the estimated echo.
|
/// Process a near-end (microphone) frame, removing the estimated echo.
|
||||||
///
|
|
||||||
/// Returns the echo-return-loss enhancement (ERLE) as a ratio.
|
|
||||||
pub fn process_frame(&mut self, nearend: &mut [i16]) -> f32 {
|
pub fn process_frame(&mut self, nearend: &mut [i16]) -> f32 {
|
||||||
if !self.enabled {
|
if !self.enabled {
|
||||||
return 1.0;
|
return 1.0;
|
||||||
@@ -74,31 +130,33 @@ impl EchoCanceller {
|
|||||||
let n = nearend.len();
|
let n = nearend.len();
|
||||||
let fl = self.filter_len;
|
let fl = self.filter_len;
|
||||||
|
|
||||||
// Compute frame-level power for double-talk detection.
|
// --- Geigel double-talk detection ---
|
||||||
let near_power: f32 = nearend.iter().map(|&s| {
|
// If any near-end sample exceeds threshold * far_peak, assume
|
||||||
let f = s as f32;
|
// the local speaker is active and freeze adaptation.
|
||||||
f * f
|
let mut is_doubletalk = self.dtd_holdover > 0;
|
||||||
}).sum::<f32>() / n as f32;
|
if !is_doubletalk {
|
||||||
|
let threshold_level = self.geigel_threshold * self.far_peak;
|
||||||
|
for &s in nearend.iter() {
|
||||||
|
if (s as f32).abs() > threshold_level && self.far_peak > 100.0 {
|
||||||
|
is_doubletalk = true;
|
||||||
|
self.dtd_holdover = self.dtd_hold_frames;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if self.dtd_holdover > 0 {
|
||||||
|
self.dtd_holdover -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
let far_start = (self.far_end_pos + fl * ((n / fl) + 1) - n) % fl;
|
// Check if far-end is active (otherwise nothing to cancel).
|
||||||
let far_power: f32 = (0..n).map(|i| {
|
let far_active = self.far_peak > 100.0;
|
||||||
let fe = self.far_end_buf[(far_start + i) % fl];
|
|
||||||
fe * fe
|
|
||||||
}).sum::<f32>() / n as f32;
|
|
||||||
|
|
||||||
// Smooth power estimates
|
// --- Leaky coefficient decay ---
|
||||||
self.far_power_avg += self.power_alpha * (far_power - self.far_power_avg);
|
// Applied once per frame for efficiency.
|
||||||
self.near_power_avg += self.power_alpha * (near_power - self.near_power_avg);
|
let decay = 1.0 - self.leak;
|
||||||
|
for c in self.filter.iter_mut() {
|
||||||
// Double-talk detection: if near-end is much louder than far-end,
|
*c *= decay;
|
||||||
// the local speaker is active — freeze adaptation.
|
}
|
||||||
let adapt = if self.far_power_avg < 1.0 {
|
|
||||||
// No far-end signal — nothing to cancel, skip adaptation
|
|
||||||
false
|
|
||||||
} else {
|
|
||||||
let ratio = self.near_power_avg / (self.far_power_avg + 1.0);
|
|
||||||
ratio < self.dt_threshold
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut sum_near_sq: f64 = 0.0;
|
let mut sum_near_sq: f64 = 0.0;
|
||||||
let mut sum_err_sq: f64 = 0.0;
|
let mut sum_err_sq: f64 = 0.0;
|
||||||
@@ -106,76 +164,62 @@ impl EchoCanceller {
|
|||||||
for i in 0..n {
|
for i in 0..n {
|
||||||
let near_f = nearend[i] as f32;
|
let near_f = nearend[i] as f32;
|
||||||
|
|
||||||
// Estimate echo: dot(coeffs, farend_window)
|
// Position of far-end "now" for this near-end sample.
|
||||||
let base = (self.far_end_pos + fl * ((n / fl) + 2) + i - n) % fl;
|
let base = (self.far_pos + fl * ((n / fl) + 2) + i - n) % fl;
|
||||||
|
|
||||||
|
// --- Echo estimation: dot(filter, far_end_window) ---
|
||||||
let mut echo_est: f32 = 0.0;
|
let mut echo_est: f32 = 0.0;
|
||||||
let mut power: f32 = 0.0;
|
let mut power: f32 = 0.0;
|
||||||
|
|
||||||
for k in 0..fl {
|
for k in 0..fl {
|
||||||
let fe_idx = (base + fl - k) % fl;
|
let fe_idx = (base + fl - k) % fl;
|
||||||
let fe = self.far_end_buf[fe_idx];
|
let fe = self.far_buf[fe_idx];
|
||||||
echo_est += self.filter_coeffs[k] * fe;
|
echo_est += self.filter[k] * fe;
|
||||||
power += fe * fe;
|
power += fe * fe;
|
||||||
}
|
}
|
||||||
|
|
||||||
let error = near_f - echo_est;
|
let error = near_f - echo_est;
|
||||||
|
|
||||||
// NLMS coefficient update — only when not in double-talk
|
// --- NLMS adaptation (only when far-end active & no double-talk) ---
|
||||||
if adapt && power > 1.0 {
|
if far_active && !is_doubletalk && power > 10.0 {
|
||||||
let norm = power + 1.0;
|
let step = self.mu * error / (power + 1.0);
|
||||||
let step = self.mu * error / norm;
|
|
||||||
|
|
||||||
for k in 0..fl {
|
for k in 0..fl {
|
||||||
let fe_idx = (base + fl - k) % fl;
|
let fe_idx = (base + fl - k) % fl;
|
||||||
let fe = self.far_end_buf[fe_idx];
|
self.filter[k] += step * self.far_buf[fe_idx];
|
||||||
self.filter_coeffs[k] += step * fe;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Residual echo suppression: when far-end is active, attenuate
|
let out = error.clamp(-32768.0, 32767.0);
|
||||||
// the residual to reduce perceptible echo.
|
|
||||||
let out = if self.far_power_avg > 100.0 && !adapt {
|
|
||||||
// Double-talk: pass through near-end with minimal suppression
|
|
||||||
error
|
|
||||||
} else if self.far_power_avg > 100.0 {
|
|
||||||
// Far-end active, not double-talk: apply suppression
|
|
||||||
error * (1.0 - self.suppress * (echo_est.abs() / (near_f.abs() + 1.0)).min(1.0))
|
|
||||||
} else {
|
|
||||||
// No far-end: pass through
|
|
||||||
error
|
|
||||||
};
|
|
||||||
|
|
||||||
let out = out.max(-32768.0).min(32767.0);
|
|
||||||
nearend[i] = out as i16;
|
nearend[i] = out as i16;
|
||||||
|
|
||||||
sum_near_sq += (near_f as f64) * (near_f as f64);
|
sum_near_sq += (near_f as f64).powi(2);
|
||||||
sum_err_sq += (out as f64) * (out as f64);
|
sum_err_sq += (out as f64).powi(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
if sum_err_sq < 1.0 {
|
if sum_err_sq < 1.0 {
|
||||||
return 100.0;
|
100.0
|
||||||
}
|
} else {
|
||||||
(sum_near_sq / sum_err_sq).sqrt() as f32
|
(sum_near_sq / sum_err_sq).sqrt() as f32
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Enable or disable echo cancellation.
|
|
||||||
pub fn set_enabled(&mut self, enabled: bool) {
|
pub fn set_enabled(&mut self, enabled: bool) {
|
||||||
self.enabled = enabled;
|
self.enabled = enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns whether echo cancellation is currently enabled.
|
|
||||||
pub fn is_enabled(&self) -> bool {
|
pub fn is_enabled(&self) -> bool {
|
||||||
self.enabled
|
self.enabled
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Reset the adaptive filter to its initial state.
|
|
||||||
pub fn reset(&mut self) {
|
pub fn reset(&mut self) {
|
||||||
self.filter_coeffs.iter_mut().for_each(|c| *c = 0.0);
|
self.filter.iter_mut().for_each(|c| *c = 0.0);
|
||||||
self.far_end_buf.iter_mut().for_each(|s| *s = 0.0);
|
self.far_buf.iter_mut().for_each(|s| *s = 0.0);
|
||||||
self.far_end_pos = 0;
|
self.far_pos = 0;
|
||||||
self.far_power_avg = 0.0;
|
self.far_peak = 0.0;
|
||||||
self.near_power_avg = 0.0;
|
self.delay_ring.iter_mut().for_each(|s| *s = 0.0);
|
||||||
|
self.delay_write = 0;
|
||||||
|
self.delay_read = 0;
|
||||||
|
self.dtd_holdover = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -184,46 +228,40 @@ mod tests {
|
|||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn aec_creates_with_correct_filter_len() {
|
fn creates_with_correct_sizes() {
|
||||||
let aec = EchoCanceller::new(48000, 30);
|
let aec = EchoCanceller::with_delay(48000, 60, 40);
|
||||||
assert_eq!(aec.filter_len, 1440);
|
assert_eq!(aec.filter_len, 2880); // 60ms @ 48kHz
|
||||||
assert_eq!(aec.filter_coeffs.len(), 1440);
|
assert_eq!(aec.delay_samples, 1920); // 40ms @ 48kHz
|
||||||
assert_eq!(aec.far_end_buf.len(), 1440);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn aec_passthrough_when_disabled() {
|
fn passthrough_when_disabled() {
|
||||||
let mut aec = EchoCanceller::new(48000, 30);
|
let mut aec = EchoCanceller::new(48000, 60);
|
||||||
aec.set_enabled(false);
|
aec.set_enabled(false);
|
||||||
assert!(!aec.is_enabled());
|
|
||||||
|
|
||||||
let original: Vec<i16> = (0..480).map(|i| (i * 10) as i16).collect();
|
let original: Vec<i16> = (0..960).map(|i| (i * 10) as i16).collect();
|
||||||
let mut frame = original.clone();
|
let mut frame = original.clone();
|
||||||
let erle = aec.process_frame(&mut frame);
|
aec.process_frame(&mut frame);
|
||||||
assert_eq!(erle, 1.0);
|
|
||||||
assert_eq!(frame, original);
|
assert_eq!(frame, original);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn aec_reset_zeroes_state() {
|
fn silence_passthrough() {
|
||||||
let mut aec = EchoCanceller::new(48000, 10);
|
let mut aec = EchoCanceller::with_delay(48000, 30, 0);
|
||||||
let farend: Vec<i16> = (0..480).map(|i| ((i * 37) % 1000) as i16).collect();
|
aec.feed_farend(&vec![0i16; 960]);
|
||||||
aec.feed_farend(&farend);
|
let mut frame = vec![0i16; 960];
|
||||||
|
aec.process_frame(&mut frame);
|
||||||
aec.reset();
|
assert!(frame.iter().all(|&s| s == 0));
|
||||||
|
|
||||||
assert!(aec.filter_coeffs.iter().all(|&c| c == 0.0));
|
|
||||||
assert!(aec.far_end_buf.iter().all(|&s| s == 0.0));
|
|
||||||
assert_eq!(aec.far_end_pos, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn aec_reduces_echo_of_known_signal() {
|
fn reduces_echo_with_no_delay() {
|
||||||
let filter_ms = 5;
|
// Simulate: far-end plays, echo arrives at mic attenuated by ~50%
|
||||||
let mut aec = EchoCanceller::new(48000, filter_ms);
|
// (realistic — speaker to mic on laptop loses volume).
|
||||||
|
let mut aec = EchoCanceller::with_delay(48000, 10, 0);
|
||||||
|
|
||||||
let frame_len = 480usize;
|
let frame_len = 480;
|
||||||
let make_frame = |offset: usize| -> Vec<i16> {
|
let make_tone = |offset: usize| -> Vec<i16> {
|
||||||
(0..frame_len)
|
(0..frame_len)
|
||||||
.map(|i| {
|
.map(|i| {
|
||||||
let t = (offset + i) as f64 / 48000.0;
|
let t = (offset + i) as f64 / 48000.0;
|
||||||
@@ -233,11 +271,12 @@ mod tests {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let mut last_erle = 1.0f32;
|
let mut last_erle = 1.0f32;
|
||||||
for frame_idx in 0..40 {
|
for frame_idx in 0..100 {
|
||||||
let farend = make_frame(frame_idx * frame_len);
|
let farend = make_tone(frame_idx * frame_len);
|
||||||
aec.feed_farend(&farend);
|
aec.feed_farend(&farend);
|
||||||
|
|
||||||
let mut nearend = farend.clone();
|
// Near-end = attenuated copy of far-end (echo at ~50% volume).
|
||||||
|
let mut nearend: Vec<i16> = farend.iter().map(|&s| s / 2).collect();
|
||||||
last_erle = aec.process_frame(&mut nearend);
|
last_erle = aec.process_frame(&mut nearend);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -248,37 +287,24 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn aec_silence_passthrough() {
|
fn preserves_nearend_during_doubletalk() {
|
||||||
let mut aec = EchoCanceller::new(48000, 10);
|
let mut aec = EchoCanceller::with_delay(48000, 30, 0);
|
||||||
aec.feed_farend(&vec![0i16; 480]);
|
|
||||||
let mut frame = vec![0i16; 480];
|
|
||||||
let erle = aec.process_frame(&mut frame);
|
|
||||||
assert!(erle >= 1.0);
|
|
||||||
assert!(frame.iter().all(|&s| s == 0));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn aec_preserves_nearend_during_doubletalk() {
|
|
||||||
// When only near-end is active (no far-end), output should
|
|
||||||
// closely match input — the AEC should not suppress speech.
|
|
||||||
let mut aec = EchoCanceller::new(48000, 30);
|
|
||||||
|
|
||||||
let frame_len = 960;
|
let frame_len = 960;
|
||||||
let nearend_signal: Vec<i16> = (0..frame_len)
|
let nearend: Vec<i16> = (0..frame_len)
|
||||||
.map(|i| {
|
.map(|i| {
|
||||||
let t = i as f64 / 48000.0;
|
let t = i as f64 / 48000.0;
|
||||||
(10000.0 * (2.0 * std::f64::consts::PI * 440.0 * t).sin()) as i16
|
(10000.0 * (2.0 * std::f64::consts::PI * 440.0 * t).sin()) as i16
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
// Feed silence as far-end
|
// Feed silence as far-end (no echo source).
|
||||||
aec.feed_farend(&vec![0i16; frame_len]);
|
aec.feed_farend(&vec![0i16; frame_len]);
|
||||||
|
|
||||||
let mut frame = nearend_signal.clone();
|
let mut frame = nearend.clone();
|
||||||
aec.process_frame(&mut frame);
|
aec.process_frame(&mut frame);
|
||||||
|
|
||||||
// Output energy should be close to input energy (not suppressed)
|
let input_energy: f64 = nearend.iter().map(|&s| (s as f64).powi(2)).sum();
|
||||||
let input_energy: f64 = nearend_signal.iter().map(|&s| (s as f64).powi(2)).sum();
|
|
||||||
let output_energy: f64 = frame.iter().map(|&s| (s as f64).powi(2)).sum();
|
let output_energy: f64 = frame.iter().map(|&s| (s as f64).powi(2)).sum();
|
||||||
let ratio = output_energy / input_energy;
|
let ratio = output_energy / input_energy;
|
||||||
|
|
||||||
@@ -287,4 +313,23 @@ mod tests {
|
|||||||
"near-end speech should be preserved, energy ratio = {ratio:.3}"
|
"near-end speech should be preserved, energy ratio = {ratio:.3}"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn delay_buffer_holds_samples() {
|
||||||
|
let mut aec = EchoCanceller::with_delay(48000, 10, 20);
|
||||||
|
// 20ms delay = 960 samples @ 48kHz.
|
||||||
|
// After feeding, feed_farend auto-drains available samples to far_buf.
|
||||||
|
// So delay_available() is always 0 after feed_farend returns.
|
||||||
|
// Instead, verify far_pos advances only after the delay is filled.
|
||||||
|
|
||||||
|
// Feed 960 samples (= delay amount). No samples released yet.
|
||||||
|
aec.feed_farend(&vec![1i16; 960]);
|
||||||
|
// far_buf should still be all zeros (nothing released).
|
||||||
|
assert!(aec.far_buf.iter().all(|&s| s == 0.0), "nothing should be released yet");
|
||||||
|
|
||||||
|
// Feed 480 more. 480 should be released to far_buf.
|
||||||
|
aec.feed_farend(&vec![2i16; 480]);
|
||||||
|
let non_zero = aec.far_buf.iter().filter(|&&s| s != 0.0).count();
|
||||||
|
assert!(non_zero > 0, "samples should have been released to far_buf");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user