feat: Android VoIP client — Phase 1 (audio quality, network adaptation, crate skeleton)
- New wzp-android crate with Oboe C++ backend, lock-free SPSC ring buffers, engine orchestrator, codec pipeline, and Android Gradle project structure - AEC (NLMS adaptive filter), AGC (two-stage with fast attack/slow release), windowed-sinc FIR resampler replacing linear interpolation (wzp-codec) - Opus encoder tuning: complexity 7 default, set_expected_loss support - Mobile jitter buffer: asymmetric EMA (fast up/slow down), handoff spike detection with 2s cooldown, configurable safety margin - Network-aware quality control: cellular-specific thresholds, faster downgrade on cellular, proactive tier drop on WiFi→cellular handoff, FEC ratio boost during network transitions - Handoff detection in PathMonitor via RTT jitter spike analysis Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -14,7 +14,7 @@ use crate::codec2_dec::Codec2Decoder;
|
||||
use crate::codec2_enc::Codec2Encoder;
|
||||
use crate::opus_dec::OpusDecoder;
|
||||
use crate::opus_enc::OpusEncoder;
|
||||
use crate::resample;
|
||||
use crate::resample::{Downsampler48to8, Upsampler8to48};
|
||||
|
||||
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -54,6 +54,7 @@ pub struct AdaptiveEncoder {
|
||||
opus: OpusEncoder,
|
||||
codec2: Codec2Encoder,
|
||||
active: CodecId,
|
||||
downsampler: Downsampler48to8,
|
||||
}
|
||||
|
||||
impl AdaptiveEncoder {
|
||||
@@ -66,6 +67,7 @@ impl AdaptiveEncoder {
|
||||
opus,
|
||||
codec2,
|
||||
active: profile.codec,
|
||||
downsampler: Downsampler48to8::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -74,7 +76,7 @@ impl AudioEncoder for AdaptiveEncoder {
|
||||
fn encode(&mut self, pcm: &[i16], out: &mut [u8]) -> Result<usize, CodecError> {
|
||||
if is_codec2(self.active) {
|
||||
// Downsample 48 kHz → 8 kHz then encode via Codec2.
|
||||
let pcm_8k = resample::resample_48k_to_8k(pcm);
|
||||
let pcm_8k = self.downsampler.process(pcm);
|
||||
self.codec2.encode(&pcm_8k, out)
|
||||
} else {
|
||||
self.opus.encode(pcm, out)
|
||||
@@ -126,6 +128,7 @@ pub struct AdaptiveDecoder {
|
||||
opus: OpusDecoder,
|
||||
codec2: Codec2Decoder,
|
||||
active: CodecId,
|
||||
upsampler: Upsampler8to48,
|
||||
}
|
||||
|
||||
impl AdaptiveDecoder {
|
||||
@@ -138,6 +141,7 @@ impl AdaptiveDecoder {
|
||||
opus,
|
||||
codec2,
|
||||
active: profile.codec,
|
||||
upsampler: Upsampler8to48::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -149,7 +153,7 @@ impl AudioDecoder for AdaptiveDecoder {
|
||||
let c2_samples = self.codec2_frame_samples();
|
||||
let mut buf_8k = vec![0i16; c2_samples];
|
||||
let n = self.codec2.decode(encoded, &mut buf_8k)?;
|
||||
let pcm_48k = resample::resample_8k_to_48k(&buf_8k[..n]);
|
||||
let pcm_48k = self.upsampler.process(&buf_8k[..n]);
|
||||
let out_len = pcm_48k.len().min(pcm.len());
|
||||
pcm[..out_len].copy_from_slice(&pcm_48k[..out_len]);
|
||||
Ok(out_len)
|
||||
@@ -163,7 +167,7 @@ impl AudioDecoder for AdaptiveDecoder {
|
||||
let c2_samples = self.codec2_frame_samples();
|
||||
let mut buf_8k = vec![0i16; c2_samples];
|
||||
let n = self.codec2.decode_lost(&mut buf_8k)?;
|
||||
let pcm_48k = resample::resample_8k_to_48k(&buf_8k[..n]);
|
||||
let pcm_48k = self.upsampler.process(&buf_8k[..n]);
|
||||
let out_len = pcm_48k.len().min(pcm.len());
|
||||
pcm[..out_len].copy_from_slice(&pcm_48k[..out_len]);
|
||||
Ok(out_len)
|
||||
|
||||
228
crates/wzp-codec/src/aec.rs
Normal file
228
crates/wzp-codec/src/aec.rs
Normal file
@@ -0,0 +1,228 @@
|
||||
//! Acoustic Echo Cancellation using NLMS adaptive filter.
|
||||
//! Processes 480-sample (10ms) sub-frames at 48kHz.
|
||||
|
||||
/// NLMS (Normalized Least Mean Squares) adaptive filter echo canceller.
|
||||
///
|
||||
/// Removes acoustic echo by modelling the echo path between the far-end
|
||||
/// (speaker) signal and the near-end (microphone) signal, then subtracting
|
||||
/// the estimated echo from the near-end in real time.
|
||||
pub struct EchoCanceller {
|
||||
filter_coeffs: Vec<f32>,
|
||||
filter_len: usize,
|
||||
far_end_buf: Vec<f32>,
|
||||
far_end_pos: usize,
|
||||
mu: f32,
|
||||
enabled: bool,
|
||||
}
|
||||
|
||||
impl EchoCanceller {
|
||||
/// Create a new echo canceller.
|
||||
///
|
||||
/// * `sample_rate` — typically 48000
|
||||
/// * `filter_ms` — echo-tail length in milliseconds (e.g. 100 for 100 ms)
|
||||
pub fn new(sample_rate: u32, filter_ms: u32) -> Self {
|
||||
let filter_len = (sample_rate as usize) * (filter_ms as usize) / 1000;
|
||||
Self {
|
||||
filter_coeffs: vec![0.0f32; filter_len],
|
||||
filter_len,
|
||||
far_end_buf: vec![0.0f32; filter_len],
|
||||
far_end_pos: 0,
|
||||
mu: 0.01,
|
||||
enabled: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Feed far-end (speaker/playback) samples into the circular buffer.
|
||||
///
|
||||
/// Must be called with the audio that was played out through the speaker
|
||||
/// *before* the corresponding near-end frame is processed.
|
||||
pub fn feed_farend(&mut self, farend: &[i16]) {
|
||||
for &s in farend {
|
||||
self.far_end_buf[self.far_end_pos] = s as f32;
|
||||
self.far_end_pos = (self.far_end_pos + 1) % self.filter_len;
|
||||
}
|
||||
}
|
||||
|
||||
/// Process a near-end (microphone) frame, removing the estimated echo.
|
||||
///
|
||||
/// Returns the echo-return-loss enhancement (ERLE) as a ratio: the RMS of
|
||||
/// the original near-end divided by the RMS of the residual. Values > 1.0
|
||||
/// mean echo was reduced.
|
||||
pub fn process_frame(&mut self, nearend: &mut [i16]) -> f32 {
|
||||
if !self.enabled {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
let n = nearend.len();
|
||||
let fl = self.filter_len;
|
||||
|
||||
let mut sum_near_sq: f64 = 0.0;
|
||||
let mut sum_err_sq: f64 = 0.0;
|
||||
|
||||
for i in 0..n {
|
||||
let near_f = nearend[i] as f32;
|
||||
|
||||
// --- estimate echo as dot(coeffs, farend_window) ---
|
||||
// The far-end window for this sample starts at
|
||||
// (far_end_pos - 1 - i) mod filter_len (most recent)
|
||||
// and goes back filter_len samples.
|
||||
let mut echo_est: f32 = 0.0;
|
||||
let mut power: f32 = 0.0;
|
||||
|
||||
// Position of the most-recent far-end sample for this near-end sample.
|
||||
// far_end_pos points to the *next write* position, so the most-recent
|
||||
// sample written is at far_end_pos - 1. We have already called
|
||||
// feed_farend for this block, so the relevant samples are the last
|
||||
// filter_len entries ending just before the current write position,
|
||||
// offset by how far we are into this near-end frame.
|
||||
//
|
||||
// For sample i of the near-end frame, the corresponding far-end
|
||||
// "now" is far_end_pos - n + i (wrapping).
|
||||
// far_end_pos points to next-write, so most recent sample is at
|
||||
// far_end_pos - 1. For the i-th near-end sample we want the
|
||||
// far-end "now" to be at (far_end_pos - n + i). We add fl
|
||||
// repeatedly to avoid underflow on the usize subtraction.
|
||||
let base = (self.far_end_pos + fl * ((n / fl) + 2) + i - n) % fl;
|
||||
|
||||
for k in 0..fl {
|
||||
let fe_idx = (base + fl - k) % fl;
|
||||
let fe = self.far_end_buf[fe_idx];
|
||||
echo_est += self.filter_coeffs[k] * fe;
|
||||
power += fe * fe;
|
||||
}
|
||||
|
||||
let error = near_f - echo_est;
|
||||
|
||||
// --- NLMS coefficient update ---
|
||||
let norm = power + 1.0; // +1 regularisation to avoid div-by-zero
|
||||
let step = self.mu * error / norm;
|
||||
|
||||
for k in 0..fl {
|
||||
let fe_idx = (base + fl - k) % fl;
|
||||
let fe = self.far_end_buf[fe_idx];
|
||||
self.filter_coeffs[k] += step * fe;
|
||||
}
|
||||
|
||||
// Clamp output
|
||||
let out = error.max(-32768.0).min(32767.0);
|
||||
nearend[i] = out as i16;
|
||||
|
||||
sum_near_sq += (near_f as f64) * (near_f as f64);
|
||||
sum_err_sq += (out as f64) * (out as f64);
|
||||
}
|
||||
|
||||
// ERLE ratio
|
||||
if sum_err_sq < 1.0 {
|
||||
return 100.0; // near-perfect cancellation
|
||||
}
|
||||
(sum_near_sq / sum_err_sq).sqrt() as f32
|
||||
}
|
||||
|
||||
/// Enable or disable echo cancellation.
|
||||
pub fn set_enabled(&mut self, enabled: bool) {
|
||||
self.enabled = enabled;
|
||||
}
|
||||
|
||||
/// Returns whether echo cancellation is currently enabled.
|
||||
pub fn is_enabled(&self) -> bool {
|
||||
self.enabled
|
||||
}
|
||||
|
||||
/// Reset the adaptive filter to its initial state.
|
||||
///
|
||||
/// Zeroes out all filter coefficients and the far-end circular buffer.
|
||||
pub fn reset(&mut self) {
|
||||
self.filter_coeffs.iter_mut().for_each(|c| *c = 0.0);
|
||||
self.far_end_buf.iter_mut().for_each(|s| *s = 0.0);
|
||||
self.far_end_pos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn aec_creates_with_correct_filter_len() {
|
||||
let aec = EchoCanceller::new(48000, 100);
|
||||
assert_eq!(aec.filter_len, 4800);
|
||||
assert_eq!(aec.filter_coeffs.len(), 4800);
|
||||
assert_eq!(aec.far_end_buf.len(), 4800);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn aec_passthrough_when_disabled() {
|
||||
let mut aec = EchoCanceller::new(48000, 100);
|
||||
aec.set_enabled(false);
|
||||
assert!(!aec.is_enabled());
|
||||
|
||||
let original: Vec<i16> = (0..480).map(|i| (i * 10) as i16).collect();
|
||||
let mut frame = original.clone();
|
||||
let erle = aec.process_frame(&mut frame);
|
||||
assert_eq!(erle, 1.0);
|
||||
assert_eq!(frame, original);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn aec_reset_zeroes_state() {
|
||||
let mut aec = EchoCanceller::new(48000, 10); // short for test speed
|
||||
let farend: Vec<i16> = (0..480).map(|i| ((i * 37) % 1000) as i16).collect();
|
||||
aec.feed_farend(&farend);
|
||||
|
||||
aec.reset();
|
||||
|
||||
assert!(aec.filter_coeffs.iter().all(|&c| c == 0.0));
|
||||
assert!(aec.far_end_buf.iter().all(|&s| s == 0.0));
|
||||
assert_eq!(aec.far_end_pos, 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn aec_reduces_echo_of_known_signal() {
|
||||
// Use a small filter for speed. Feed a known far-end signal, then
|
||||
// present the *same* signal as near-end (perfect echo, no room).
|
||||
// After adaptation the output energy should drop.
|
||||
let filter_ms = 5; // 240 taps at 48 kHz
|
||||
let mut aec = EchoCanceller::new(48000, filter_ms);
|
||||
|
||||
// Generate a simple repeating pattern.
|
||||
let frame_len = 480usize;
|
||||
let make_frame = |offset: usize| -> Vec<i16> {
|
||||
(0..frame_len)
|
||||
.map(|i| {
|
||||
let t = (offset + i) as f64 / 48000.0;
|
||||
(5000.0 * (2.0 * std::f64::consts::PI * 300.0 * t).sin()) as i16
|
||||
})
|
||||
.collect()
|
||||
};
|
||||
|
||||
// Warm up the adaptive filter with several frames.
|
||||
let mut last_erle = 1.0f32;
|
||||
for frame_idx in 0..40 {
|
||||
let farend = make_frame(frame_idx * frame_len);
|
||||
aec.feed_farend(&farend);
|
||||
|
||||
// Near-end = exact copy of far-end (pure echo).
|
||||
let mut nearend = farend.clone();
|
||||
last_erle = aec.process_frame(&mut nearend);
|
||||
}
|
||||
|
||||
// After 40 frames the ERLE should be meaningfully > 1.
|
||||
assert!(
|
||||
last_erle > 1.0,
|
||||
"expected ERLE > 1.0 after adaptation, got {last_erle}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn aec_silence_passthrough() {
|
||||
let mut aec = EchoCanceller::new(48000, 10);
|
||||
// Feed silence far-end
|
||||
aec.feed_farend(&vec![0i16; 480]);
|
||||
// Near-end is silence too
|
||||
let mut frame = vec![0i16; 480];
|
||||
let erle = aec.process_frame(&mut frame);
|
||||
assert!(erle >= 1.0);
|
||||
// Output should still be silence
|
||||
assert!(frame.iter().all(|&s| s == 0));
|
||||
}
|
||||
}
|
||||
219
crates/wzp-codec/src/agc.rs
Normal file
219
crates/wzp-codec/src/agc.rs
Normal file
@@ -0,0 +1,219 @@
|
||||
//! Automatic Gain Control (AGC) with two-stage smoothing.
|
||||
//!
|
||||
//! Uses a fast attack / slow release envelope follower to keep the
|
||||
//! output signal near a configurable target RMS level. This prevents
|
||||
//! both clipping (when the speaker is too loud) and inaudibility (when
|
||||
//! the speaker is too quiet or far from the mic).
|
||||
|
||||
/// Two-stage automatic gain control.
|
||||
///
|
||||
/// The gain is adjusted per-frame based on the measured RMS energy,
|
||||
/// with a fast attack (gain decreases quickly when signal gets louder)
|
||||
/// and a slow release (gain increases gradually when signal gets quieter).
|
||||
pub struct AutoGainControl {
|
||||
target_rms: f64,
|
||||
current_gain: f64,
|
||||
min_gain: f64,
|
||||
max_gain: f64,
|
||||
attack_alpha: f64,
|
||||
release_alpha: f64,
|
||||
enabled: bool,
|
||||
}
|
||||
|
||||
impl AutoGainControl {
|
||||
/// Create a new AGC with sensible VoIP defaults.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
target_rms: 3000.0, // ~-20 dBFS for i16
|
||||
current_gain: 1.0,
|
||||
min_gain: 0.5,
|
||||
max_gain: 32.0,
|
||||
attack_alpha: 0.3, // fast attack
|
||||
release_alpha: 0.02, // slow release
|
||||
enabled: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Process a frame of PCM audio in-place, applying gain adjustment.
|
||||
pub fn process_frame(&mut self, pcm: &mut [i16]) {
|
||||
if !self.enabled {
|
||||
return;
|
||||
}
|
||||
|
||||
// Compute RMS of the frame.
|
||||
let rms = Self::compute_rms(pcm);
|
||||
|
||||
// Don't amplify near-silence — it would just boost noise.
|
||||
if rms < 10.0 {
|
||||
return;
|
||||
}
|
||||
|
||||
// Desired instantaneous gain.
|
||||
let desired_gain = (self.target_rms / rms).clamp(self.min_gain, self.max_gain);
|
||||
|
||||
// Smooth the gain transition.
|
||||
let alpha = if desired_gain < self.current_gain {
|
||||
// Signal is louder than target → reduce gain quickly (attack).
|
||||
self.attack_alpha
|
||||
} else {
|
||||
// Signal is quieter than target → raise gain slowly (release).
|
||||
self.release_alpha
|
||||
};
|
||||
|
||||
self.current_gain = self.current_gain * (1.0 - alpha) + desired_gain * alpha;
|
||||
|
||||
// Apply gain to each sample with hard limiting at ±31000 (~0.946 * i16::MAX).
|
||||
const LIMIT: f64 = 31000.0;
|
||||
let gain = self.current_gain;
|
||||
for sample in pcm.iter_mut() {
|
||||
let amplified = (*sample as f64) * gain;
|
||||
let clamped = amplified.clamp(-LIMIT, LIMIT);
|
||||
*sample = clamped as i16;
|
||||
}
|
||||
}
|
||||
|
||||
/// Enable or disable the AGC.
|
||||
pub fn set_enabled(&mut self, enabled: bool) {
|
||||
self.enabled = enabled;
|
||||
}
|
||||
|
||||
/// Returns whether the AGC is currently enabled.
|
||||
pub fn is_enabled(&self) -> bool {
|
||||
self.enabled
|
||||
}
|
||||
|
||||
/// Current gain expressed in dB.
|
||||
pub fn current_gain_db(&self) -> f64 {
|
||||
20.0 * self.current_gain.log10()
|
||||
}
|
||||
|
||||
/// Compute the RMS (root mean square) of a PCM buffer.
|
||||
fn compute_rms(pcm: &[i16]) -> f64 {
|
||||
if pcm.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
let sum_sq: f64 = pcm.iter().map(|&s| (s as f64) * (s as f64)).sum();
|
||||
(sum_sq / pcm.len() as f64).sqrt()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for AutoGainControl {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn agc_creates_with_defaults() {
|
||||
let agc = AutoGainControl::new();
|
||||
assert!(agc.is_enabled());
|
||||
assert!((agc.current_gain - 1.0).abs() < f64::EPSILON);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agc_passthrough_when_disabled() {
|
||||
let mut agc = AutoGainControl::new();
|
||||
agc.set_enabled(false);
|
||||
|
||||
let original: Vec<i16> = (0..960).map(|i| (i * 5) as i16).collect();
|
||||
let mut frame = original.clone();
|
||||
agc.process_frame(&mut frame);
|
||||
|
||||
assert_eq!(frame, original);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agc_does_not_amplify_silence() {
|
||||
let mut agc = AutoGainControl::new();
|
||||
let mut frame = vec![0i16; 960];
|
||||
agc.process_frame(&mut frame);
|
||||
assert!(frame.iter().all(|&s| s == 0));
|
||||
// Gain should remain at initial value.
|
||||
assert!((agc.current_gain - 1.0).abs() < f64::EPSILON);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agc_amplifies_quiet_signal() {
|
||||
let mut agc = AutoGainControl::new();
|
||||
|
||||
// Very quiet signal (RMS ~ 50).
|
||||
let mut frame: Vec<i16> = (0..960)
|
||||
.map(|i| {
|
||||
let t = i as f64 / 48000.0;
|
||||
(50.0 * (2.0 * std::f64::consts::PI * 440.0 * t).sin()) as i16
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Process several frames to let the gain ramp up.
|
||||
for _ in 0..50 {
|
||||
let mut f = frame.clone();
|
||||
agc.process_frame(&mut f);
|
||||
frame = f;
|
||||
}
|
||||
|
||||
// Gain should have increased past 1.0.
|
||||
assert!(
|
||||
agc.current_gain > 1.05,
|
||||
"expected gain > 1.05 for quiet signal, got {}",
|
||||
agc.current_gain
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agc_attenuates_loud_signal() {
|
||||
let mut agc = AutoGainControl::new();
|
||||
|
||||
// Loud signal (RMS ~ 20000).
|
||||
let frame: Vec<i16> = (0..960)
|
||||
.map(|i| {
|
||||
let t = i as f64 / 48000.0;
|
||||
(28000.0 * (2.0 * std::f64::consts::PI * 440.0 * t).sin()) as i16
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Process several frames.
|
||||
for _ in 0..20 {
|
||||
let mut f = frame.clone();
|
||||
agc.process_frame(&mut f);
|
||||
}
|
||||
|
||||
// Gain should have decreased below 1.0.
|
||||
assert!(
|
||||
agc.current_gain < 1.0,
|
||||
"expected gain < 1.0 for loud signal, got {}",
|
||||
agc.current_gain
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agc_output_within_limits() {
|
||||
let mut agc = AutoGainControl::new();
|
||||
// Force a high gain by processing many quiet frames first.
|
||||
for _ in 0..100 {
|
||||
let mut f: Vec<i16> = vec![100; 960];
|
||||
agc.process_frame(&mut f);
|
||||
}
|
||||
|
||||
// Now send a louder frame — output should still be within ±31000.
|
||||
let mut frame: Vec<i16> = vec![20000; 960];
|
||||
agc.process_frame(&mut frame);
|
||||
assert!(
|
||||
frame.iter().all(|&s| s.abs() <= 31000),
|
||||
"output samples must be within ±31000"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agc_gain_db_at_unity() {
|
||||
let agc = AutoGainControl::new();
|
||||
let db = agc.current_gain_db();
|
||||
assert!(
|
||||
db.abs() < 0.01,
|
||||
"expected ~0 dB at unity gain, got {db}"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -10,6 +10,8 @@
|
||||
//! trait-object encoders/decoders that handle adaptive switching internally.
|
||||
|
||||
pub mod adaptive;
|
||||
pub mod aec;
|
||||
pub mod agc;
|
||||
pub mod codec2_dec;
|
||||
pub mod codec2_enc;
|
||||
pub mod denoise;
|
||||
@@ -19,6 +21,8 @@ pub mod resample;
|
||||
pub mod silence;
|
||||
|
||||
pub use adaptive::{AdaptiveDecoder, AdaptiveEncoder};
|
||||
pub use aec::EchoCanceller;
|
||||
pub use agc::AutoGainControl;
|
||||
pub use denoise::NoiseSupressor;
|
||||
pub use silence::{ComfortNoise, SilenceDetector};
|
||||
pub use wzp_proto::{AudioDecoder, AudioEncoder, CodecId, QualityProfile};
|
||||
|
||||
@@ -40,6 +40,11 @@ impl OpusEncoder {
|
||||
.set_signal(Signal::Voice)
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("set signal: {e}")))?;
|
||||
|
||||
// Default complexity 7 — good quality/CPU trade-off for VoIP
|
||||
enc.inner
|
||||
.set_complexity(7)
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("set complexity: {e}")))?;
|
||||
|
||||
Ok(enc)
|
||||
}
|
||||
|
||||
@@ -56,6 +61,21 @@ impl OpusEncoder {
|
||||
pub fn frame_samples(&self) -> usize {
|
||||
(48_000 * self.frame_duration_ms as usize) / 1000
|
||||
}
|
||||
|
||||
/// Set the encoder complexity (0-10). Higher values produce better quality
|
||||
/// at the cost of more CPU. Default is 7.
|
||||
pub fn set_complexity(&mut self, complexity: i32) {
|
||||
let c = (complexity as u8).min(10);
|
||||
let _ = self.inner.set_complexity(c);
|
||||
}
|
||||
|
||||
/// Hint the encoder about expected packet loss percentage (0-100).
|
||||
///
|
||||
/// Higher values cause the encoder to use more redundancy to survive
|
||||
/// packet loss, at the expense of slightly higher bitrate.
|
||||
pub fn set_expected_loss(&mut self, loss_pct: u8) {
|
||||
let _ = self.inner.set_packet_loss_perc(loss_pct.min(100));
|
||||
}
|
||||
}
|
||||
|
||||
impl AudioEncoder for OpusEncoder {
|
||||
|
||||
@@ -1,55 +1,258 @@
|
||||
//! Simple linear resampler for 48 kHz <-> 8 kHz conversion.
|
||||
//! Windowed-sinc FIR resampler for 48 kHz <-> 8 kHz conversion.
|
||||
//!
|
||||
//! These are basic implementations suitable for voice. For higher quality,
|
||||
//! replace with the `rubato` crate later.
|
||||
//! Provides both stateless free functions (backward-compatible) and stateful
|
||||
//! `Downsampler48to8` / `Upsampler8to48` structs that maintain overlap history
|
||||
//! between frames for glitch-free streaming.
|
||||
|
||||
/// Downsample from 48 kHz to 8 kHz (6:1 decimation with averaging).
|
||||
use std::f64::consts::PI;
|
||||
|
||||
// ─── FIR kernel parameters ─────────────────────────────────────────────────
|
||||
|
||||
/// Number of FIR taps in the anti-alias / interpolation filter.
|
||||
const FIR_TAPS: usize = 48;
|
||||
/// Kaiser window beta parameter — controls sidelobe attenuation.
|
||||
const KAISER_BETA: f64 = 8.0;
|
||||
/// Cutoff frequency in Hz for the low-pass filter (just below 4 kHz Nyquist of 8 kHz).
|
||||
const CUTOFF_HZ: f64 = 3800.0;
|
||||
/// Working sample rate in Hz.
|
||||
const SAMPLE_RATE: f64 = 48000.0;
|
||||
/// Decimation / interpolation ratio between 48 kHz and 8 kHz.
|
||||
const RATIO: usize = 6;
|
||||
|
||||
// ─── Kaiser window helpers ─────────────────────────────────────────────────
|
||||
|
||||
/// Zeroth-order modified Bessel function of the first kind, I₀(x).
|
||||
///
|
||||
/// Each output sample is the average of 6 consecutive input samples,
|
||||
/// providing basic anti-aliasing via a box filter.
|
||||
pub fn resample_48k_to_8k(input: &[i16]) -> Vec<i16> {
|
||||
const RATIO: usize = 6;
|
||||
let out_len = input.len() / RATIO;
|
||||
let mut output = Vec::with_capacity(out_len);
|
||||
|
||||
for chunk in input.chunks_exact(RATIO) {
|
||||
let sum: i32 = chunk.iter().map(|&s| s as i32).sum();
|
||||
output.push((sum / RATIO as i32) as i16);
|
||||
/// Computed via the well-known power-series expansion, converging rapidly
|
||||
/// for the moderate values of x used in Kaiser window design.
|
||||
fn bessel_i0(x: f64) -> f64 {
|
||||
let mut sum = 1.0f64;
|
||||
let mut term = 1.0f64;
|
||||
let half_x = x / 2.0;
|
||||
for k in 1..=25 {
|
||||
term *= (half_x / k as f64) * (half_x / k as f64);
|
||||
sum += term;
|
||||
if term < 1e-12 * sum {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
output
|
||||
sum
|
||||
}
|
||||
|
||||
/// Upsample from 8 kHz to 48 kHz (1:6 interpolation with linear interp).
|
||||
/// Build a windowed-sinc low-pass FIR kernel.
|
||||
///
|
||||
/// Linearly interpolates between each pair of input samples to produce
|
||||
/// 6 output samples per input sample.
|
||||
pub fn resample_8k_to_48k(input: &[i16]) -> Vec<i16> {
|
||||
const RATIO: usize = 6;
|
||||
if input.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
/// Returns `FIR_TAPS` coefficients normalised so that the DC gain is exactly 1.0.
|
||||
fn build_fir_kernel() -> [f64; FIR_TAPS] {
|
||||
let mut kernel = [0.0f64; FIR_TAPS];
|
||||
let m = (FIR_TAPS - 1) as f64;
|
||||
let fc = CUTOFF_HZ / SAMPLE_RATE; // normalised cutoff (0..0.5)
|
||||
let beta_denom = bessel_i0(KAISER_BETA);
|
||||
|
||||
let out_len = input.len() * RATIO;
|
||||
let mut output = Vec::with_capacity(out_len);
|
||||
|
||||
for i in 0..input.len() {
|
||||
let current = input[i] as i32;
|
||||
let next = if i + 1 < input.len() {
|
||||
input[i + 1] as i32
|
||||
for i in 0..FIR_TAPS {
|
||||
// Sinc
|
||||
let n = i as f64 - m / 2.0;
|
||||
let sinc = if n.abs() < 1e-12 {
|
||||
2.0 * fc
|
||||
} else {
|
||||
current // hold last sample
|
||||
(2.0 * PI * fc * n).sin() / (PI * n)
|
||||
};
|
||||
|
||||
for j in 0..RATIO {
|
||||
let interp = current + (next - current) * j as i32 / RATIO as i32;
|
||||
output.push(interp as i16);
|
||||
// Kaiser window
|
||||
let t = 2.0 * i as f64 / m - 1.0; // range [-1, 1]
|
||||
let kaiser = bessel_i0(KAISER_BETA * (1.0 - t * t).max(0.0).sqrt()) / beta_denom;
|
||||
|
||||
kernel[i] = sinc * kaiser;
|
||||
}
|
||||
|
||||
// Normalise to unity DC gain.
|
||||
let sum: f64 = kernel.iter().sum();
|
||||
if sum.abs() > 1e-15 {
|
||||
for k in kernel.iter_mut() {
|
||||
*k /= sum;
|
||||
}
|
||||
}
|
||||
|
||||
output
|
||||
kernel
|
||||
}
|
||||
|
||||
// ─── Stateful Downsampler 48→8 ─────────────────────────────────────────────
|
||||
|
||||
/// Stateful polyphase FIR downsampler from 48 kHz to 8 kHz.
|
||||
///
|
||||
/// Maintains `FIR_TAPS - 1` samples of history between successive calls to
|
||||
/// `process()` for seamless frame boundaries.
|
||||
pub struct Downsampler48to8 {
|
||||
kernel: [f64; FIR_TAPS],
|
||||
history: Vec<f64>,
|
||||
}
|
||||
|
||||
impl Downsampler48to8 {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
kernel: build_fir_kernel(),
|
||||
history: vec![0.0; FIR_TAPS - 1],
|
||||
}
|
||||
}
|
||||
|
||||
/// Downsample a block of 48 kHz samples to 8 kHz.
|
||||
///
|
||||
/// The input length should be a multiple of 6; any trailing samples that
|
||||
/// don't form a complete output sample are consumed into the history.
|
||||
pub fn process(&mut self, input: &[i16]) -> Vec<i16> {
|
||||
let hist_len = self.history.len(); // FIR_TAPS - 1
|
||||
let total_len = hist_len + input.len();
|
||||
|
||||
// Build a working buffer: history ++ input (as f64).
|
||||
let mut work = Vec::with_capacity(total_len);
|
||||
work.extend_from_slice(&self.history);
|
||||
work.extend(input.iter().map(|&s| s as f64));
|
||||
|
||||
let out_len = input.len() / RATIO;
|
||||
let mut output = Vec::with_capacity(out_len);
|
||||
|
||||
for i in 0..out_len {
|
||||
// The centre of the filter for output sample i sits at
|
||||
// position hist_len + i*RATIO in the work buffer (aligning
|
||||
// with the first new input sample at decimation phase 0).
|
||||
let centre = hist_len + i * RATIO;
|
||||
let start = centre + 1 - FIR_TAPS; // may be 0 for the first few
|
||||
|
||||
let mut acc = 0.0f64;
|
||||
for k in 0..FIR_TAPS {
|
||||
let idx = start + k;
|
||||
if idx < work.len() {
|
||||
acc += work[idx] * self.kernel[k];
|
||||
}
|
||||
}
|
||||
output.push(acc.round().clamp(-32768.0, 32767.0) as i16);
|
||||
}
|
||||
|
||||
// Update history: keep the last (FIR_TAPS - 1) samples from work.
|
||||
if work.len() >= hist_len {
|
||||
self.history
|
||||
.copy_from_slice(&work[work.len() - hist_len..]);
|
||||
} else {
|
||||
// Input was shorter than history — shift.
|
||||
let shift = hist_len - work.len();
|
||||
self.history.copy_within(shift.., 0);
|
||||
for (i, &v) in work.iter().enumerate() {
|
||||
self.history[hist_len - work.len() + i] = v;
|
||||
}
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Downsampler48to8 {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Stateful Upsampler 8→48 ───────────────────────────────────────────────
|
||||
|
||||
/// Stateful FIR upsampler from 8 kHz to 48 kHz.
|
||||
///
|
||||
/// Inserts zeros between input samples (zero-stuffing), then applies the
|
||||
/// low-pass FIR to remove imaging, with gain compensation of `RATIO`.
|
||||
pub struct Upsampler8to48 {
|
||||
kernel: [f64; FIR_TAPS],
|
||||
history: Vec<f64>,
|
||||
}
|
||||
|
||||
impl Upsampler8to48 {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
kernel: build_fir_kernel(),
|
||||
history: vec![0.0; FIR_TAPS - 1],
|
||||
}
|
||||
}
|
||||
|
||||
/// Upsample a block of 8 kHz samples to 48 kHz.
|
||||
pub fn process(&mut self, input: &[i16]) -> Vec<i16> {
|
||||
let hist_len = self.history.len(); // FIR_TAPS - 1
|
||||
|
||||
// Zero-stuff: insert RATIO-1 zeros between each input sample.
|
||||
let stuffed_len = input.len() * RATIO;
|
||||
let total_len = hist_len + stuffed_len;
|
||||
|
||||
let mut work = Vec::with_capacity(total_len);
|
||||
work.extend_from_slice(&self.history);
|
||||
for &s in input {
|
||||
work.push(s as f64);
|
||||
for _ in 1..RATIO {
|
||||
work.push(0.0);
|
||||
}
|
||||
}
|
||||
|
||||
let out_len = stuffed_len;
|
||||
let mut output = Vec::with_capacity(out_len);
|
||||
|
||||
// The gain factor compensates for the zeros introduced by stuffing.
|
||||
let gain = RATIO as f64;
|
||||
|
||||
for i in 0..out_len {
|
||||
let centre = hist_len + i;
|
||||
let start = centre + 1 - FIR_TAPS;
|
||||
|
||||
let mut acc = 0.0f64;
|
||||
for k in 0..FIR_TAPS {
|
||||
let idx = start + k;
|
||||
if idx < work.len() {
|
||||
acc += work[idx] * self.kernel[k];
|
||||
}
|
||||
}
|
||||
acc *= gain;
|
||||
output.push(acc.round().clamp(-32768.0, 32767.0) as i16);
|
||||
}
|
||||
|
||||
// Update history.
|
||||
if work.len() >= hist_len {
|
||||
self.history
|
||||
.copy_from_slice(&work[work.len() - hist_len..]);
|
||||
} else {
|
||||
let shift = hist_len - work.len();
|
||||
self.history.copy_within(shift.., 0);
|
||||
for (i, &v) in work.iter().enumerate() {
|
||||
self.history[hist_len - work.len() + i] = v;
|
||||
}
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Upsampler8to48 {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Backward-compatible free functions ─────────────────────────────────────
|
||||
|
||||
/// Downsample from 48 kHz to 8 kHz (6:1 decimation with FIR anti-alias filter).
|
||||
///
|
||||
/// This is a convenience wrapper that creates a temporary [`Downsampler48to8`].
|
||||
/// For streaming use, prefer the stateful struct to avoid edge artefacts between
|
||||
/// frames.
|
||||
pub fn resample_48k_to_8k(input: &[i16]) -> Vec<i16> {
|
||||
let mut ds = Downsampler48to8::new();
|
||||
ds.process(input)
|
||||
}
|
||||
|
||||
/// Upsample from 8 kHz to 48 kHz (1:6 interpolation with FIR imaging filter).
|
||||
///
|
||||
/// This is a convenience wrapper that creates a temporary [`Upsampler8to48`].
|
||||
/// For streaming use, prefer the stateful struct to avoid edge artefacts between
|
||||
/// frames.
|
||||
pub fn resample_8k_to_48k(input: &[i16]) -> Vec<i16> {
|
||||
let mut us = Upsampler8to48::new();
|
||||
us.process(input)
|
||||
}
|
||||
|
||||
// ─── Tests ──────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -66,12 +269,28 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn dc_signal_preserved() {
|
||||
// A constant signal should survive resampling
|
||||
// A constant signal should survive resampling (approximately).
|
||||
let input = vec![1000i16; 960];
|
||||
let down = resample_48k_to_8k(&input);
|
||||
assert!(down.iter().all(|&s| s == 1000));
|
||||
// Allow some edge transient — check that the middle samples are close.
|
||||
let mid_start = down.len() / 4;
|
||||
let mid_end = 3 * down.len() / 4;
|
||||
for &s in &down[mid_start..mid_end] {
|
||||
assert!(
|
||||
(s - 1000).abs() < 50,
|
||||
"DC downsampled sample {s} too far from 1000"
|
||||
);
|
||||
}
|
||||
|
||||
let up = resample_8k_to_48k(&down);
|
||||
assert!(up.iter().all(|&s| s == 1000));
|
||||
let mid_start_up = up.len() / 4;
|
||||
let mid_end_up = 3 * up.len() / 4;
|
||||
for &s in &up[mid_start_up..mid_end_up] {
|
||||
assert!(
|
||||
(s - 1000).abs() < 100,
|
||||
"DC upsampled sample {s} too far from 1000"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -79,4 +298,40 @@ mod tests {
|
||||
assert!(resample_48k_to_8k(&[]).is_empty());
|
||||
assert!(resample_8k_to_48k(&[]).is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stateful_downsampler_produces_correct_length() {
|
||||
let mut ds = Downsampler48to8::new();
|
||||
let out = ds.process(&vec![0i16; 960]);
|
||||
assert_eq!(out.len(), 160);
|
||||
let out2 = ds.process(&vec![0i16; 960]);
|
||||
assert_eq!(out2.len(), 160);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stateful_upsampler_produces_correct_length() {
|
||||
let mut us = Upsampler8to48::new();
|
||||
let out = us.process(&vec![0i16; 160]);
|
||||
assert_eq!(out.len(), 960);
|
||||
let out2 = us.process(&vec![0i16; 160]);
|
||||
assert_eq!(out2.len(), 960);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fir_kernel_has_unity_dc_gain() {
|
||||
let kernel = build_fir_kernel();
|
||||
let sum: f64 = kernel.iter().sum();
|
||||
assert!(
|
||||
(sum - 1.0).abs() < 1e-10,
|
||||
"FIR kernel DC gain should be 1.0, got {sum}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bessel_i0_known_values() {
|
||||
// I₀(0) = 1
|
||||
assert!((bessel_i0(0.0) - 1.0).abs() < 1e-12);
|
||||
// I₀(1) ≈ 1.2660658
|
||||
assert!((bessel_i0(1.0) - 1.2660658).abs() < 1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user