WZP-P2-T6: Trunking - TrunkFrame/TrunkEntry: pack N session packets into one datagram - Wire format: [count:u16][session_id:2][len:u16][payload]... - TrunkBatcher: batches by count (10) or bytes (1200), flushes on limit - 5 tests: encode/decode roundtrip, empty frame, batcher fill/flush, byte limit WZP-P2-T7: Mini-frames - MiniHeader: 4-byte delta header (timestamp_delta + payload_len) - FRAME_TYPE_FULL (0x00) / FRAME_TYPE_MINI (0x01) discriminator - MiniFrameContext: expands mini-headers to full by tracking baseline - Saves 8 bytes per packet (5 vs 13 bytes with type prefix) - 5 tests: encode/decode, wire size, context expand, no baseline, size comparison WZP-P2-T8: Silence suppression - SilenceDetector: RMS-based detection with hangover (5 frames = 100ms) - ComfortNoise: low-level random noise generator - CodecId::ComfortNoise variant for CN packets - CallEncoder: suppresses silent frames, sends 1-byte CN every 200ms - CallDecoder: generates comfort noise on CN packets - ~50% bandwidth savings in typical conversations - 6 tests: silence/speech detection, hangover, CN generation, RMS math, suppression WZP-P2-T9: Call control signals - SignalMessage: Hold, Unhold, Mute, Unmute, Transfer, TransferAck - CallSignalType mapping in featherchat.rs for all new variants - 4 serde roundtrip tests + signal type mapping tests 255 tests passing across all crates. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
192 lines
6.3 KiB
Rust
192 lines
6.3 KiB
Rust
//! Silence suppression and comfort noise generation.
|
|
//!
|
|
//! During silent periods (~50% of a typical call), full encoded frames waste
|
|
//! bandwidth. [`SilenceDetector`] detects silent audio based on RMS energy,
|
|
//! and [`ComfortNoise`] generates low-level background noise to fill gaps on
|
|
//! the decoder side.
|
|
|
|
use rand::Rng;
|
|
|
|
/// Detects silence in PCM audio using RMS energy with a hangover period.
///
/// A frame counts as silent when its RMS energy is strictly below
/// `threshold_rms`. The hangover protects the trailing end of speech from
/// being clipped: the first `hangover_frames` consecutive silent frames are
/// still reported as "not silent", and suppression only begins with the frame
/// after that. Any frame at or above the threshold ends suppression
/// immediately and resets the silent-frame count.
#[derive(Debug, Clone)]
pub struct SilenceDetector {
    /// RMS threshold below which audio is considered silent (for i16 samples).
    threshold_rms: f64,
    /// Number of consecutive silent frames still sent before suppression starts.
    hangover_frames: u32,
    /// Count of consecutive frames whose RMS is below the threshold.
    silent_frames: u32,
    /// Whether suppression is currently active.
    is_suppressing: bool,
}

impl SilenceDetector {
    /// Default RMS threshold for i16 samples.
    pub const DEFAULT_THRESHOLD_RMS: f64 = 100.0;

    /// Default hangover length in frames (5 frames = 100ms at 20ms frames).
    pub const DEFAULT_HANGOVER_FRAMES: u32 = 5;

    /// Create a new silence detector.
    ///
    /// * `threshold_rms` — RMS energy below which a frame is silent
    ///   (default: 100.0 for i16).
    /// * `hangover_frames` — silent frames to keep sending after silence onset
    ///   (default: 5 = 100ms at 20ms frames).
    ///
    /// The detector starts with no silent frames counted and suppression off.
    pub fn new(threshold_rms: f64, hangover_frames: u32) -> Self {
        Self {
            threshold_rms,
            hangover_frames,
            silent_frames: 0,
            is_suppressing: false,
        }
    }

    /// Compute the RMS (root mean square) energy of a PCM buffer.
    ///
    /// Returns `0.0` for an empty buffer instead of dividing by zero.
    pub fn rms(pcm: &[i16]) -> f64 {
        if pcm.is_empty() {
            return 0.0;
        }
        // Accumulate in f64 so squaring full-scale i16 samples cannot overflow.
        let sum_sq: f64 = pcm
            .iter()
            .map(|&s| {
                let v = f64::from(s);
                v * v
            })
            .sum();
        (sum_sq / pcm.len() as f64).sqrt()
    }

    /// Returns `true` if the frame should be suppressed (i.e. is silence past
    /// the hangover period).
    ///
    /// Call once per frame: every call updates the internal count of
    /// consecutive silent frames. Suppression is reported only once that count
    /// exceeds `hangover_frames`, and is cleared by the first non-silent frame.
    pub fn is_silent(&mut self, pcm: &[i16]) -> bool {
        if Self::rms(pcm) < self.threshold_rms {
            // Saturate so an arbitrarily long silence cannot wrap the counter
            // back below the hangover limit.
            self.silent_frames = self.silent_frames.saturating_add(1);
            if self.silent_frames > self.hangover_frames {
                self.is_suppressing = true;
            }
        } else {
            // Speech detected — reset.
            self.silent_frames = 0;
            self.is_suppressing = false;
        }

        self.is_suppressing
    }

    /// Whether the detector is currently in the suppressing state.
    ///
    /// Reflects the result of the most recent [`is_silent`](Self::is_silent)
    /// call without consuming a frame.
    pub fn suppressing(&self) -> bool {
        self.is_suppressing
    }
}

impl Default for SilenceDetector {
    /// Builds a detector with [`Self::DEFAULT_THRESHOLD_RMS`] and
    /// [`Self::DEFAULT_HANGOVER_FRAMES`].
    fn default() -> Self {
        Self::new(Self::DEFAULT_THRESHOLD_RMS, Self::DEFAULT_HANGOVER_FRAMES)
    }
}
|
|
|
|
/// Generates low-level comfort noise to fill silent periods.
|
|
///
|
|
/// When the decoder receives a comfort-noise descriptor (or detects a gap
|
|
/// caused by silence suppression), it uses this to produce a natural-sounding
|
|
/// background hiss instead of dead silence.
|
|
pub struct ComfortNoise {
|
|
/// Peak amplitude of the generated noise (default: 50).
|
|
level: i16,
|
|
}
|
|
|
|
impl ComfortNoise {
|
|
/// Create a comfort noise generator with the given amplitude level.
|
|
pub fn new(level: i16) -> Self {
|
|
Self { level }
|
|
}
|
|
|
|
/// Fill `pcm` with low-level random noise in the range `[-level, level]`.
|
|
pub fn generate(&self, pcm: &mut [i16]) {
|
|
let mut rng = rand::thread_rng();
|
|
for sample in pcm.iter_mut() {
|
|
*sample = rng.gen_range(-self.level..=self.level);
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Number of samples in every frame used by these tests.
    const FRAME_LEN: usize = 960;

    /// Builds one frame of a sine tone, treating the sample index as 48 kHz time.
    fn tone(amplitude: f64, freq_hz: f64) -> Vec<i16> {
        (0..960_i32)
            .map(|n| {
                let t = f64::from(n) / 48000.0;
                (amplitude * (2.0 * std::f64::consts::PI * freq_hz * t).sin()) as i16
            })
            .collect()
    }

    #[test]
    fn silence_detector_detects_silence() {
        let quiet = [0i16; FRAME_LEN];
        let mut detector = SilenceDetector::new(100.0, 5);

        // The hangover covers the first five silent frames: none is suppressed.
        for _frame in 1..=5 {
            let suppressed = detector.is_silent(&quiet);
            assert!(!suppressed);
        }

        // From the sixth frame on the hangover is spent and frames are suppressed.
        for _frame in 6..=7 {
            let suppressed = detector.is_silent(&quiet);
            assert!(suppressed);
        }
    }

    #[test]
    fn silence_detector_detects_speech() {
        // A 1 kHz tone with amplitude 10000 is far above the RMS threshold.
        let voiced = tone(10000.0, 1000.0);
        let mut detector = SilenceDetector::new(100.0, 5);

        // No frame of the tone may ever be reported as silent.
        for _frame in 0..20 {
            let suppressed = detector.is_silent(&voiced);
            assert!(!suppressed);
        }
    }

    #[test]
    fn silence_detector_hangover() {
        let quiet = [0i16; FRAME_LEN];
        let voiced = tone(5000.0, 440.0);
        let mut detector = SilenceDetector::new(100.0, 3);

        // Push enough silent frames through to get past the 3-frame hangover.
        for _frame in 0..4 {
            let _ = detector.is_silent(&quiet);
        }
        assert!(detector.is_silent(&quiet), "should be suppressing after hangover");

        // The first speech frame must end suppression, and it must stay off.
        for _frame in 0..2 {
            let suppressed = detector.is_silent(&voiced);
            assert!(!suppressed);
        }
    }

    #[test]
    fn comfort_noise_generates_nonzero() {
        let mut frame = [0i16; FRAME_LEN];
        ComfortNoise::new(50).generate(&mut frame);

        // Noise must actually be written: not every sample may stay zero.
        let has_signal = frame.iter().any(|&s| s != 0);
        assert!(has_signal, "CN output should not be all zeros");

        // Every sample has to stay inside [-50, 50].
        let in_range = frame.iter().all(|&s| s.abs() <= 50);
        assert!(in_range, "CN samples out of range");
    }

    #[test]
    fn rms_calculation() {
        // A buffer of zeros has no energy.
        let zeros = [0i16; 100];
        assert_eq!(SilenceDetector::rms(&zeros), 0.0);

        // For a constant signal the RMS equals the magnitude of that constant.
        let flat = [100i16; 100];
        let rms = SilenceDetector::rms(&flat);
        assert!((rms - 100.0).abs() < 0.01, "RMS of constant 100 should be 100, got {rms}");

        // Hand-computed case: [3, 4] → sqrt((9 + 16) / 2) = sqrt(12.5) ≈ 3.5355.
        let rms2 = SilenceDetector::rms(&[3, 4]);
        assert!((rms2 - 3.5355).abs() < 0.01, "RMS of [3,4] should be ~3.5355, got {rms2}");

        // An empty buffer is defined to have zero energy.
        let empty: [i16; 0] = [];
        assert_eq!(SilenceDetector::rms(&empty), 0.0);
    }
}
|