From 16890576fb9c002e2b03476092974217e2344ffc Mon Sep 17 00:00:00 2001 From: Siavash Sameni Date: Sat, 11 Apr 2026 08:58:03 +0400 Subject: [PATCH] feat(observability): logcat-visible DRED proof of life on Android MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds enough INFO-level logging that an opus-DRED-v2 APK on Android can be verified end-to-end by reading logcat alone — no debugger, no Prometheus, no telemetry pipeline required. Three observation points: 1. Encoder construction (opus_enc.rs) - Bumped the "DRED enabled" log from debug! to info! so the per-call DRED config is in logcat by default. Each call's first OpusEncoder construction logs codec, dred_frames, dred_ms, loss_floor_pct. - Added a one-shot static OnceLock that logs `opusic_c::version()` the first time an OpusEncoder is built in the process. This is the smoking gun for "is the new libopus actually loaded" — pre- Phase-0 audiopus shipped libopus 1.3 with no DRED, post-Phase-0 should print 1.5.2 here. 2. DRED state ingest (DredRecvState::ingest_opus in desktop/src-tauri/src/engine.rs) - First successful parse on a call logs immediately so we can see "DRED is on the wire" in logcat. - Subsequent parses sample every 100th to confirm steady-state samples_available without drowning the log. - New parses_total / parses_with_data counters track the parse rate vs the success rate (a packet without DRED in it returns `available == 0`, so a low ratio means the encoder isn't emitting DRED bytes). 3. DRED reconstruction events (DredRecvState::fill_gap_to) - Every DRED reconstruction logs at INFO with missing_seq, anchor_seq, offset_samples, offset_ms, samples_available, gap_size, and the running total. These events are rare on a clean network and we want to know exactly which gap was filled. - First three classical PLC fills + every 50th thereafter log so we can see when DRED couldn't cover a gap (offset out of range, no good state, or reconstruct error). 4. Recv heartbeat (Android start() in engine.rs) - Existing 2-second heartbeat now includes dred_recv, classical_plc, dred_parses_with_data, dred_parses_total so a steady-state call shows the cumulative counters in logcat without parsing. How to verify on a real call: adb logcat -s 'RustStdoutStderr:*' | grep -i 'dred\|libopus version' Expected output sequence on a successful Opus call: - "linked libopus version libopus_version=libopus 1.5.2-..." (once per process) - "opus encoder: DRED enabled codec=Opus24k dred_frames=20 dred_ms=200 loss_floor_pct=15" (per call) - "DRED state parsed from Opus packet seq=N samples_available=4560 ms=95 ..." (after first DRED-bearing packet) - "recv heartbeat (android) ... dred_recv=0 classical_plc=0 dred_parses_with_data=58 dred_parses_total=58" (every 2s) If you see "linked libopus version libopus 1.3" — the FFI swap didn't take. If dred_parses_with_data stays at 0 while dred_parses_total climbs — the sender isn't emitting DRED (check the encoder's loss floor and the receiver's libopus version). If gaps trigger "classical PLC fill" instead of "DRED reconstruction fired" — DRED state coverage is too small for the observed loss pattern, and the loss floor or DRED duration policy needs tuning. Verification: - cargo check -p wzp-codec -p wzp-client: 0 errors - cargo check -p wzp-desktop: 0 Rust errors (only the pre-existing tauri::generate_context!() proc macro panic on missing ../dist which fires at host check time, irrelevant on the remote build) - cargo test -p wzp-codec --lib: 69 passing (no regressions) Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/wzp-codec/src/opus_enc.rs | 24 +++++++++++- desktop/src-tauri/src/engine.rs | 64 ++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 2 deletions(-) diff --git a/crates/wzp-codec/src/opus_enc.rs b/crates/wzp-codec/src/opus_enc.rs index 5892fea..43bf756 100644 --- a/crates/wzp-codec/src/opus_enc.rs +++ b/crates/wzp-codec/src/opus_enc.rs @@ -38,10 +38,18 @@ //! keeping the bitrate overhead modest compared to 25%. Real measurements //! from the quality adapter override upward when loss exceeds the floor. +use std::sync::OnceLock; + use opusic_c::{Application, Bitrate, Channels, Encoder, InbandFec, SampleRate, Signal}; -use tracing::{debug, warn}; +use tracing::{debug, info, warn}; use wzp_proto::{AudioEncoder, CodecError, CodecId, QualityProfile}; +/// Logged exactly once per process the first time an OpusEncoder is built. +/// Confirms that libopus 1.5.2 (the version with DRED) is actually linked +/// at runtime — invaluable when chasing "is the new codec loaded?" +/// regressions on Android, where the only debug surface is logcat. +static LIBOPUS_VERSION_LOGGED: OnceLock<()> = OnceLock::new(); + /// Minimum `OPUS_SET_PACKET_LOSS_PERC` value used in DRED mode. libopus /// scales the DRED emission window with the assumed loss percentage: /// empirically, 5% gives a 15 ms window (useless), 10% gives 55 ms, 15% @@ -178,7 +186,11 @@ impl OpusEncoder { .set_packet_loss(DRED_LOSS_FLOOR_PCT) .map_err(|e| CodecError::EncodeFailed(format!("set packet loss floor: {e:?}")))?; - debug!( + // Bumped from debug! to info! so the DRED config is visible in + // logcat without enabling debug-level filtering. Each call's + // first OpusEncoder construction will log this; subsequent + // profile switches log it again with the new tier. + info!( codec = ?codec, dred_frames, dred_ms = dred_frames as u32 * 10, @@ -186,6 +198,14 @@ impl OpusEncoder { "opus encoder: DRED enabled" ); + // One-shot logging of the linked libopus version so we can + // confirm at a glance that opusic-c (libopus 1.5.2) is loaded. + // Pre-Phase-0 audiopus shipped libopus 1.3 which has no DRED; + // if this log says "libopus 1.3" something is very wrong. + LIBOPUS_VERSION_LOGGED.get_or_init(|| { + info!(libopus_version = %opusic_c::version(), "linked libopus version"); + }); + Ok(()) } diff --git a/desktop/src-tauri/src/engine.rs b/desktop/src-tauri/src/engine.rs index eefba1e..884095b 100644 --- a/desktop/src-tauri/src/engine.rs +++ b/desktop/src-tauri/src/engine.rs @@ -113,6 +113,12 @@ struct DredRecvState { expected_seq: Option, pub dred_reconstructions: u64, pub classical_plc_invocations: u64, + /// Number of arriving Opus packets we have parsed for DRED so far — + /// used to throttle the periodic "DRED state observed" log to one + /// line every N packets so logcat doesn't drown. + parses_total: u64, + /// Counter of parses that yielded a non-zero `samples_available`. + parses_with_data: u64, } impl DredRecvState { @@ -128,6 +134,8 @@ impl DredRecvState { expected_seq: None, dred_reconstructions: 0, classical_plc_invocations: 0, + parses_total: 0, + parses_with_data: 0, } } @@ -138,10 +146,29 @@ impl DredRecvState { /// Call this BEFORE `fill_gap_to` so the anchor reflects the freshest /// DRED source available for gap reconstruction. fn ingest_opus(&mut self, seq: u16, payload: &[u8]) { + self.parses_total += 1; match self.dred_decoder.parse_into(&mut self.scratch, payload) { Ok(available) if available > 0 => { + self.parses_with_data += 1; std::mem::swap(&mut self.scratch, &mut self.last_good); self.last_good_seq = Some(seq); + + // First successful parse on this call: log loudly so the + // user can see "DRED is on the wire" in logcat. After + // that, sample every 100th parse to confirm the window + // is steady-state without drowning the log. + let should_log = self.parses_with_data == 1 + || self.parses_with_data % 100 == 0; + if should_log { + info!( + seq, + samples_available = available, + ms = available / 48, + parses_with_data = self.parses_with_data, + parses_total = self.parses_total, + "DRED state parsed from Opus packet" + ); + } } _ => { // Packet carried no DRED data, or parse failed — keep @@ -198,11 +225,44 @@ impl DredRecvState { match reconstructed { Some(_n) => { self.dred_reconstructions += 1; + // Log every DRED reconstruction. These are + // rare events on a clean network — when + // they fire, we want to know exactly which + // gap was filled and how the offset math + // played out. Acceptable to be chatty here. + info!( + missing_seq, + anchor_seq = ?self.last_good_seq, + offset_samples, + offset_ms = offset_samples / 48, + samples_available = available, + gap_size = gap, + total_dred_recoveries = self.dred_reconstructions, + "DRED reconstruction fired for missing frame" + ); emit(out); } None => { if decoder.decode_lost(out).is_ok() { self.classical_plc_invocations += 1; + // Log the first few classical PLC fills + // and then sample, so we can see when + // DRED couldn't cover a gap. The reason + // is whichever check failed in the if + // above (offset out of range, no good + // state, or reconstruct error). + if self.classical_plc_invocations <= 3 + || self.classical_plc_invocations % 50 == 0 + { + info!( + missing_seq, + anchor_seq = ?self.last_good_seq, + offset_samples, + samples_available = available, + total_classical_plc = self.classical_plc_invocations, + "classical PLC fill (DRED could not cover gap)" + ); + } emit(out); } } @@ -643,6 +703,10 @@ impl CallEngine { written_samples, decode_errs, codec = ?current_codec, + dred_recv = dred_recv.dred_reconstructions, + classical_plc = dred_recv.classical_plc_invocations, + dred_parses_with_data = dred_recv.parses_with_data, + dred_parses_total = dred_recv.parses_total, "recv heartbeat (android)" ); heartbeat = std::time::Instant::now();