feat(observability): logcat-visible DRED proof of life on Android
Adds enough INFO-level logging that an opus-DRED-v2 APK on Android can
be verified end-to-end by reading logcat alone — no debugger, no
Prometheus, no telemetry pipeline required. Three observation points:
1. Encoder construction (opus_enc.rs)
- Bumped the "DRED enabled" log from debug! to info! so the
per-call DRED config is in logcat by default. Each call's first
OpusEncoder construction logs codec, dred_frames, dred_ms,
loss_floor_pct.
- Added a one-shot static OnceLock that logs `opusic_c::version()`
the first time an OpusEncoder is built in the process. This is
the smoking gun for "is the new libopus actually loaded" — pre-
Phase-0 audiopus shipped libopus 1.3 with no DRED, post-Phase-0
should print 1.5.2 here.
2. DRED state ingest (DredRecvState::ingest_opus in
desktop/src-tauri/src/engine.rs)
- First successful parse on a call logs immediately so we can see
"DRED is on the wire" in logcat.
- Subsequent parses sample every 100th to confirm steady-state
samples_available without drowning the log.
- New parses_total / parses_with_data counters track the parse
rate vs the success rate (a packet without DRED in it returns
`available == 0`, so a low ratio means the encoder isn't
emitting DRED bytes).
3. DRED reconstruction events (DredRecvState::fill_gap_to)
- Every DRED reconstruction logs at INFO with missing_seq,
anchor_seq, offset_samples, offset_ms, samples_available,
gap_size, and the running total. These events are rare on a
clean network and we want to know exactly which gap was filled.
- First three classical PLC fills + every 50th thereafter log so
we can see when DRED couldn't cover a gap (offset out of range,
no good state, or reconstruct error).
4. Recv heartbeat (Android start() in engine.rs)
- Existing 2-second heartbeat now includes dred_recv,
classical_plc, dred_parses_with_data, dred_parses_total
so a steady-state call shows the cumulative counters in
logcat without parsing.
How to verify on a real call:
adb logcat -s 'RustStdoutStderr:*' | grep -i 'dred\|libopus version'
Expected output sequence on a successful Opus call:
- "linked libopus version libopus_version=libopus 1.5.2-..." (once per process)
- "opus encoder: DRED enabled codec=Opus24k dred_frames=20 dred_ms=200 loss_floor_pct=15" (per call)
- "DRED state parsed from Opus packet seq=N samples_available=4560 ms=95 ..." (after first DRED-bearing packet)
- "recv heartbeat (android) ... dred_recv=0 classical_plc=0 dred_parses_with_data=58 dred_parses_total=58" (every 2s)
If you see "linked libopus version libopus 1.3" — the FFI swap didn't
take. If dred_parses_with_data stays at 0 while dred_parses_total
climbs — the sender isn't emitting DRED (check the encoder's loss
floor and the receiver's libopus version). If gaps trigger
"classical PLC fill" instead of "DRED reconstruction fired" —
DRED state coverage is too small for the observed loss pattern,
and the loss floor or DRED duration policy needs tuning.
Verification:
- cargo check -p wzp-codec -p wzp-client: 0 errors
- cargo check -p wzp-desktop: 0 Rust errors (only the pre-existing
tauri::generate_context!() proc macro panic on missing ../dist
which fires at host check time, irrelevant on the remote build)
- cargo test -p wzp-codec --lib: 69 passing (no regressions)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -38,10 +38,18 @@
|
||||
//! keeping the bitrate overhead modest compared to 25%. Real measurements
|
||||
//! from the quality adapter override upward when loss exceeds the floor.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use opusic_c::{Application, Bitrate, Channels, Encoder, InbandFec, SampleRate, Signal};
|
||||
use tracing::{debug, warn};
|
||||
use tracing::{debug, info, warn};
|
||||
use wzp_proto::{AudioEncoder, CodecError, CodecId, QualityProfile};
|
||||
|
||||
/// Logged exactly once per process the first time an OpusEncoder is built.
|
||||
/// Confirms that libopus 1.5.2 (the version with DRED) is actually linked
|
||||
/// at runtime — invaluable when chasing "is the new codec loaded?"
|
||||
/// regressions on Android, where the only debug surface is logcat.
|
||||
static LIBOPUS_VERSION_LOGGED: OnceLock<()> = OnceLock::new();
|
||||
|
||||
/// Minimum `OPUS_SET_PACKET_LOSS_PERC` value used in DRED mode. libopus
|
||||
/// scales the DRED emission window with the assumed loss percentage:
|
||||
/// empirically, 5% gives a 15 ms window (useless), 10% gives 55 ms, 15%
|
||||
@@ -178,7 +186,11 @@ impl OpusEncoder {
|
||||
.set_packet_loss(DRED_LOSS_FLOOR_PCT)
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("set packet loss floor: {e:?}")))?;
|
||||
|
||||
debug!(
|
||||
// Bumped from debug! to info! so the DRED config is visible in
|
||||
// logcat without enabling debug-level filtering. Each call's
|
||||
// first OpusEncoder construction will log this; subsequent
|
||||
// profile switches log it again with the new tier.
|
||||
info!(
|
||||
codec = ?codec,
|
||||
dred_frames,
|
||||
dred_ms = dred_frames as u32 * 10,
|
||||
@@ -186,6 +198,14 @@ impl OpusEncoder {
|
||||
"opus encoder: DRED enabled"
|
||||
);
|
||||
|
||||
// One-shot logging of the linked libopus version so we can
|
||||
// confirm at a glance that opusic-c (libopus 1.5.2) is loaded.
|
||||
// Pre-Phase-0 audiopus shipped libopus 1.3 which has no DRED;
|
||||
// if this log says "libopus 1.3" something is very wrong.
|
||||
LIBOPUS_VERSION_LOGGED.get_or_init(|| {
|
||||
info!(libopus_version = %opusic_c::version(), "linked libopus version");
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -113,6 +113,12 @@ struct DredRecvState {
|
||||
expected_seq: Option<u16>,
|
||||
pub dred_reconstructions: u64,
|
||||
pub classical_plc_invocations: u64,
|
||||
/// Number of arriving Opus packets we have parsed for DRED so far —
|
||||
/// used to throttle the periodic "DRED state observed" log to one
|
||||
/// line every N packets so logcat doesn't drown.
|
||||
parses_total: u64,
|
||||
/// Counter of parses that yielded a non-zero `samples_available`.
|
||||
parses_with_data: u64,
|
||||
}
|
||||
|
||||
impl DredRecvState {
|
||||
@@ -128,6 +134,8 @@ impl DredRecvState {
|
||||
expected_seq: None,
|
||||
dred_reconstructions: 0,
|
||||
classical_plc_invocations: 0,
|
||||
parses_total: 0,
|
||||
parses_with_data: 0,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -138,10 +146,29 @@ impl DredRecvState {
|
||||
/// Call this BEFORE `fill_gap_to` so the anchor reflects the freshest
|
||||
/// DRED source available for gap reconstruction.
|
||||
fn ingest_opus(&mut self, seq: u16, payload: &[u8]) {
|
||||
self.parses_total += 1;
|
||||
match self.dred_decoder.parse_into(&mut self.scratch, payload) {
|
||||
Ok(available) if available > 0 => {
|
||||
self.parses_with_data += 1;
|
||||
std::mem::swap(&mut self.scratch, &mut self.last_good);
|
||||
self.last_good_seq = Some(seq);
|
||||
|
||||
// First successful parse on this call: log loudly so the
|
||||
// user can see "DRED is on the wire" in logcat. After
|
||||
// that, sample every 100th parse to confirm the window
|
||||
// is steady-state without drowning the log.
|
||||
let should_log = self.parses_with_data == 1
|
||||
|| self.parses_with_data % 100 == 0;
|
||||
if should_log {
|
||||
info!(
|
||||
seq,
|
||||
samples_available = available,
|
||||
ms = available / 48,
|
||||
parses_with_data = self.parses_with_data,
|
||||
parses_total = self.parses_total,
|
||||
"DRED state parsed from Opus packet"
|
||||
);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// Packet carried no DRED data, or parse failed — keep
|
||||
@@ -198,11 +225,44 @@ impl DredRecvState {
|
||||
match reconstructed {
|
||||
Some(_n) => {
|
||||
self.dred_reconstructions += 1;
|
||||
// Log every DRED reconstruction. These are
|
||||
// rare events on a clean network — when
|
||||
// they fire, we want to know exactly which
|
||||
// gap was filled and how the offset math
|
||||
// played out. Acceptable to be chatty here.
|
||||
info!(
|
||||
missing_seq,
|
||||
anchor_seq = ?self.last_good_seq,
|
||||
offset_samples,
|
||||
offset_ms = offset_samples / 48,
|
||||
samples_available = available,
|
||||
gap_size = gap,
|
||||
total_dred_recoveries = self.dred_reconstructions,
|
||||
"DRED reconstruction fired for missing frame"
|
||||
);
|
||||
emit(out);
|
||||
}
|
||||
None => {
|
||||
if decoder.decode_lost(out).is_ok() {
|
||||
self.classical_plc_invocations += 1;
|
||||
// Log the first few classical PLC fills
|
||||
// and then sample, so we can see when
|
||||
// DRED couldn't cover a gap. The reason
|
||||
// is whichever check failed in the if
|
||||
// above (offset out of range, no good
|
||||
// state, or reconstruct error).
|
||||
if self.classical_plc_invocations <= 3
|
||||
|| self.classical_plc_invocations % 50 == 0
|
||||
{
|
||||
info!(
|
||||
missing_seq,
|
||||
anchor_seq = ?self.last_good_seq,
|
||||
offset_samples,
|
||||
samples_available = available,
|
||||
total_classical_plc = self.classical_plc_invocations,
|
||||
"classical PLC fill (DRED could not cover gap)"
|
||||
);
|
||||
}
|
||||
emit(out);
|
||||
}
|
||||
}
|
||||
@@ -643,6 +703,10 @@ impl CallEngine {
|
||||
written_samples,
|
||||
decode_errs,
|
||||
codec = ?current_codec,
|
||||
dred_recv = dred_recv.dred_reconstructions,
|
||||
classical_plc = dred_recv.classical_plc_invocations,
|
||||
dred_parses_with_data = dred_recv.parses_with_data,
|
||||
dred_parses_total = dred_recv.parses_total,
|
||||
"recv heartbeat (android)"
|
||||
);
|
||||
heartbeat = std::time::Instant::now();
|
||||
|
||||
Reference in New Issue
Block a user