diag(android-engine): first-join no-audio ordering instrumentation

Adds a single call_t0 = Instant::now() at the top of the Android
CallEngine::start path, threaded through send + recv tasks as
send_t0 / recv_t0, and tags the following milestones with
t_ms_since_call_start so we can build a clean side-by-side log of
first-call vs rejoin:

  1. QUIC connection established
  2. handshake complete
  3. wzp-native audio_start returned (+ how long audio_start itself took)
  4. send task spawned
  5. send: first full capture frame read (+ short_reads_before count)
  6. send: first non-zero capture RMS
  7. recv task spawned
  8. recv: first media packet received
  9. recv: first successful decode
 10. recv: first playout-ring write

Combined with the existing C++-side cb#0 logs in
crates/wzp-native/cpp/oboe_bridge.cpp ("capture cb#0", "playout
cb#0") this gives us full-pipeline ordering with no native-side
changes needed.

PRD: .taskmaster/docs/prd_android_first_join_no_audio.txt
Task: 32 (first task in the chain — diagnostics before any fix
attempts so we know which of the 5 suspect causes is real).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Siavash Sameni
2026-04-11 10:00:20 +04:00
parent 578ff8cff4
commit 7e7968b2f9

View File

@@ -302,7 +302,14 @@ impl CallEngine {
where
F: Fn(&str, &str) + Send + Sync + 'static,
{
info!(%relay, %room, %alias, %quality, has_reuse = reuse_endpoint.is_some(), "CallEngine::start (android) invoked");
// Single "call epoch" timestamp threaded through send + recv tasks
// so every milestone log can carry t_ms_since_call_start. Used to
// diagnose the first-join no-audio regression by giving us a clean
// ordering between audio_start, first capture, first recv, first
// decode, first playout-ring write, and the C++ Oboe first-callback
// logs (which already exist in cpp/oboe_bridge.cpp).
let call_t0 = std::time::Instant::now();
info!(%relay, %room, %alias, %quality, has_reuse = reuse_endpoint.is_some(), t_ms = 0u128, "CallEngine::start (android) invoked");
let _ = rustls::crypto::ring::default_provider().install_default();
let relay_addr: SocketAddr = relay.parse()?;
@@ -348,7 +355,7 @@ impl CallEngine {
return Err(anyhow::anyhow!("QUIC connect timeout (10s)"));
}
};
info!("QUIC connection established, performing handshake");
info!(t_ms = call_t0.elapsed().as_millis(), "first-join diag: QUIC connection established, performing handshake");
let transport = Arc::new(wzp_transport::QuinnTransport::new(conn));
let _session = wzp_client::handshake::perform_handshake(
@@ -358,7 +365,7 @@ impl CallEngine {
)
.await
.map_err(|e| { error!("perform_handshake failed: {e}"); e })?;
info!("connected to relay, handshake complete");
info!(t_ms = call_t0.elapsed().as_millis(), "first-join diag: connected to relay, handshake complete");
event_cb("connected", &format!("joined room {room}"));
// Oboe audio via the wzp-native cdylib that was dlopen'd at
@@ -370,10 +377,21 @@ impl CallEngine {
"wzp-native not loaded — dlopen failed at startup"
));
}
let t_pre_audio = call_t0.elapsed().as_millis();
if let Err(code) = crate::wzp_native::audio_start() {
return Err(anyhow::anyhow!("wzp_native_audio_start failed: code {code}"));
}
info!("wzp-native audio started");
// Diagnostic: how long did audio_start() take, and at what
// wall-clock offset from CallEngine::start did it complete?
// Compare to the C++ "playout cb#0" log timestamp in logcat to
// see whether the Oboe playout callback fires before or after
// the recv task starts pushing decoded frames.
let t_audio_start_done = call_t0.elapsed().as_millis();
info!(
t_ms = t_audio_start_done,
audio_start_ms = t_audio_start_done.saturating_sub(t_pre_audio),
"first-join diag: wzp-native audio started"
);
let running = Arc::new(AtomicBool::new(true));
let mic_muted = Arc::new(AtomicBool::new(false));
@@ -394,6 +412,7 @@ impl CallEngine {
let send_drops = Arc::new(AtomicU64::new(0));
let send_quality = quality.clone();
let send_tx_codec = tx_codec.clone();
let send_t0 = call_t0;
tokio::spawn(async move {
let profile = resolve_quality(&send_quality);
let config = match profile {
@@ -409,7 +428,7 @@ impl CallEngine {
},
};
let frame_samples = (config.profile.frame_duration_ms as usize) * 48;
info!(codec = ?config.profile.codec, frame_samples, "send task starting (android/oboe)");
info!(codec = ?config.profile.codec, frame_samples, t_ms = send_t0.elapsed().as_millis(), "first-join diag: send task spawned (android/oboe)");
*send_tx_codec.lock().await = format!("{:?}", config.profile.codec);
let mut encoder = CallEncoder::new(&config);
encoder.set_aec_enabled(false);
@@ -419,6 +438,13 @@ impl CallEngine {
let mut last_rms: u32 = 0;
let mut last_pkt_bytes: usize = 0;
let mut short_reads: u64 = 0;
// First-join diagnostic: latch the wall-clock offset of the
// first full-frame capture read and the first non-zero RMS
// reading separately. The gap between them tells us how long
// Oboe input took to actually start delivering real samples
// after returning a "started" status from audio_start.
let mut first_full_read_logged = false;
let mut first_nonzero_rms_logged = false;
loop {
if !send_r.load(Ordering::Relaxed) {
@@ -434,12 +460,29 @@ impl CallEngine {
tokio::time::sleep(std::time::Duration::from_millis(5)).await;
continue;
}
if !first_full_read_logged {
info!(
t_ms = send_t0.elapsed().as_millis(),
short_reads_before = short_reads,
frame_samples,
"first-join diag: send first full capture frame read"
);
first_full_read_logged = true;
}
// RMS for UI meter
let sum_sq: f64 = buf.iter().map(|&s| (s as f64) * (s as f64)).sum();
let rms = (sum_sq / buf.len() as f64).sqrt() as u32;
send_level.store(rms, Ordering::Relaxed);
last_rms = rms;
if !first_nonzero_rms_logged && rms > 0 {
info!(
t_ms = send_t0.elapsed().as_millis(),
rms,
"first-join diag: send first non-zero capture RMS"
);
first_nonzero_rms_logged = true;
}
if send_mic.load(Ordering::Relaxed) {
buf.fill(0);
@@ -483,6 +526,7 @@ impl CallEngine {
let recv_spk = spk_muted.clone();
let recv_fr = frames_received.clone();
let recv_rx_codec = rx_codec.clone();
let recv_t0 = call_t0;
tokio::spawn(async move {
let initial_profile = resolve_quality(&quality).unwrap_or(QualityProfile::GOOD);
// Phase 3b/3c: use concrete AdaptiveDecoder (not Box<dyn
@@ -497,7 +541,11 @@ impl CallEngine {
// Phase 3b/3c DRED reconstruction state — see DredRecvState
// above for the full flow.
let mut dred_recv = DredRecvState::new();
info!(codec = ?current_codec, "recv task starting (android/oboe)");
info!(codec = ?current_codec, t_ms = recv_t0.elapsed().as_millis(), "first-join diag: recv task spawned (android/oboe)");
// First-join diagnostic latches — see send task above for the
// sibling capture milestones.
let mut first_decode_logged = false;
let mut first_playout_write_logged = false;
// ─── Decoded-PCM recorder (debug) ────────────────────────────
// Dumps the first ~10 seconds of post-AGC PCM to a raw i16 LE
@@ -546,7 +594,13 @@ impl CallEngine {
{
Ok(Ok(Some(pkt))) => {
if !first_packet_logged {
info!(codec_id = ?pkt.header.codec_id, payload_bytes = pkt.payload.len(), is_repair = pkt.header.is_repair, "recv: first media packet received");
info!(
t_ms = recv_t0.elapsed().as_millis(),
codec_id = ?pkt.header.codec_id,
payload_bytes = pkt.payload.len(),
is_repair = pkt.header.is_repair,
"first-join diag: recv first media packet"
);
first_packet_logged = true;
}
if !pkt.header.is_repair && pkt.header.codec_id != CodecId::ComfortNoise {
@@ -613,6 +667,15 @@ impl CallEngine {
Ok(n) => {
last_decode_n = n;
decoded_frames += 1;
if !first_decode_logged {
info!(
t_ms = recv_t0.elapsed().as_millis(),
n,
codec = ?current_codec,
"first-join diag: recv first successful decode"
);
first_decode_logged = true;
}
// Log sample range for the first few decoded frames and periodically
if decoded_frames <= 3 || decoded_frames % 100 == 0 {
let slice = &pcm[..n];
@@ -663,6 +726,15 @@ impl CallEngine {
if !recv_spk.load(Ordering::Relaxed) {
let w = crate::wzp_native::audio_write_playout(&pcm[..n]);
if !first_playout_write_logged {
info!(
t_ms = recv_t0.elapsed().as_millis(),
n,
w,
"first-join diag: recv first playout-ring write"
);
first_playout_write_logged = true;
}
last_written = w;
written_samples = written_samples.saturating_add(w as u64);
if w < n && decoded_frames <= 10 {