From 2aa65825857591999d1982c32ff5c7892a545af8 Mon Sep 17 00:00:00 2001 From: Siavash Sameni Date: Mon, 25 May 2026 07:38:38 +0400 Subject: [PATCH] fix(android): call-debug instrumentation for audio startup path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add emit_call_debug events at every step of the Android connect/audio path so failures are visible in the Settings debug log without needing adb logcat: - connect:handshake_start/done/failed (with timing) - connect:android_audio_preflight (wzp_native loaded + RECORD_AUDIO permission check via new has_record_audio_permission() JNI helper) - connect:audio_stop_start/done - connect:audio_mode_start/done/failed - connect:audio_start_start/failed/panic/done (with oboe error code) - connect:reuse_endpoint (endpoint reuse diagnostic) Also adds has_record_audio_permission() to android_audio.rs — used in the preflight event to confirm the OS has granted mic access before wzp_oboe_start is called. Co-Authored-By: Claude Sonnet 4.6 --- desktop/src-tauri/src/android_audio.rs | 24 +++++ desktop/src-tauri/src/engine.rs | 125 +++++++++++++++++++++-- desktop/src-tauri/src/lib.rs | 12 +++ docs/bugs/001-android-join-voice-hang.md | 8 ++ 4 files changed, 158 insertions(+), 11 deletions(-) diff --git a/desktop/src-tauri/src/android_audio.rs b/desktop/src-tauri/src/android_audio.rs index ffcc556..8f9c001 100644 --- a/desktop/src-tauri/src/android_audio.rs +++ b/desktop/src-tauri/src/android_audio.rs @@ -56,6 +56,30 @@ fn audio_manager<'local>( Ok(am) } +fn has_permission(permission: &str) -> Result { + let (vm, activity) = jvm_and_activity()?; + let mut env = vm + .attach_current_thread() + .map_err(|e| format!("attach_current_thread: {e}"))?; + let permission = env + .new_string(permission) + .map_err(|e| format!("new_string(permission): {e}"))?; + let result = env + .call_method( + &activity, + "checkSelfPermission", + "(Ljava/lang/String;)I", + &[JValue::Object(&permission)], + ) + .and_then(|v| v.i()) + .map_err(|e| format!("checkSelfPermission: {e}"))?; + Ok(result == 0) +} + +pub fn has_record_audio_permission() -> Result { + has_permission("android.permission.RECORD_AUDIO") +} + /// Set `AudioManager.MODE_IN_COMMUNICATION`. Call when a VoIP call starts. /// This tells the audio policy to route through the communication device /// path (earpiece/BT SCO) instead of the media path (speaker/BT A2DP). diff --git a/desktop/src-tauri/src/engine.rs b/desktop/src-tauri/src/engine.rs index 2cdda6c..770f683 100644 --- a/desktop/src-tauri/src/engine.rs +++ b/desktop/src-tauri/src/engine.rs @@ -544,13 +544,43 @@ impl CallEngine { // through the signal channel (DirectCallOffer/Answer carry // identity_pub + ephemeral_pub + signature). if !is_direct_p2p { - let _session = - wzp_client::handshake::perform_handshake(&*transport, &seed.0, Some(&alias)) - .await - .map_err(|e| { - error!("perform_handshake failed: {e}"); - e - })?; + crate::emit_call_debug( + &app, + "connect:handshake_start", + serde_json::json!({ + "t_ms": call_t0.elapsed().as_millis(), + "room": room, + "remote": transport.remote_address().to_string(), + }), + ); + let _session = match wzp_client::handshake::perform_handshake( + &*transport, + &seed.0, + Some(&alias), + ) + .await + { + Ok(session) => session, + Err(e) => { + error!("perform_handshake failed: {e}"); + crate::emit_call_debug( + &app, + "connect:handshake_failed", + serde_json::json!({ + "t_ms": call_t0.elapsed().as_millis(), + "error": e.to_string(), + }), + ); + return Err(e.into()); + } + }; + crate::emit_call_debug( + &app, + "connect:handshake_done", + serde_json::json!({ + "t_ms": call_t0.elapsed().as_millis(), + }), + ); info!( t_ms = call_t0.elapsed().as_millis(), "first-join diag: connected to relay, handshake complete" @@ -567,7 +597,19 @@ impl CallEngine { // startup. `wzp_native::audio_start()` brings up the capture + // playout streams; send/recv tasks below pull/push PCM through // the extern "C" bridge rings. - if !crate::wzp_native::is_loaded() { + let native_loaded = crate::wzp_native::is_loaded(); + crate::emit_call_debug( + &app, + "connect:android_audio_preflight", + serde_json::json!({ + "t_ms": call_t0.elapsed().as_millis(), + "wzp_native_loaded": native_loaded, + "record_audio_permission": crate::android_audio::has_record_audio_permission() + .map(|v| serde_json::json!(v)) + .unwrap_or_else(|e| serde_json::json!({ "error": e })), + }), + ); + if !native_loaded { return Err(anyhow::anyhow!( "wzp-native not loaded — dlopen failed at startup" )); @@ -584,7 +626,17 @@ impl CallEngine { // running stop first (no-op on cold start when not yet // started), we get the same "fresh rebuild" behavior on // every call. + crate::emit_call_debug( + &app, + "connect:audio_stop_start", + serde_json::json!({ "t_ms": call_t0.elapsed().as_millis() }), + ); crate::wzp_native::audio_stop(); + crate::emit_call_debug( + &app, + "connect:audio_stop_done", + serde_json::json!({ "t_ms": call_t0.elapsed().as_millis() }), + ); // Brief pause to let Android's audio routing + AudioManager // settle after the stop. 50ms is enough for the driver to // release the audio session; shorter risks the new start @@ -596,8 +648,28 @@ impl CallEngine { // (music drops from BT A2DP to earpiece, etc.). #[cfg(target_os = "android")] { - if let Err(e) = crate::android_audio::set_audio_mode_communication() { - tracing::warn!("set_audio_mode_communication failed: {e}"); + crate::emit_call_debug( + &app, + "connect:audio_mode_start", + serde_json::json!({ "t_ms": call_t0.elapsed().as_millis() }), + ); + match crate::android_audio::set_audio_mode_communication() { + Ok(()) => crate::emit_call_debug( + &app, + "connect:audio_mode_done", + serde_json::json!({ "t_ms": call_t0.elapsed().as_millis() }), + ), + Err(e) => { + tracing::warn!("set_audio_mode_communication failed: {e}"); + crate::emit_call_debug( + &app, + "connect:audio_mode_failed", + serde_json::json!({ + "t_ms": call_t0.elapsed().as_millis(), + "error": e, + }), + ); + } } } @@ -606,10 +678,33 @@ impl CallEngine { // HAL. Calling it directly blocks the tokio worker thread, // which freezes all async tasks including our own timeouts. let t_pre_audio = call_t0.elapsed().as_millis(); + crate::emit_call_debug( + &app, + "connect:audio_start_start", + serde_json::json!({ "t_ms": t_pre_audio }), + ); let audio_start_result = tokio::task::spawn_blocking(crate::wzp_native::audio_start) .await - .map_err(|e| anyhow::anyhow!("audio_start task panic: {e}"))?; + .map_err(|e| { + crate::emit_call_debug( + &app, + "connect:audio_start_panic", + serde_json::json!({ + "t_ms": call_t0.elapsed().as_millis(), + "error": e.to_string(), + }), + ); + anyhow::anyhow!("audio_start task panic: {e}") + })?; if let Err(code) = audio_start_result { + crate::emit_call_debug( + &app, + "connect:audio_start_failed", + serde_json::json!({ + "t_ms": call_t0.elapsed().as_millis(), + "code": code, + }), + ); return Err(anyhow::anyhow!( "wzp_native_audio_start failed: code {code}" )); @@ -633,6 +728,14 @@ impl CallEngine { audio_start_ms = t_audio_start_done.saturating_sub(t_pre_audio), "first-join diag: wzp-native audio started (with stop+prime cycle)" ); + crate::emit_call_debug( + &app, + "connect:audio_start_done", + serde_json::json!({ + "t_ms": t_audio_start_done, + "audio_start_ms": t_audio_start_done.saturating_sub(t_pre_audio), + }), + ); let running = Arc::new(AtomicBool::new(true)); let mic_muted = Arc::new(AtomicBool::new(false)); diff --git a/desktop/src-tauri/src/lib.rs b/desktop/src-tauri/src/lib.rs index e4dbd6d..4a5da33 100644 --- a/desktop/src-tauri/src/lib.rs +++ b/desktop/src-tauri/src/lib.rs @@ -772,6 +772,18 @@ async fn connect( if reuse_endpoint.is_some() && pre_connected_transport.is_none() { tracing::info!("connect: reusing existing signal endpoint for media connection"); } + emit_call_debug( + &app, + "connect:reuse_endpoint", + serde_json::json!({ + "has_reuse_endpoint": reuse_endpoint.is_some(), + "reuse_local_addr": reuse_endpoint + .as_ref() + .and_then(|ep| ep.local_addr().ok()) + .map(|addr| addr.to_string()), + "has_pre_connected_transport": pre_connected_transport.is_some(), + }), + ); let app_clone = app.clone(); // Log transport details for debugging direct P2P media issues diff --git a/docs/bugs/001-android-join-voice-hang.md b/docs/bugs/001-android-join-voice-hang.md index 268fd5b..179627f 100644 --- a/docs/bugs/001-android-join-voice-hang.md +++ b/docs/bugs/001-android-join-voice-hang.md @@ -25,6 +25,8 @@ Working-tree diagnostic changes applied during this investigation: - `crates/wzp-native/cpp/oboe_bridge.cpp`: return `-6` if both streams do not reach `Started` before the 2s poll deadline. This turns Oboe false-success into a visible Rust/JS error. - `desktop/src/main.ts`: shared `connectWithTimeout()` for room joins and direct-call auto-connect; shared `errorMessage()` for useful toast text. +- `desktop/src-tauri/src/engine.rs`: emit `connect:handshake_*`, `connect:android_audio_preflight`, `connect:audio_*` markers around each Android-only join step. +- `desktop/src-tauri/src/lib.rs`: emit `connect:reuse_endpoint` so we can see whether the room join is sharing the signal QUIC endpoint. Next Android repro should distinguish: @@ -32,6 +34,7 @@ Next Android repro should distinguish: |---|---| | `Join failed: wzp_native_audio_start failed: code -2` | mic permission / capture open failure | | `Join failed: wzp_native_audio_start failed: code -6` | Oboe streams opened/requested start, but HAL never transitioned both to `Started` | +| `Join failed: transport: timeout after 10000ms` or similar after `connect:handshake_start` | QUIC connected, but relay media handshake did not return `CallAnswer` | | `Join failed: connect timed out (15s) - check audio permissions` | Tauri command did not resolve to JS; collect Rust/Tauri logs around `connect:call_engine_starting` | --- @@ -156,6 +159,11 @@ Key log lines to look for: | Log line | Diagnosis | |----------|-----------| +| `connect:reuse_endpoint` | Whether media is sharing the existing signal endpoint | +| `connect:handshake_start` followed by 10s timeout | Relay media handshake is stuck before Android audio starts | +| `connect:handshake_done` | Network/relay handshake succeeded; continue to audio diagnostics | +| `connect:android_audio_preflight` | Shows `wzp-native` load state and RECORD_AUDIO permission | +| `connect:audio_start_start` with no done/failed | Native Oboe call is hanging | | `wzp_oboe_start: already running` | Issue A — g_running not cleared | | `Failed to open capture stream: ErrorPermissionDenied` | Issue B — mic permission delay | | `Failed to start capture` / `Failed to start playout` | Oboe HAL error, code -4 or -5 |