fix(android): reuse signal endpoint for direct-call media connection
Direct-call accept hangs forever at the QUIC handshake on Android. Logs
from d7b37a5 showed:
CallEngine::start (android) invoked relay=172.16.81.172:4433 room=call-…
resolved relay addr
identity loaded
endpoint created, dialing relay ← reached
← nothing, 90s+, no error
The "connect failed" and "QUIC connection established" log lines never
fire, meaning endpoint.connect_with(…).await never makes progress.
Repro is 100%: SFU room join (one endpoint) works perfectly; direct call
(opens a SECOND quinn::Endpoint on top of the signal one) hangs in the
QUIC handshake. Creating two quinn::Endpoints on Android's AAudio-adjacent
UDP stack apparently causes the second one's datagrams to never reach the
relay (the server never sees the Initial packet). Rather than fight the
platform, we rely on quinn's ability to multiplex multiple Connections on a
single Endpoint, and reuse the signal endpoint for the media connection.
- SignalState now stores the quinn::Endpoint alongside the QuinnTransport.
register_signal populates both at the same time.
- CallEngine::start (both android and desktop branches) takes an
Option<wzp_transport::Endpoint>. Some → reuse (direct-call path, after
register_signal). None → create fresh (SFU room join path).
- The connect tauri command reads state.signal.endpoint and threads it
through to CallEngine::start, so the direct-call auto-connect (fired by
the "setup" signal-event in main.ts) lands on the existing UDP socket.
- wzp_transport re-exports quinn::Endpoint so wzp-desktop doesn't need to
depend on quinn directly.
- Also wraps the android connect in tokio::time::timeout(10s) so future
hangs become deterministic "connect TIMED OUT" errors in logcat
instead of silent deadlock.
The same endpoint-reuse fix is applied to the desktop client — the user
suspects direct calls are broken there too, and that this was likely always
the cause; it just never surfaced because desktop was only tested via SFU rooms.
This commit is contained in:
@@ -27,3 +27,8 @@ pub use connection::{accept, connect, create_endpoint};
|
||||
pub use path_monitor::PathMonitor;
|
||||
pub use quic::QuinnTransport;
|
||||
pub use wzp_proto::{MediaTransport, PathQuality, TransportError};
|
||||
|
||||
// Re-export the quinn Endpoint type so downstream crates (wzp-desktop) can
|
||||
// thread a shared endpoint between signaling and media connections without
|
||||
// needing to depend on quinn directly.
|
||||
pub use quinn::Endpoint;
|
||||
|
||||
@@ -105,12 +105,13 @@ impl CallEngine {
|
||||
alias: String,
|
||||
_os_aec: bool,
|
||||
quality: String,
|
||||
reuse_endpoint: Option<wzp_transport::Endpoint>,
|
||||
event_cb: F,
|
||||
) -> Result<Self, anyhow::Error>
|
||||
where
|
||||
F: Fn(&str, &str) + Send + Sync + 'static,
|
||||
{
|
||||
info!(%relay, %room, %alias, %quality, "CallEngine::start (android) invoked");
|
||||
info!(%relay, %room, %alias, %quality, has_reuse = reuse_endpoint.is_some(), "CallEngine::start (android) invoked");
|
||||
let _ = rustls::crypto::ring::default_provider().install_default();
|
||||
|
||||
let relay_addr: SocketAddr = relay.parse()?;
|
||||
@@ -124,14 +125,38 @@ impl CallEngine {
|
||||
info!(%fp, "identity loaded");
|
||||
|
||||
// QUIC transport + handshake.
|
||||
let bind_addr: SocketAddr = "0.0.0.0:0".parse().unwrap();
|
||||
let endpoint = wzp_transport::create_endpoint(bind_addr, None)
|
||||
.map_err(|e| { error!("create_endpoint failed: {e}"); e })?;
|
||||
info!("endpoint created, dialing relay");
|
||||
//
|
||||
// If a `reuse_endpoint` was passed in (the direct-call path, where we
|
||||
// already opened a quinn::Endpoint for the signal connection), reuse
|
||||
// it: a second quinn::Endpoint on Android silently fails to complete
|
||||
// the QUIC handshake against the same relay. Reusing the existing
|
||||
// socket lets quinn multiplex the signal + media connections on one
|
||||
// UDP port.
|
||||
let endpoint = if let Some(ep) = reuse_endpoint {
|
||||
info!(local_addr = ?ep.local_addr().ok(), "reusing signal endpoint for media connection");
|
||||
ep
|
||||
} else {
|
||||
let bind_addr: SocketAddr = "0.0.0.0:0".parse().unwrap();
|
||||
let ep = wzp_transport::create_endpoint(bind_addr, None)
|
||||
.map_err(|e| { error!("create_endpoint failed: {e}"); e })?;
|
||||
info!(local_addr = ?ep.local_addr().ok(), "created new endpoint, dialing relay");
|
||||
ep
|
||||
};
|
||||
let client_config = wzp_transport::client_config();
|
||||
let conn = wzp_transport::connect(&endpoint, relay_addr, &room, client_config)
|
||||
.await
|
||||
.map_err(|e| { error!("connect failed: {e}"); e })?;
|
||||
let conn = match tokio::time::timeout(
|
||||
std::time::Duration::from_secs(10),
|
||||
wzp_transport::connect(&endpoint, relay_addr, &room, client_config),
|
||||
).await {
|
||||
Ok(Ok(c)) => c,
|
||||
Ok(Err(e)) => {
|
||||
error!("connect failed: {e}");
|
||||
return Err(e.into());
|
||||
}
|
||||
Err(_) => {
|
||||
error!("connect TIMED OUT after 10s — QUIC handshake never completed. Relay may be unreachable from this endpoint.");
|
||||
return Err(anyhow::anyhow!("QUIC connect timeout (10s)"));
|
||||
}
|
||||
};
|
||||
info!("QUIC connection established, performing handshake");
|
||||
let transport = Arc::new(wzp_transport::QuinnTransport::new(conn));
|
||||
|
||||
@@ -378,6 +403,7 @@ impl CallEngine {
|
||||
alias: String,
|
||||
_os_aec: bool,
|
||||
quality: String,
|
||||
reuse_endpoint: Option<wzp_transport::Endpoint>,
|
||||
event_cb: F,
|
||||
) -> Result<Self, anyhow::Error>
|
||||
where
|
||||
@@ -418,9 +444,15 @@ impl CallEngine {
|
||||
let fingerprint = fp.to_string();
|
||||
info!(%fp, "identity loaded");
|
||||
|
||||
// Connect
|
||||
let bind_addr: SocketAddr = "0.0.0.0:0".parse().unwrap();
|
||||
let endpoint = wzp_transport::create_endpoint(bind_addr, None)?;
|
||||
// Connect — reuse the signal endpoint if the direct-call path gave us
|
||||
// one, otherwise create a fresh one (SFU room join path).
|
||||
let endpoint = if let Some(ep) = reuse_endpoint {
|
||||
info!("reusing signal endpoint for media connection");
|
||||
ep
|
||||
} else {
|
||||
let bind_addr: SocketAddr = "0.0.0.0:0".parse().unwrap();
|
||||
wzp_transport::create_endpoint(bind_addr, None)?
|
||||
};
|
||||
let client_config = wzp_transport::client_config();
|
||||
let conn = wzp_transport::connect(&endpoint, relay_addr, &room, client_config).await?;
|
||||
let transport = Arc::new(wzp_transport::QuinnTransport::new(conn));
|
||||
|
||||
@@ -243,8 +243,17 @@ async fn connect(
|
||||
return Err("already connected".into());
|
||||
}
|
||||
|
||||
// If we previously opened a quinn::Endpoint for the signaling connection
|
||||
// (direct-call path), reuse it so the media connection shares the same
|
||||
// UDP socket. This side-steps the Android issue where a second
|
||||
// quinn::Endpoint silently hangs in the QUIC handshake.
|
||||
let reuse_endpoint = state.signal.lock().await.endpoint.clone();
|
||||
if reuse_endpoint.is_some() {
|
||||
tracing::info!("connect: reusing existing signal endpoint for media connection");
|
||||
}
|
||||
|
||||
let app_clone = app.clone();
|
||||
match CallEngine::start(relay, room, alias, os_aec, quality, move |event_kind, message| {
|
||||
match CallEngine::start(relay, room, alias, os_aec, quality, reuse_endpoint, move |event_kind, message| {
|
||||
let _ = app_clone.emit(
|
||||
"call-event",
|
||||
CallEvent {
|
||||
@@ -341,6 +350,11 @@ async fn get_status(state: tauri::State<'_, Arc<AppState>>) -> Result<CallStatus
|
||||
|
||||
struct SignalState {
|
||||
transport: Option<Arc<wzp_transport::QuinnTransport>>,
|
||||
/// The quinn::Endpoint backing the signal connection. Reused for the
|
||||
/// media connection when a direct call is accepted — Android phones
|
||||
/// silently drop packets from a second quinn::Endpoint to the same
|
||||
/// relay, so every call after register_signal MUST share this socket.
|
||||
endpoint: Option<wzp_transport::Endpoint>,
|
||||
fingerprint: String,
|
||||
signal_status: String,
|
||||
incoming_call_id: Option<String>,
|
||||
@@ -380,7 +394,7 @@ async fn register_signal(
|
||||
_ => return Err("registration failed".into()),
|
||||
}
|
||||
|
||||
{ let mut sig = state.signal.lock().await; sig.transport = Some(transport.clone()); sig.fingerprint = fp.clone(); sig.signal_status = "registered".into(); }
|
||||
{ let mut sig = state.signal.lock().await; sig.transport = Some(transport.clone()); sig.endpoint = Some(endpoint.clone()); sig.fingerprint = fp.clone(); sig.signal_status = "registered".into(); }
|
||||
|
||||
tracing::info!(%fp, "signal registered, spawning recv loop");
|
||||
let signal_state = Arc::clone(&state.signal);
|
||||
@@ -483,7 +497,7 @@ pub fn run() {
|
||||
let state = Arc::new(AppState {
|
||||
engine: Mutex::new(None),
|
||||
signal: Arc::new(Mutex::new(SignalState {
|
||||
transport: None, fingerprint: String::new(), signal_status: "idle".into(),
|
||||
transport: None, endpoint: None, fingerprint: String::new(), signal_status: "idle".into(),
|
||||
incoming_call_id: None, incoming_caller_fp: None, incoming_caller_alias: None,
|
||||
})),
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user