feat(debug): per-candidate dial diagnostics in dual-path race
Some checks failed
Mirror to GitHub / mirror (push) Failing after 28s
Build Release Binaries / build-amd64 (push) Failing after 3m24s

Added a CandidateDiag struct and a RaceResult.candidate_diags field recording, per candidate:
- address attempted
- result (ok / skipped:ipv6 / error:reason)
- elapsed time in ms

Surfaced in call-debug events:
- connect:dual_path_race_start now includes dial_order + peer_mapped
- connect:dual_path_race_done now includes candidate_diags array

Upgraded dual_path tracing from debug to info for IPv6 skips and
dial failures so they appear in logcat/console.

Helps diagnose why P2P fails on specific networks (5G CGNAT,
address-restricted NATs, etc.).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Siavash Sameni
2026-04-14 12:16:34 +04:00
parent 6805caae0e
commit c0dd6c06ff
2 changed files with 54 additions and 2 deletions

View File

@@ -38,6 +38,15 @@ pub enum WinningPath {
Relay,
}
/// Diagnostic info for a single candidate dial attempt.
///
/// The dial loop in `race` pushes one record per candidate it visits,
/// including candidates that are skipped without being dialed. The type
/// derives `Serialize` so the collected records can be embedded directly
/// in JSON debug payloads.
#[derive(Debug, Clone, serde::Serialize)]
pub struct CandidateDiag {
/// Index of the candidate as seen by the dial loop (the same value that
/// is logged as `candidate_idx`).
pub index: usize,
/// The candidate address, rendered with `to_string()`.
pub addr: String,
/// Outcome of the attempt: `"ok"` on a successful connect,
/// `"skipped:ipv6"` when the candidate was not dialed, or
/// `"error:<reason>"` carrying the connect error's display text.
pub result: String, // "ok", "skipped:ipv6", "error:..."
/// Milliseconds between the start of the dial and the connect call
/// returning; `None` for candidates that were skipped and never dialed.
pub elapsed_ms: Option<u32>,
}
/// Phase 6: the race now returns BOTH transports (when available)
/// so the connect command can negotiate with the peer before
/// committing. The negotiation decides which transport to use
@@ -54,6 +63,8 @@ pub struct RaceResult {
/// Informational — the actual path used is decided by the
/// Phase 6 negotiation after both sides exchange reports.
pub local_winner: WinningPath,
/// Per-candidate diagnostic info for debugging.
pub candidate_diags: Vec<CandidateDiag>,
}
/// Attempt a direct QUIC connection to the peer in parallel with
@@ -152,6 +163,10 @@ pub async fn race(
// is created. Install attempt is idempotent.
let _ = rustls::crypto::ring::default_provider().install_default();
// Shared diagnostic collector for per-candidate results.
let diags_collector: Arc<std::sync::Mutex<Vec<CandidateDiag>>> =
Arc::new(std::sync::Mutex::new(Vec::new()));
// Build the direct-path endpoint + future based on role.
//
// A-role: one accept future on the shared endpoint. The
@@ -283,6 +298,7 @@ pub async fn race(
let _v6_ep_for_dial = ipv6_endpoint.clone();
let dial_order = peer_candidates.dial_order();
let sni = call_sni.clone();
let diags = diags_collector.clone();
direct_fut = Box::pin(async move {
if dial_order.is_empty() {
// No candidates — the race reduces to
@@ -311,17 +327,32 @@ pub async fn race(
// Re-enable once IPv6 datagram delivery is
// verified on target networks.
if candidate.is_ipv6() {
tracing::debug!(
tracing::info!(
%candidate,
candidate_idx = idx,
"dual_path: skipping IPv6 candidate (disabled)"
);
if let Ok(mut d) = diags.lock() {
d.push(CandidateDiag {
index: idx,
addr: candidate.to_string(),
result: "skipped:ipv6".into(),
elapsed_ms: None,
});
}
continue;
}
let ep = ep_for_fut.clone();
let client_cfg = wzp_transport::client_config();
let sni = sni.clone();
let diags_inner = diags.clone();
set.spawn(async move {
let start = std::time::Instant::now();
tracing::info!(
%candidate,
candidate_idx = idx,
"dual_path: dialing candidate"
);
let result = wzp_transport::connect(
&ep,
candidate,
@@ -329,6 +360,19 @@ pub async fn race(
client_cfg,
)
.await;
let elapsed = start.elapsed().as_millis() as u32;
let diag_result = match &result {
Ok(_) => "ok".to_string(),
Err(e) => format!("error:{e}"),
};
if let Ok(mut d) = diags_inner.lock() {
d.push(CandidateDiag {
index: idx,
addr: candidate.to_string(),
result: diag_result,
elapsed_ms: Some(elapsed),
});
}
(idx, candidate, result)
});
}
@@ -357,7 +401,7 @@ pub async fn race(
return Ok(QuinnTransport::new(conn));
}
Err(e) => {
tracing::debug!(
tracing::info!(
%candidate,
candidate_idx = idx,
error = %e,
@@ -545,6 +589,10 @@ pub async fn race(
let _ = (direct_ep, relay_ep, ipv6_endpoint);
let candidate_diags = diags_collector.lock()
.map(|d| d.clone())
.unwrap_or_default();
Ok(RaceResult {
direct_transport: direct_result
.and_then(|r| r.ok())
@@ -553,6 +601,7 @@ pub async fn race(
.and_then(|r| r.ok())
.map(|t| Arc::new(t)),
local_winner,
candidate_diags,
})
}

View File

@@ -456,7 +456,9 @@ async fn connect(
emit_call_debug(&app, "connect:dual_path_race_start", serde_json::json!({
"role": format!("{:?}", r),
"peer_reflex": peer_addr_parsed.map(|a| a.to_string()),
"peer_mapped": peer_mapped_parsed.map(|a| a.to_string()),
"peer_local": peer_local_parsed.iter().map(|a| a.to_string()).collect::<Vec<_>>(),
"dial_order": candidates.dial_order().iter().map(|a| a.to_string()).collect::<Vec<_>>(),
"relay_addr": relay_sockaddr.to_string(),
"own_reflex_addr": own_reflex_addr,
}));
@@ -498,6 +500,7 @@ async fn connect(
"local_winner": format!("{:?}", local_winner),
"local_direct_ok": local_direct_ok,
"has_relay": race_result.relay_transport.is_some(),
"candidate_diags": race_result.candidate_diags,
}));
// Phase 6: send our report to the peer and