fix(nat): working NAT tickle + smart filter debug + timeout diags
Some checks failed
Mirror to GitHub / mirror (push) Failing after 27s
Build Release Binaries / build-amd64 (push) Failing after 3m39s

Fixes from real-world 5G↔Starlink testing:

NAT tickle fix:
- tokio::net::UdpSocket::bind() doesn't set SO_REUSEADDR, so binding
  to the same port as quinn silently failed. Now uses socket2::Socket
  with explicit SO_REUSEADDR + SO_REUSEPORT (via libc on unix).
- Tickle now logs success/failure for debugging.

Diagnostic fixes:
- connect:dual_path_race_start shows both dial_order_raw and
  dial_order_smart so we can see what filtering removed
- Grace-period timeout (relay wins first, direct still running)
  now fills "timeout:grace" diags for unrecorded candidates
- Previously candidate_diags was empty when relay won the race

Dependencies:
- Added socket2 = "0.5" to wzp-client

593 tests pass, 0 regressions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Siavash Sameni
2026-04-14 15:58:13 +04:00
parent bc6d327ebb
commit 1de280fe04
4 changed files with 67 additions and 26 deletions

View File

@@ -453,21 +453,18 @@ async fn connect(
own = ?own_reflex_addr,
"connect: starting dual-path race"
);
let own_reflex_parsed: Option<std::net::SocketAddr> =
own_reflex_addr.as_deref().and_then(|s| s.parse().ok());
emit_call_debug(&app, "connect:dual_path_race_start", serde_json::json!({
"role": format!("{:?}", r),
"peer_reflex": peer_addr_parsed.map(|a| a.to_string()),
"peer_mapped": peer_mapped_parsed.map(|a| a.to_string()),
"peer_local": peer_local_parsed.iter().map(|a| a.to_string()).collect::<Vec<_>>(),
"dial_order": candidates.dial_order().iter().map(|a| a.to_string()).collect::<Vec<_>>(),
"dial_order_raw": candidates.dial_order().iter().map(|a| a.to_string()).collect::<Vec<_>>(),
"dial_order_smart": candidates.smart_dial_order(own_reflex_parsed.as_ref()).iter().map(|a| a.to_string()).collect::<Vec<_>>(),
"relay_addr": relay_sockaddr.to_string(),
"own_reflex_addr": own_reflex_addr,
}));
// Phase 6 fix: install the oneshot BEFORE the race
// starts. The peer's MediaPathReport can arrive
// while our race is still running — if we set up
// the oneshot after the race, the recv loop has
// nowhere to send the report and it gets dropped,
// causing a 3s timeout and false relay fallback.
let (path_report_tx, path_report_rx) = tokio::sync::oneshot::channel::<bool>();
{
let mut sig = state.signal.lock().await;
@@ -476,8 +473,6 @@ async fn connect(
let room_sni = room.clone();
let call_sni = format!("call-{room}");
let own_reflex_parsed: Option<std::net::SocketAddr> =
own_reflex_addr.as_deref().and_then(|s| s.parse().ok());
match wzp_client::dual_path::race(
r,
candidates,