From fa038df05744e5aaba1ff69f78473ac5ce9557e0 Mon Sep 17 00:00:00 2001 From: Siavash Sameni Date: Sun, 12 Apr 2026 07:34:49 +0400 Subject: [PATCH] =?UTF-8?q?feat(p2p):=20Phase=205.5=20=E2=80=94=20ICE=20LA?= =?UTF-8?q?N=20host=20candidates=20(IPv4=20+=20IPv6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same-LAN P2P was failing because MikroTik masquerade (like most consumer NATs) doesn't support NAT hairpinning — the advertised WAN reflex addr is unreachable from a peer on the same LAN as the advertiser. Phase 5 got us Cone NAT classification and fixed the measurement artifact, but same-LAN direct dials still had nowhere to land. Phase 5.5 adds ICE-style host candidates: each client enumerates its LAN-local network interface addresses, includes them in the DirectCallOffer/Answer alongside the reflex addr, and the dual-path race fans out to ALL peer candidates in parallel. Same-LAN peers find each other via their RFC1918 IPv4 + ULA / global-unicast IPv6 addresses without touching the NAT at all. Dual-stack IPv6 is in scope from the start — on modern ISPs (including Starlink) the v6 path often works even when v4 hairpinning doesn't, because there's no NAT on the v6 side. ## Changes ### `wzp_client::reflect::local_host_candidates(port)` (new) Enumerates network interfaces via `if-addrs` and returns SocketAddrs paired with the caller's port. Filters: - IPv4: RFC1918 (10/8, 172.16/12, 192.168/16) + CGNAT (100.64/10) - IPv6: global unicast (2000::/3) + ULA (fc00::/7) - Skipped: loopback, link-local (169.254, fe80::), public v4 (already covered by reflex-addr), unspecified Safe from any thread, one `getifaddrs(3)` syscall. 
### Wire protocol (wzp-proto/packet.rs) Three new `#[serde(default, skip_serializing_if = "Vec::is_empty")]` fields, backward-compat with pre-5.5 clients/relays by construction: - `DirectCallOffer.caller_local_addrs: Vec<String>` - `DirectCallAnswer.callee_local_addrs: Vec<String>` - `CallSetup.peer_local_addrs: Vec<String>` ### Call registry (wzp-relay/call_registry.rs) `DirectCall` gains `caller_local_addrs` + `callee_local_addrs` Vec fields. New `set_caller_local_addrs` / `set_callee_local_addrs` setters. They follow the same pattern as the reflex addr fields. ### Relay cross-wiring (wzp-relay/main.rs) Both the local-call and cross-relay-federation paths now track the local_addrs through the registry and inject them into the CallSetup's peer_local_addrs. Cross-wiring is identical to the existing peer_direct_addr logic — each party's CallSetup carries the OTHER party's LAN candidates. ### Client side (desktop/src-tauri/lib.rs) - `place_call`: gathers local host candidates via `local_host_candidates(signal_endpoint.local_addr().port())` and includes them in `DirectCallOffer.caller_local_addrs`. The port match is critical — it's the Phase 5 shared signal socket, so incoming dials to these addrs land on the same endpoint that's already listening. - `answer_call`: same, AcceptTrusted only (privacy mode keeps LAN addrs hidden too, for consistency with the reflex addr). - `connect` Tauri command: new `peer_local_addrs: Vec<String>` arg. Builds a `PeerCandidates` bundle and passes it to the dual-path race. - Recv loop's CallSetup handler: destructures + forwards the new field to JS via the signal-event payload. ### `dual_path::race` (wzp-client/dual_path.rs) Signature change: takes `PeerCandidates` (reflex + local Vec) instead of a single SocketAddr. The D-role branch now fans out N parallel dials via `tokio::task::JoinSet` — one per candidate — and the first successful dial wins (losers are aborted immediately via `set.abort_all()`). 
Only when ALL candidates have failed do we return Err; individual candidate failures are just traced at debug level and the race waits for the others. LAN host candidates are tried BEFORE the reflex addr in `PeerCandidates::dial_order()` — they're faster when they work, and the reflex addr is the fallback for the not-on-same-LAN case. ### JS side (desktop/main.ts) `connect` invoke now passes `peerLocalAddrs: data.peer_local_addrs ?? []` alongside the existing `peerDirectAddr`. ### Tests All existing test callsites updated for the new Vec fields (defaults to Vec::new() in tests — they don't exercise the multi-candidate path). `dual_path.rs` integration tests wrap the single `dead_peer` / `acceptor_listen_addr` in a `PeerCandidates { reflexive: Some(_), local: Vec::new() }`. Full workspace test: 423 passing (same as before 5.5). ## Expected behavior on the reporter's setup Two phones behind MikroTik, both on the same LAN: place_call:host_candidates {"local_addrs": ["192.168.88.21:XXX", "2001:...:YY:XXX"]} recv:DirectCallAnswer {"callee_local_addrs": ["192.168.88.22:ZZZ", "2001:...:WW:ZZZ"]} recv:CallSetup {"peer_direct_addr":"150.228.49.65:NN", "peer_local_addrs":["192.168.88.22:ZZZ","2001:...:WW:ZZZ"]} connect:dual_path_race_start {"peer_reflex":"...","peer_local":[...]} dual_path: direct dial succeeded on candidate 0 ← LAN v4 wins connect:dual_path_race_won {"path":"Direct"} Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 11 + crates/wzp-client/Cargo.toml | 6 + crates/wzp-client/src/cli.rs | 4 +- crates/wzp-client/src/dual_path.rs | 192 ++++++++++++++---- crates/wzp-client/src/reflect.rs | 82 ++++++++ crates/wzp-client/tests/dual_path.rs | 17 +- crates/wzp-proto/src/packet.rs | 31 +++ crates/wzp-relay/src/call_registry.rs | 30 +++ crates/wzp-relay/src/main.rs | 57 ++++-- .../tests/cross_relay_direct_call.rs | 4 + crates/wzp-relay/tests/hole_punching.rs | 4 + desktop/src-tauri/src/lib.rs | 106 +++++++--- desktop/src/main.ts | 1 + 13 files changed, 463 
insertions(+), 82 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 65de776..a296fbc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2723,6 +2723,16 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "if-addrs" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69b2eeee38fef3aa9b4cc5f1beea8a2444fc00e7377cafae396de3f5c2065e24" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -7646,6 +7656,7 @@ dependencies = [ "chrono", "coreaudio-rs", "cpal", + "if-addrs", "libc", "rustls", "serde", diff --git a/crates/wzp-client/Cargo.toml b/crates/wzp-client/Cargo.toml index 2911ae9..95fee81 100644 --- a/crates/wzp-client/Cargo.toml +++ b/crates/wzp-client/Cargo.toml @@ -24,6 +24,12 @@ chrono = "0.4" rustls = { version = "0.23", default-features = false, features = ["ring", "std"] } cpal = { version = "0.15", optional = true } libc = "0.2" +# Phase 5.5 — LAN host-candidate ICE: enumerate local network +# interface addresses for inclusion in DirectCallOffer/Answer so +# peers on the same LAN can direct-connect without NAT hairpinning +# through the WAN reflex addr (which many consumer NATs, including +# MikroTik's default masquerade, don't support). +if-addrs = "0.13" # coreaudio-rs is Apple-framework-only; gate it to macOS so enabling # the `vpio` feature from a non-macOS target builds cleanly instead of diff --git a/crates/wzp-client/src/cli.rs b/crates/wzp-client/src/cli.rs index c9140ad..bb4e61f 100644 --- a/crates/wzp-client/src/cli.rs +++ b/crates/wzp-client/src/cli.rs @@ -773,6 +773,7 @@ async fn run_signal_mode( // CLI client doesn't attempt hole-punching; always // relay-path. caller_reflexive_addr: None, + caller_local_addrs: Vec::new(), }).await?; } @@ -805,12 +806,13 @@ async fn run_signal_mode( // CLI auto-accept uses generic (privacy) mode, // so callee addr stays hidden from the caller. 
callee_reflexive_addr: None, + callee_local_addrs: Vec::new(), }).await; } SignalMessage::DirectCallAnswer { call_id, accept_mode, .. } => { info!(call_id = %call_id, mode = ?accept_mode, "call answered"); } - SignalMessage::CallSetup { call_id, room, relay_addr: setup_relay, peer_direct_addr: _ } => { + SignalMessage::CallSetup { call_id, room, relay_addr: setup_relay, peer_direct_addr: _, peer_local_addrs: _ } => { info!(call_id = %call_id, room = %room, relay = %setup_relay, "call setup — connecting to media room"); // Connect to the media room diff --git a/crates/wzp-client/src/dual_path.rs b/crates/wzp-client/src/dual_path.rs index d9355e9..102d4c8 100644 --- a/crates/wzp-client/src/dual_path.rs +++ b/crates/wzp-client/src/dual_path.rs @@ -52,28 +52,66 @@ pub enum WinningPath { /// genuinely fail (network partition). Returns /// `Err(anyhow::anyhow!(...))` if both paths fail within the /// timeout. +/// Phase 5.5 candidate bundle — full ICE-ish candidate list for +/// the peer. The race tries them all in parallel alongside the +/// relay path. At minimum this should contain the peer's +/// server-reflexive address; `local_addrs` carries LAN host +/// candidates gathered from their physical interfaces. +/// +/// Empty is valid: the D-role has nothing to dial and the race +/// reduces to "relay only" + (if A-role) accepting on the +/// shared endpoint. +#[derive(Debug, Clone, Default)] +pub struct PeerCandidates { + /// Peer's server-reflexive address (Phase 3). `None` if the + /// peer didn't advertise one. + pub reflexive: Option, + /// Peer's LAN host addresses (Phase 5.5). Tried first on + /// same-LAN pairs — direct dials to these bypass the NAT + /// entirely. + pub local: Vec, +} + +impl PeerCandidates { + /// Flatten into the list of addrs the D-role should dial. + /// Order: LAN host candidates first (fastest when they + /// work), then reflexive (covers the non-LAN case). 
+ pub fn dial_order(&self) -> Vec { + let mut out = Vec::with_capacity(self.local.len() + 1); + out.extend(self.local.iter().copied()); + if let Some(a) = self.reflexive { + // Only add if it's not already in the list (some + // edge cases on same-LAN could have the same addr + // in both). + if !out.contains(&a) { + out.push(a); + } + } + out + } + + /// Is there anything for the D-role to dial? If not, the + /// race reduces to relay-only. + pub fn is_empty(&self) -> bool { + self.reflexive.is_none() && self.local.is_empty() + } +} + #[allow(clippy::too_many_arguments)] pub async fn race( role: Role, - peer_direct_addr: SocketAddr, + peer_candidates: PeerCandidates, relay_addr: SocketAddr, room_sni: String, call_sni: String, // Phase 5: when `Some`, reuse this endpoint for BOTH the - // direct-path branch AND the relay dial. This is critical - // for hole-punching through port-preserving NATs — the - // advertised reflex addr only matches what peers can dial if - // the listening socket is the SAME one that registered with - // the relay. Pass the signal endpoint here. + // direct-path branch AND the relay dial. Pass the signal + // endpoint. The endpoint MUST be server-capable (created + // with a server config) for the A-role accept branch to + // work. // - // The endpoint MUST have been created with a server config - // (`create_endpoint(bind, Some(server_config()))`) if the - // A-role branch is going to run, otherwise `accept()` will - // return None immediately. - // - // When `None`, falls back to the pre-Phase-5 behavior of - // creating fresh endpoints per role. Used by tests and by - // paths where we're not registered to a relay. + // When `None`, falls back to fresh endpoints per role. + // Used by tests. 
shared_endpoint: Option, ) -> anyhow::Result<(Arc, WinningPath)> { // Rustls provider must be installed before any quinn endpoint @@ -81,9 +119,22 @@ pub async fn race( let _ = rustls::crypto::ring::default_provider().install_default(); // Build the direct-path endpoint + future based on role. - // Each future returns an already-wrapped `QuinnTransport` so we - // don't need a direct `quinn::Connection` type in scope here - // (this crate doesn't depend on quinn directly). + // + // A-role: one accept future on the shared endpoint. The + // first incoming QUIC connection wins — we don't care + // which peer candidate the dialer used to reach us. + // + // D-role: N parallel dial futures, one per peer candidate + // (all LAN host addrs + the reflex addr), consolidated + // into a single direct_fut via FuturesUnordered-style + // "first OK wins" semantics. The first successful dial + // becomes the direct path; the losers are dropped (quinn + // will abort the in-flight handshakes via the dropped + // Connecting futures). + // + // Either way, direct_fut resolves to a single QuinnTransport + // (or an error) and is raced against the relay_fut by the + // outer tokio::select!. let direct_ep: wzp_transport::Endpoint; let direct_fut: std::pin::Pin< Box> + Send>, @@ -113,15 +164,12 @@ pub async fn race( let ep_for_fut = ep.clone(); direct_fut = Box::pin(async move { // `wzp_transport::accept` wraps the same - // `endpoint.accept().await?.await?` dance we want - // and maps errors into TransportError for us. - // + // `endpoint.accept().await?.await?` dance we want. // If `ep_for_fut` is the shared signal endpoint, - // this accept pulls the NEXT incoming connection - // — normally that's the peer's direct-P2P dial. - // Signal recv is done via the existing signal - // CONNECTION (accept_bi), not the endpoint, so - // there's no conflict. + // this pulls the NEXT incoming connection — + // normally that's the peer's direct-P2P dial. 
+ // Signal recv is done via the signal CONNECTION + // (accept_bi), not the endpoint, so no conflict. let conn = wzp_transport::accept(&ep_for_fut) .await .map_err(|e| anyhow::anyhow!("direct accept: {e}"))?; @@ -134,8 +182,8 @@ pub async fn race( Some(ep) => { tracing::info!( local_addr = ?ep.local_addr().ok(), - %peer_direct_addr, - "dual_path: D-role reusing shared endpoint to dial peer" + candidates = ?peer_candidates.dial_order(), + "dual_path: D-role reusing shared endpoint to dial peer candidates" ); ep } @@ -144,21 +192,86 @@ pub async fn race( let fresh = wzp_transport::create_endpoint(bind, None)?; tracing::info!( local_addr = ?fresh.local_addr().ok(), - %peer_direct_addr, - "dual_path: D-role fresh endpoint up, dialing peer" + candidates = ?peer_candidates.dial_order(), + "dual_path: D-role fresh endpoint up, dialing peer candidates" ); fresh } }; let ep_for_fut = ep.clone(); - let client_cfg = wzp_transport::client_config(); + let dial_order = peer_candidates.dial_order(); let sni = call_sni.clone(); direct_fut = Box::pin(async move { - let conn = - wzp_transport::connect(&ep_for_fut, peer_direct_addr, &sni, client_cfg) - .await - .map_err(|e| anyhow::anyhow!("direct dial: {e}"))?; - Ok(QuinnTransport::new(conn)) + if dial_order.is_empty() { + // No candidates — the race reduces to + // relay-only. Surface a stable error so the + // outer select falls through to relay_fut + // without a spurious "direct failed" warning. + // Use a pending future that never resolves so + // the select's "other side wins" branch is + // the natural outcome. + std::future::pending::>().await + } else { + // Fan out N parallel dials via JoinSet. First + // `Ok` wins; `Err` from a single candidate is + // not fatal — we wait for the others. Only + // when ALL have failed do we return Err. 
+ let mut set = tokio::task::JoinSet::new(); + for (idx, candidate) in dial_order.iter().enumerate() { + let ep = ep_for_fut.clone(); + let client_cfg = wzp_transport::client_config(); + let sni = sni.clone(); + let candidate = *candidate; + set.spawn(async move { + let result = wzp_transport::connect( + &ep, + candidate, + &sni, + client_cfg, + ) + .await; + (idx, candidate, result) + }); + } + let mut last_err: Option = None; + while let Some(join_res) = set.join_next().await { + let (idx, candidate, dial_res) = match join_res { + Ok(t) => t, + Err(e) => { + last_err = Some(format!("join {e}")); + continue; + } + }; + match dial_res { + Ok(conn) => { + tracing::info!( + %candidate, + candidate_idx = idx, + "dual_path: direct dial succeeded on candidate" + ); + // Abort the remaining in-flight + // dials so they don't complete + // and leak QUIC sessions. + set.abort_all(); + return Ok(QuinnTransport::new(conn)); + } + Err(e) => { + tracing::debug!( + %candidate, + candidate_idx = idx, + error = %e, + "dual_path: direct dial failed, trying others" + ); + last_err = Some(format!("candidate {candidate}: {e}")); + } + } + } + Err(anyhow::anyhow!( + "all {} direct candidates failed; last: {}", + dial_order.len(), + last_err.unwrap_or_else(|| "n/a".into()) + )) + } }); direct_ep = ep; } @@ -193,7 +306,12 @@ pub async fn race( // below need to await the OPPOSITE future after the winning // branch fires. Without pinning, tokio::select! moves the // future out and we can't touch it again. 
- tracing::info!(?role, %peer_direct_addr, %relay_addr, "dual_path: racing direct vs relay"); + tracing::info!( + ?role, + candidates = ?peer_candidates.dial_order(), + %relay_addr, + "dual_path: racing direct vs relay" + ); let direct_timed = tokio::time::timeout(Duration::from_secs(2), direct_fut); tokio::pin!(direct_timed, relay_fut); @@ -202,7 +320,7 @@ pub async fn race( direct_result = &mut direct_timed => { match direct_result { Ok(Ok(transport)) => { - tracing::info!(%peer_direct_addr, "dual_path: direct WON"); + tracing::info!("dual_path: direct WON"); Ok((Arc::new(transport), WinningPath::Direct)) } Ok(Err(e)) => { diff --git a/crates/wzp-client/src/reflect.rs b/crates/wzp-client/src/reflect.rs index f9083e1..c22a8c7 100644 --- a/crates/wzp-client/src/reflect.rs +++ b/crates/wzp-client/src/reflect.rs @@ -262,6 +262,88 @@ pub async fn detect_nat_type( } } +/// Enumerate LAN-local host candidates this client is reachable +/// on, paired with the given port (typically the signal +/// endpoint's bound port so that incoming dials land on the same +/// socket the advertised reflex addr points to). +/// +/// Gathers BOTH IPv4 and IPv6 candidates: +/// +/// - **IPv4**: RFC1918 private ranges (10/8, 172.16/12, 192.168/16) +/// and CGNAT shared-transition (100.64/10). Public IPv4 is +/// skipped because the reflex-addr path already covers it. +/// Loopback and link-local (169.254/16) are skipped. +/// +/// - **IPv6**: ALL global-unicast addresses (2000::/3 — the real +/// routable IPv6 space) AND unique-local (fc00::/7). These +/// are directly dialable from a peer on the same LAN, and on +/// true dual-stack LANs (which most consumer ISPs now provide, +/// including Starlink) IPv6 often gives a direct path even +/// when IPv4 can't hairpin. Loopback (::1), unspecified (::), +/// and link-local (fe80::/10) are skipped — link-local would +/// require a scope ID to be useful and is basically never +/// reachable across interface boundaries. 
+/// +/// The port must come from the caller — typically +/// `signal_endpoint.local_addr()?.port()`, so that the peer's +/// dials to these addresses land on the same socket that's +/// already listening (Phase 5 shared-endpoint architecture). +/// +/// Safe to call from any thread; no I/O, no async. The `if-addrs` +/// crate reads the kernel's interface table via a single +/// getifaddrs(3) syscall. +pub fn local_host_candidates(port: u16) -> Vec { + let Ok(ifaces) = if_addrs::get_if_addrs() else { + return Vec::new(); + }; + let mut out = Vec::new(); + for iface in ifaces { + if iface.is_loopback() { + continue; + } + match iface.ip() { + std::net::IpAddr::V4(v4) => { + if v4.is_link_local() { + continue; + } + // Keep RFC1918 private ranges and CGNAT — those + // are the LAN-dialable addrs we actually want. + // Skip public v4 because the reflex addr already + // covers that path. + if v4.is_private() { + out.push(SocketAddr::new(std::net::IpAddr::V4(v4), port)); + } else if v4.octets()[0] == 100 && (v4.octets()[1] & 0xc0) == 0x40 { + // 100.64/10 CGNAT — rare but valid if two + // phones are on the same CGNAT-hairpinned + // carrier LAN (some hotspot setups). + out.push(SocketAddr::new(std::net::IpAddr::V4(v4), port)); + } + } + std::net::IpAddr::V6(v6) => { + if v6.is_loopback() || v6.is_unspecified() { + continue; + } + // Link-local (fe80::/10) — skip because it needs + // a zone/scope ID to be usable and that scope is + // meaningless to the peer. + let first = v6.segments()[0]; + if (first & 0xffc0) == 0xfe80 { + continue; + } + // Include everything else: ULA (fc00::/7, high + // bits 0xfc00/0xfd00) and global unicast + // (2000::/3, first segment 0x2000-0x3fff). Both + // are directly dialable from a peer on the same + // dual-stack LAN, and on Starlink / most modern + // ISPs the IPv6 path usually has no CGNAT and + // works even when the v4 path doesn't hairpin. 
+ out.push(SocketAddr::new(std::net::IpAddr::V6(v6), port)); + } + } + } + out +} + /// Role assignment for the Phase 3.5 dual-path QUIC race. /// /// Both peers already know two strings at CallSetup time: their diff --git a/crates/wzp-client/tests/dual_path.rs b/crates/wzp-client/tests/dual_path.rs index 7c66023..b7cc537 100644 --- a/crates/wzp-client/tests/dual_path.rs +++ b/crates/wzp-client/tests/dual_path.rs @@ -19,7 +19,7 @@ use std::net::{Ipv4Addr, SocketAddr}; use std::time::Duration; -use wzp_client::dual_path::{race, WinningPath}; +use wzp_client::dual_path::{race, PeerCandidates, WinningPath}; use wzp_client::reflect::Role; use wzp_transport::{create_endpoint, server_config}; @@ -110,7 +110,10 @@ async fn dual_path_direct_wins_on_loopback() { // should win. let result = race( Role::Dialer, - acceptor_listen_addr, + PeerCandidates { + reflexive: Some(acceptor_listen_addr), + local: Vec::new(), + }, relay_addr, "test-room".into(), "call-test".into(), @@ -148,7 +151,10 @@ async fn dual_path_relay_wins_when_direct_is_dead() { let result = race( Role::Dialer, - dead_peer, + PeerCandidates { + reflexive: Some(dead_peer), + local: Vec::new(), + }, relay_addr, "test-room".into(), "call-test".into(), @@ -182,7 +188,10 @@ async fn dual_path_errors_cleanly_when_both_paths_dead() { let start = std::time::Instant::now(); let result = race( Role::Dialer, - dead_peer, + PeerCandidates { + reflexive: Some(dead_peer), + local: Vec::new(), + }, dead_relay, "test-room".into(), "call-test".into(), diff --git a/crates/wzp-proto/src/packet.rs b/crates/wzp-proto/src/packet.rs index 01cb78f..df50848 100644 --- a/crates/wzp-proto/src/packet.rs +++ b/crates/wzp-proto/src/packet.rs @@ -745,6 +745,16 @@ pub enum SignalMessage { /// `None` means "caller doesn't want P2P, use relay only". 
#[serde(default, skip_serializing_if = "Option::is_none")] caller_reflexive_addr: Option, + /// Phase 5.5 (ICE host candidates): caller's LAN-local + /// interface addresses paired with its signal endpoint's + /// port. Peers on the same physical LAN can direct-dial + /// these without going through the WAN reflex addr, + /// which is important because most consumer NATs + /// (including MikroTik masquerade) don't support NAT + /// hairpinning — the reflex addr is unreachable from + /// the same LAN. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + caller_local_addrs: Vec, }, /// Callee's response to a direct call. @@ -771,6 +781,13 @@ pub enum SignalMessage { /// carries it opaquely into the caller's `CallSetup`. #[serde(default, skip_serializing_if = "Option::is_none")] callee_reflexive_addr: Option, + /// Phase 5.5 (ICE host candidates): callee's LAN-local + /// interface addresses. Same purpose as + /// `caller_local_addrs` in `DirectCallOffer`. Only + /// populated on `AcceptTrusted` alongside + /// `callee_reflexive_addr`. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + callee_local_addrs: Vec, }, /// Relay tells both parties: media room is ready. @@ -791,6 +808,14 @@ pub enum SignalMessage { /// wasn't viable. #[serde(default, skip_serializing_if = "Option::is_none")] peer_direct_addr: Option, + /// Phase 5.5 (ICE host candidates): the OTHER party's LAN + /// host addresses (RFC1918 IPv4 + CGNAT + non-link-local + /// IPv6). On same-LAN calls these are directly dialable + /// and bypass the NAT-hairpinning problem that blocks + /// same-LAN peers from using `peer_direct_addr`. + /// Client-side race tries all of these in parallel. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + peer_local_addrs: Vec, }, /// Ringing notification (relay → caller, callee received the offer). 
@@ -1034,6 +1059,7 @@ mod tests { signature: vec![3u8; 64], supported_profiles: vec![], caller_reflexive_addr: Some("192.0.2.1:4433".into()), + caller_local_addrs: Vec::new(), }; let forward = SignalMessage::FederatedSignalForward { inner: Box::new(inner), @@ -1075,6 +1101,7 @@ mod tests { signature: None, chosen_profile: None, callee_reflexive_addr: Some("198.51.100.9:4433".into()), + callee_local_addrs: Vec::new(), }, SignalMessage::CallRinging { call_id: "c1".into() }, SignalMessage::Hangup { reason: HangupReason::Normal }, @@ -1109,6 +1136,7 @@ mod tests { signature: vec![], supported_profiles: vec![], caller_reflexive_addr: Some("192.0.2.1:4433".into()), + caller_local_addrs: Vec::new(), }; let json = serde_json::to_string(&offer).unwrap(); assert!( @@ -1136,6 +1164,7 @@ mod tests { signature: vec![], supported_profiles: vec![], caller_reflexive_addr: None, + caller_local_addrs: Vec::new(), }; let json_none = serde_json::to_string(&offer_none).unwrap(); assert!( @@ -1152,6 +1181,7 @@ mod tests { signature: None, chosen_profile: None, callee_reflexive_addr: Some("198.51.100.9:4433".into()), + callee_local_addrs: Vec::new(), }; let decoded: SignalMessage = serde_json::from_str(&serde_json::to_string(&answer).unwrap()).unwrap(); @@ -1171,6 +1201,7 @@ mod tests { room: "call-c1".into(), relay_addr: "203.0.113.5:4433".into(), peer_direct_addr: Some("192.0.2.1:4433".into()), + peer_local_addrs: Vec::new(), }; let decoded: SignalMessage = serde_json::from_str(&serde_json::to_string(&setup).unwrap()).unwrap(); diff --git a/crates/wzp-relay/src/call_registry.rs b/crates/wzp-relay/src/call_registry.rs index 8439c25..b2ca77d 100644 --- a/crates/wzp-relay/src/call_registry.rs +++ b/crates/wzp-relay/src/call_registry.rs @@ -50,6 +50,17 @@ pub struct DirectCall { /// `DirectCallAnswer` handling uses this to route the reply /// back through the SAME link instead of broadcasting again. 
pub peer_relay_fp: Option, + /// Phase 5.5 (ICE host candidates): caller's LAN-local + /// interface addresses from the `DirectCallOffer`. Cross- + /// wired into the callee's `CallSetup.peer_local_addrs` so + /// the callee can direct-dial the caller over the same LAN + /// without going through the WAN reflex addr (NAT + /// hairpinning often doesn't work for same-LAN peers). + pub caller_local_addrs: Vec, + /// Phase 5.5 (ICE host candidates): callee's LAN-local + /// interface addresses from the `DirectCallAnswer`. Cross- + /// wired into the caller's `CallSetup.peer_local_addrs`. + pub callee_local_addrs: Vec, } /// Registry of active direct calls. @@ -79,11 +90,30 @@ impl CallRegistry { caller_reflexive_addr: None, callee_reflexive_addr: None, peer_relay_fp: None, + caller_local_addrs: Vec::new(), + callee_local_addrs: Vec::new(), }; self.calls.insert(call_id.clone(), call); self.calls.get(&call_id).unwrap() } + /// Phase 5.5: stash the caller's LAN host candidates from + /// the `DirectCallOffer`. Empty Vec is a valid value meaning + /// "caller has no LAN candidates" (e.g. old client). + pub fn set_caller_local_addrs(&mut self, call_id: &str, addrs: Vec) { + if let Some(call) = self.calls.get_mut(call_id) { + call.caller_local_addrs = addrs; + } + } + + /// Phase 5.5: stash the callee's LAN host candidates from + /// the `DirectCallAnswer`. + pub fn set_callee_local_addrs(&mut self, call_id: &str, addrs: Vec) { + if let Some(call) = self.calls.get_mut(call_id) { + call.callee_local_addrs = addrs; + } + } + /// Phase 4: stash the federation TLS fingerprint of the peer /// relay that originated (or will receive) the cross-relay /// forward for this call. 
Safe to call with `None` to clear diff --git a/crates/wzp-relay/src/main.rs b/crates/wzp-relay/src/main.rs index f0b64da..0899fd8 100644 --- a/crates/wzp-relay/src/main.rs +++ b/crates/wzp-relay/src/main.rs @@ -543,6 +543,7 @@ async fn main() -> anyhow::Result<()> { ref caller_fingerprint, ref call_id, ref caller_reflexive_addr, + ref caller_local_addrs, .. } => { // Is the target on THIS relay? If not, drop — @@ -561,7 +562,8 @@ async fn main() -> anyhow::Result<()> { } // Stash in local registry so the answer path // can find the call + route the reply back - // through the same federation link. + // through the same federation link. Include + // Phase 5.5 LAN host candidates too. { let mut reg = call_registry_d.lock().await; reg.create_call( @@ -570,6 +572,7 @@ async fn main() -> anyhow::Result<()> { target_fingerprint.clone(), ); reg.set_caller_reflexive_addr(call_id, caller_reflexive_addr.clone()); + reg.set_caller_local_addrs(call_id, caller_local_addrs.clone()); reg.set_peer_relay_fp(call_id, Some(origin_relay_fp.clone())); } // Deliver the offer to the local target. @@ -587,6 +590,7 @@ async fn main() -> anyhow::Result<()> { ref call_id, accept_mode, ref callee_reflexive_addr, + ref callee_local_addrs, .. } => { // Look up the local caller fp from the registry. @@ -616,24 +620,26 @@ async fn main() -> anyhow::Result<()> { continue; } - // Accept — stash the callee's reflex addr + mark - // the call active, then read back BOTH addrs so - // we can cross-wire peer_direct_addr in CallSetup. + // Accept — stash the callee's reflex addr + LAN + // host candidates + mark the call active, + // then read back everything needed to cross- + // wire peer_direct_addr + peer_local_addrs in + // the local CallSetup. 
let room_name = format!("call-{call_id}"); - let (caller_addr, callee_addr_for_setup) = { + let (callee_addr_for_setup, callee_local_for_setup) = { let mut reg = call_registry_d.lock().await; reg.set_active(call_id, accept_mode, room_name.clone()); reg.set_callee_reflexive_addr( call_id, callee_reflexive_addr.clone(), ); + reg.set_callee_local_addrs(call_id, callee_local_addrs.clone()); let c = reg.get(call_id); ( - c.and_then(|c| c.caller_reflexive_addr.clone()), c.and_then(|c| c.callee_reflexive_addr.clone()), + c.map(|c| c.callee_local_addrs.clone()).unwrap_or_default(), ) }; - let _ = caller_addr; // unused on the caller side; callee holds the relevant addr // Forward the raw answer to the local caller so // the JS side sees DirectCallAnswer (fires any @@ -649,12 +655,14 @@ async fn main() -> anyhow::Result<()> { // (single-relay fallback — Phase 4.1 will wire // federated media so that actually reaches the // peer). peer_direct_addr = the callee's reflex - // addr carried in the answer. + // addr carried in the answer. peer_local_addrs + // = callee's LAN host candidates (Phase 5.5 ICE). let setup = SignalMessage::CallSetup { call_id: call_id.clone(), room: room_name.clone(), relay_addr: advertised_addr_d.clone(), peer_direct_addr: callee_addr_for_setup, + peer_local_addrs: callee_local_for_setup, }; let hub = signal_hub_d.lock().await; let _ = hub.send_to(&caller_fp, &setup).await; @@ -984,11 +992,13 @@ async fn main() -> anyhow::Result<()> { ref target_fingerprint, ref call_id, ref caller_reflexive_addr, + ref caller_local_addrs, .. 
} => { let target_fp = target_fingerprint.clone(); let call_id = call_id.clone(); let caller_addr_for_registry = caller_reflexive_addr.clone(); + let caller_local_for_registry = caller_local_addrs.clone(); // Check if target is online let online = { @@ -1035,7 +1045,8 @@ async fn main() -> anyhow::Result<()> { } // Create call in registry with the - // caller's reflex addr + mark it as + // caller's reflex addr + LAN host + // candidates, and mark it as // cross-relay so the answer path knows // to route the CallSetup's // peer_direct_addr from what the @@ -1053,7 +1064,11 @@ async fn main() -> anyhow::Result<()> { ); reg.set_caller_reflexive_addr( &call_id, - caller_addr_for_registry, + caller_addr_for_registry.clone(), + ); + reg.set_caller_local_addrs( + &call_id, + caller_local_for_registry.clone(), ); } @@ -1067,14 +1082,15 @@ async fn main() -> anyhow::Result<()> { } // Create call in registry + stash the caller's - // reflex addr (Phase 3 hole-punching). The relay - // treats the addr as opaque — no validation. - // Injected later into the callee's CallSetup as - // peer_direct_addr. + // reflex addr (Phase 3 hole-punching) AND its + // LAN host candidates (Phase 5.5 ICE). The + // relay treats both as opaque. Both are + // injected later into the callee's CallSetup. { let mut reg = call_registry.lock().await; reg.create_call(call_id.clone(), client_fp.clone(), target_fp.clone()); reg.set_caller_reflexive_addr(&call_id, caller_addr_for_registry); + reg.set_caller_local_addrs(&call_id, caller_local_for_registry); } // Forward offer to callee @@ -1095,11 +1111,13 @@ async fn main() -> anyhow::Result<()> { ref call_id, ref accept_mode, ref callee_reflexive_addr, + ref callee_local_addrs, .. } => { let call_id = call_id.clone(); let mode = *accept_mode; let callee_addr_for_registry = callee_reflexive_addr.clone(); + let callee_local_for_registry = callee_local_addrs.clone(); // Phase 4: look up peer fingerprint AND // peer_relay_fp in one lock acquisition. 
@@ -1160,14 +1178,17 @@ async fn main() -> anyhow::Result<()> { // BOTH parties' addrs so we can cross-wire // peer_direct_addr on the CallSetups below. let room = format!("call-{call_id}"); - let (caller_addr, callee_addr) = { + let (caller_addr, callee_addr, caller_local, callee_local) = { let mut reg = call_registry.lock().await; reg.set_active(&call_id, mode, room.clone()); reg.set_callee_reflexive_addr(&call_id, callee_addr_for_registry); + reg.set_callee_local_addrs(&call_id, callee_local_for_registry.clone()); let call = reg.get(&call_id); ( call.and_then(|c| c.caller_reflexive_addr.clone()), call.and_then(|c| c.callee_reflexive_addr.clone()), + call.map(|c| c.caller_local_addrs.clone()).unwrap_or_default(), + call.map(|c| c.callee_local_addrs.clone()).unwrap_or_default(), ) }; info!( @@ -1215,6 +1236,7 @@ async fn main() -> anyhow::Result<()> { room: room.clone(), relay_addr: relay_addr_for_setup, peer_direct_addr: caller_addr.clone(), + peer_local_addrs: caller_local.clone(), }; let hub = signal_hub.lock().await; let _ = hub.send_to(&client_fp, &setup_for_callee).await; @@ -1227,18 +1249,21 @@ async fn main() -> anyhow::Result<()> { } // Send CallSetup to BOTH parties with - // cross-wired peer_direct_addr. + // cross-wired peer_direct_addr + + // peer_local_addrs (Phase 5.5 ICE). 
let setup_for_caller = SignalMessage::CallSetup { call_id: call_id.clone(), room: room.clone(), relay_addr: relay_addr_for_setup.clone(), peer_direct_addr: callee_addr.clone(), + peer_local_addrs: callee_local.clone(), }; let setup_for_callee = SignalMessage::CallSetup { call_id: call_id.clone(), room: room.clone(), relay_addr: relay_addr_for_setup, peer_direct_addr: caller_addr.clone(), + peer_local_addrs: caller_local.clone(), }; let hub = signal_hub.lock().await; let _ = hub.send_to(&peer_fp, &setup_for_caller).await; diff --git a/crates/wzp-relay/tests/cross_relay_direct_call.rs b/crates/wzp-relay/tests/cross_relay_direct_call.rs index cf3ecd4..3d0425a 100644 --- a/crates/wzp-relay/tests/cross_relay_direct_call.rs +++ b/crates/wzp-relay/tests/cross_relay_direct_call.rs @@ -51,6 +51,7 @@ fn alice_offer(call_id: &str) -> SignalMessage { signature: vec![], supported_profiles: vec![], caller_reflexive_addr: Some(ALICE_ADDR.into()), + caller_local_addrs: Vec::new(), } } @@ -130,6 +131,7 @@ fn bob_answer(call_id: &str) -> SignalMessage { signature: None, chosen_profile: None, callee_reflexive_addr: Some(BOB_ADDR.into()), + callee_local_addrs: Vec::new(), } } @@ -173,6 +175,7 @@ fn relay_b_handle_local_answer( room: format!("call-{call_id}"), relay_addr: RELAY_B_ADDR.into(), peer_direct_addr: caller_addr, + peer_local_addrs: Vec::new(), }; let _ = callee_addr; (forward, setup_for_bob) @@ -213,6 +216,7 @@ fn relay_a_handle_forwarded_answer( room: format!("call-{call_id}"), relay_addr: RELAY_A_ADDR.into(), peer_direct_addr: callee_reflexive_addr, + peer_local_addrs: Vec::new(), } } diff --git a/crates/wzp-relay/tests/hole_punching.rs b/crates/wzp-relay/tests/hole_punching.rs index 58ed6f4..c3b26a4 100644 --- a/crates/wzp-relay/tests/hole_punching.rs +++ b/crates/wzp-relay/tests/hole_punching.rs @@ -81,12 +81,14 @@ fn handle_answer_and_build_setups( room: room.clone(), relay_addr: "203.0.113.5:4433".into(), peer_direct_addr: callee_addr, + peer_local_addrs: Vec::new(), 
}; let setup_for_callee = SignalMessage::CallSetup { call_id, room, relay_addr: "203.0.113.5:4433".into(), peer_direct_addr: caller_addr, + peer_local_addrs: Vec::new(), }; (setup_for_caller, setup_for_callee) } @@ -102,6 +104,7 @@ fn mk_offer(call_id: &str, caller_reflexive_addr: Option<&str>) -> SignalMessage signature: vec![], supported_profiles: vec![], caller_reflexive_addr: caller_reflexive_addr.map(String::from), + caller_local_addrs: Vec::new(), } } @@ -118,6 +121,7 @@ fn mk_answer( signature: None, chosen_profile: None, callee_reflexive_addr: callee_reflexive_addr.map(String::from), + callee_local_addrs: Vec::new(), } } diff --git a/desktop/src-tauri/src/lib.rs b/desktop/src-tauri/src/lib.rs index 1677e9f..318a4ce 100644 --- a/desktop/src-tauri/src/lib.rs +++ b/desktop/src-tauri/src/lib.rs @@ -323,21 +323,18 @@ async fn connect( alias: String, os_aec: bool, quality: String, - // Phase 3 hole-punching: peer's server-reflexive address as - // cross-wired by the relay in CallSetup.peer_direct_addr. JS - // passes it through when present. Currently LOGGED for - // observability but not yet used to race a direct QUIC - // handshake — that's the Phase 3.5 follow-up. Passing it - // through now so real-hardware testing can confirm the - // advertising layer is delivering the addrs end to end, and so - // the JS → Rust wire is stable before we add the race logic. - #[allow(non_snake_case)] + // Phase 3 hole-punching: peer's server-reflexive address + // cross-wired by the relay in CallSetup.peer_direct_addr. peer_direct_addr: Option, + // Phase 5.5: peer's LAN host candidates from CallSetup. + // JS side passes [] when empty. 
+ peer_local_addrs: Vec, ) -> Result { emit_call_debug(&app, "connect:start", serde_json::json!({ "relay": relay, "room": room, "peer_direct_addr": peer_direct_addr, + "peer_local_addrs": peer_local_addrs, })); let mut engine_lock = state.engine.lock().await; if engine_lock.is_some() { @@ -373,12 +370,26 @@ async fn connect( peer_direct_addr.as_deref(), ); + // Phase 5.5: build the full peer candidate bundle (reflex + + // LAN hosts). The dial_order helper will fan them out in + // priority order for the D-role race. + let peer_local_parsed: Vec = peer_local_addrs + .iter() + .filter_map(|s| s.parse().ok()) + .collect(); + let pre_connected_transport: Option> = - match (role, peer_addr_parsed, relay_addr_parsed) { - (Some(r), Some(peer_addr), Some(relay_sockaddr)) => { + match (role, relay_addr_parsed) { + (Some(r), Some(relay_sockaddr)) + if peer_addr_parsed.is_some() || !peer_local_parsed.is_empty() => + { + let candidates = wzp_client::dual_path::PeerCandidates { + reflexive: peer_addr_parsed, + local: peer_local_parsed.clone(), + }; tracing::info!( role = ?r, - %peer_addr, + candidates = ?candidates.dial_order(), %relay, %room, own = ?own_reflex_addr, @@ -386,7 +397,8 @@ async fn connect( ); emit_call_debug(&app, "connect:dual_path_race_start", serde_json::json!({ "role": format!("{:?}", r), - "peer_addr": peer_addr.to_string(), + "peer_reflex": peer_addr_parsed.map(|a| a.to_string()), + "peer_local": peer_local_parsed.iter().map(|a| a.to_string()).collect::>(), "relay_addr": relay_sockaddr.to_string(), "own_reflex_addr": own_reflex_addr, })); @@ -394,11 +406,9 @@ async fn connect( let call_sni = format!("call-{room}"); // Phase 5: pass the signal endpoint so the race // reuses ONE socket for listen + dial + relay. - // The advertised reflex addr then matches the - // actual listening port and peers can reach us. 
match wzp_client::dual_path::race( r, - peer_addr, + candidates, relay_sockaddr, room_sni, call_sni, @@ -430,7 +440,8 @@ async fn connect( } _ => { tracing::info!( - has_peer = peer_direct_addr.is_some(), + has_peer_reflex = peer_direct_addr.is_some(), + has_peer_local = !peer_local_addrs.is_empty(), has_own = own_reflex_addr.is_some(), ?role, %relay, @@ -438,7 +449,8 @@ async fn connect( "connect: skipping dual-path race (missing inputs), relay-only" ); emit_call_debug(&app, "connect:dual_path_skipped", serde_json::json!({ - "has_peer": peer_direct_addr.is_some(), + "has_peer_reflex": peer_direct_addr.is_some(), + "has_peer_local": !peer_local_addrs.is_empty(), "has_own": own_reflex_addr.is_some(), "role": format!("{:?}", role), })); @@ -878,18 +890,17 @@ fn do_register_signal( "callee_reflexive_addr": callee_reflexive_addr, })); } - Ok(Some(SignalMessage::CallSetup { call_id, room, relay_addr, peer_direct_addr })) => { + Ok(Some(SignalMessage::CallSetup { call_id, room, relay_addr, peer_direct_addr, peer_local_addrs })) => { // Phase 3: peer_direct_addr carries the OTHER party's - // reflex addr when hole-punching is viable. Forwarded - // to JS alongside the relay addr so the connect flow - // can attempt a dual-path race. `null` when either - // side didn't advertise (pre-Phase-3 peer, privacy - // mode callee, or relay policy). + // reflex addr. Phase 5.5: peer_local_addrs carries + // their LAN host candidates (usable for same-LAN + // direct dials that can't hairpin through the NAT). 
tracing::info!( %call_id, %room, %relay_addr, peer_direct = ?peer_direct_addr, + peer_local = ?peer_local_addrs, "signal: CallSetup — emitting setup event to JS" ); emit_call_debug(&app_clone, "recv:CallSetup", serde_json::json!({ @@ -897,6 +908,7 @@ fn do_register_signal( "room": room, "relay_addr": relay_addr, "peer_direct_addr": peer_direct_addr, + "peer_local_addrs": peer_local_addrs, })); let mut sig = signal_state.lock().await; sig.signal_status = "setup".into(); @@ -908,6 +920,7 @@ fn do_register_signal( "room": room, "relay_addr": relay_addr, "peer_direct_addr": peer_direct_addr, + "peer_local_addrs": peer_local_addrs, }), ); } @@ -1164,6 +1177,26 @@ async fn place_call( emit_call_debug(&app, "place_call:reflect_query_none", serde_json::json!({})); } + // Phase 5.5: gather LAN host candidates using the signal + // endpoint's bound port so incoming dials land on the same + // socket that's already listening. + let caller_local_addrs: Vec = { + let sig = state.signal.lock().await; + sig.endpoint + .as_ref() + .and_then(|ep| ep.local_addr().ok()) + .map(|la| { + wzp_client::reflect::local_host_candidates(la.port()) + .into_iter() + .map(|a| a.to_string()) + .collect() + }) + .unwrap_or_default() + }; + emit_call_debug(&app, "place_call:host_candidates", serde_json::json!({ + "local_addrs": caller_local_addrs, + })); + let sig = state.signal.lock().await; let transport = sig.transport.as_ref().ok_or("not registered")?; let call_id = format!( @@ -1185,6 +1218,7 @@ async fn place_call( signature: vec![], supported_profiles: vec![wzp_proto::QualityProfile::GOOD], caller_reflexive_addr: own_reflex.clone(), + caller_local_addrs: caller_local_addrs.clone(), }) .await .map_err(|e| { @@ -1245,6 +1279,29 @@ async fn answer_call( None }; + // Phase 5.5: gather LAN host candidates (AcceptTrusted only + // for symmetry with the reflex addr — privacy mode keeps + // LAN addrs hidden too). 
+ let callee_local_addrs: Vec = + if accept_mode == wzp_proto::CallAcceptMode::AcceptTrusted { + let sig = state.signal.lock().await; + sig.endpoint + .as_ref() + .and_then(|ep| ep.local_addr().ok()) + .map(|la| { + wzp_client::reflect::local_host_candidates(la.port()) + .into_iter() + .map(|a| a.to_string()) + .collect() + }) + .unwrap_or_default() + } else { + Vec::new() + }; + emit_call_debug(&app, "answer_call:host_candidates", serde_json::json!({ + "local_addrs": callee_local_addrs, + })); + let sig = state.signal.lock().await; let transport = sig.transport.as_ref().ok_or_else(|| { tracing::warn!("answer_call: not registered (no transport)"); @@ -1260,6 +1317,7 @@ async fn answer_call( signature: None, chosen_profile: Some(wzp_proto::QualityProfile::GOOD), callee_reflexive_addr: own_reflex.clone(), + callee_local_addrs: callee_local_addrs.clone(), }) .await .map_err(|e| { diff --git a/desktop/src/main.ts b/desktop/src/main.ts index 3ad3f1f..3062a0c 100644 --- a/desktop/src/main.ts +++ b/desktop/src/main.ts @@ -1488,6 +1488,7 @@ listen("signal-event", (event: any) => { osAec: osAecCheckbox.checked, quality: loadSettings().quality || "auto", peerDirectAddr: data.peer_direct_addr ?? null, + peerLocalAddrs: data.peer_local_addrs ?? [], }); showCallScreen(); } catch (e: any) {