From da08723fe7fd3c7751538759e9609bcba03ef2b5 Mon Sep 17 00:00:00 2001 From: Siavash Sameni Date: Sat, 11 Apr 2026 18:13:31 +0400 Subject: [PATCH] =?UTF-8?q?fix(signal):=20forward-compat=20=E2=80=94=20log?= =?UTF-8?q?+continue=20on=20unknown=20SignalMessage=20variants?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both sides of the signal channel previously broke their recv loop on any deserialize error, which meant adding a new variant in one build silently killed signal connections from peers running an older build. This bit us during Phase 1 testing: a new client sending SignalMessage::Reflect to a pre-Phase-1 relay caused the relay to drop the whole signal connection, which looked like "Error: not registered" on the next place_call. Fix: - New TransportError::Deserialize(String) variant in wzp-proto carries serde errors as a distinct category. - wzp-transport/reliable.rs::recv_signal returns Deserialize on serde_json::from_slice failures (was wrapped in Internal). - wzp-relay/main.rs signal loop matches on Deserialize → warn + continue (instead of break). - desktop/src-tauri/lib.rs recv loop does the same. Other TransportError variants (ConnectionLost, Io, Internal) still break the loop — only pure parse failures are recoverable. This means future SignalMessage variant additions are backward- compat by construction: older peers will see "unknown variant, continuing" in their logs while newer peers can keep evolving the protocol. Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/wzp-proto/src/error.rs | 9 +++++++++ crates/wzp-relay/src/main.rs | 10 ++++++++++ crates/wzp-transport/src/reliable.rs | 11 +++++++++-- desktop/src-tauri/src/lib.rs | 9 +++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) diff --git a/crates/wzp-proto/src/error.rs b/crates/wzp-proto/src/error.rs index 0b006f0..45dc24d 100644 --- a/crates/wzp-proto/src/error.rs +++ b/crates/wzp-proto/src/error.rs @@ -53,6 +53,15 @@ pub enum TransportError { Timeout { ms: u64 }, #[error("io error: {0}")] Io(#[from] std::io::Error), + /// Parsed wire bytes successfully but the payload didn't + /// deserialize into a known `SignalMessage` variant. Usually + /// means the peer is running a newer build with a variant we + /// don't know yet. Callers should **log and continue** rather + /// than tearing down the connection, so that forward-compat + /// additions to `SignalMessage` don't silently kill old + /// clients/relays. + #[error("signal deserialize: {0}")] + Deserialize(String), #[error("internal transport error: {0}")] Internal(String), } diff --git a/crates/wzp-relay/src/main.rs b/crates/wzp-relay/src/main.rs index d37333b..f0b64da 100644 --- a/crates/wzp-relay/src/main.rs +++ b/crates/wzp-relay/src/main.rs @@ -1307,6 +1307,16 @@ async fn main() -> anyhow::Result<()> { info!(%addr, "signal connection closed"); break; } + Err(wzp_proto::TransportError::Deserialize(e)) => { + // Forward-compat: the peer sent a + // SignalMessage variant we don't know + // (newer client, newer federation peer). + // Log and continue — tearing down the + // connection on unknown variants would + // silently kill interop across minor + // protocol version bumps. + warn!(%addr, "signal deserialize (unknown variant?), continuing: {e}"); + } Err(e) => { warn!(%addr, "signal recv error: {e}"); break; diff --git a/crates/wzp-transport/src/reliable.rs b/crates/wzp-transport/src/reliable.rs index 0b088a7..61691f1 100644 --- a/crates/wzp-transport/src/reliable.rs +++ b/crates/wzp-transport/src/reliable.rs @@ -53,6 +53,13 @@ pub async fn recv_signal(recv: &mut quinn::RecvStream) -> Result { + // Forward-compat: the relay sent us a + // SignalMessage variant we don't know yet + // (older client against a newer relay). + // Log and keep the signal connection alive — + // otherwise direct-call registration would + // silently die on any protocol bump. + tracing::warn!(error = %e, "signal recv: unknown variant, continuing"); + } Err(e) => { tracing::warn!(error = %e, "signal recv error — breaking loop"); break;