fix(federation): diagnostic logging for cross-relay media routing

Added warn-level log in handle_datagram when a federation
datagram arrives but no matching local room is found. Prints:
- room_hash (8-byte tag from the datagram)
- active_rooms (all rooms the relay currently has)
- seq + peer label

This diagnoses the cross-relay recv_fr=0 issue: if media IS
arriving from the peer relay but the room hash doesn't match any
active room, the log tells us exactly what hash is expected vs
what rooms exist locally. If no datagram log fires at all, the
issue is upstream (peer relay not forwarding, federation link
down, etc.).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Siavash Sameni
2026-04-12 09:27:34 +04:00
parent 0ccf4ed6b5
commit 026940d492

View File

@@ -1077,7 +1077,7 @@ async fn handle_datagram(
// First: check local rooms (has participants)
active.iter().find(|r| room_hash(r) == rh).cloned()
.or_else(|| active.iter().find(|r| fm.global_room_hash(r) == rh).cloned())
// Second: check global room config (hub relay may have no local participants)
// Second: check static global room config (hub relay may have no local participants)
.or_else(|| {
fm.global_rooms.iter().find(|name| room_hash(name) == rh).cloned()
})
@@ -1087,6 +1087,23 @@ async fn handle_datagram(
Some(r) => r,
None => {
fm.event_log.emit(Event::new("room_not_found").seq(pkt.header.seq).peer(&peer_label));
// Phase 4.1 diagnostic: log the hash + active rooms
// so we can diagnose cross-relay call-* media routing
// failures. This fires when a peer relay sends media
// for a room we don't have locally — could be a
// timing issue (peer joined before us) or a hash
// mismatch.
let active = {
let mgr = fm.room_mgr.lock().await;
mgr.active_rooms()
};
warn!(
room_hash = ?rh,
active_rooms = ?active,
seq = pkt.header.seq,
peer = %peer_label,
"federation datagram for unknown room — no local room matches hash"
);
return;
}
};