feat: federation rewrite — global rooms router model
Some checks failed
Mirror to GitHub / mirror (push) Failing after 36s
Build Release Binaries / build-amd64 (push) Failing after 1m52s

Major rewrite of relay federation replacing virtual participants with
a clean router model:

1. Global rooms: [[global_rooms]] in TOML config declares rooms that
   are bridged across federation. Each relay is a router + local SFU.

2. Room events: RoomManager emits LocalJoin/LocalLeave via broadcast
   channel when rooms transition between empty and non-empty.

3. GlobalRoomActive/Inactive signals: relays announce when they have
   local participants in global rooms. Peers track active state and
   forward media accordingly. Announcements propagate for multi-hop.

4. Media forwarding: separated from SFU loop. Local participant sends
   via mpsc channel → egress task → forward_to_peers() → room-hash
   tagged datagrams to active peer links. Inbound datagrams delivered
   to local participants + forwarded to other active peers (multi-hop).

5. Loop prevention: don't forward back to source relay.

6. Room name hashing: is_global_room() checks both plain name and
   hash (clients hash room names for SNI privacy).

Removed: ParticipantSender::Federation, federated_participants, virtual
participant join/leave, periodic room polling. Rooms now only contain
local participants.

Signaling tested: 3-relay chain (A→B←C) correctly propagates
GlobalRoomActive through B to both A and C. Media forwarding plumbing
in place but needs final debugging.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Siavash Sameni
2026-04-08 07:54:38 +04:00
parent bc8bb3d790
commit b00db5dfdc
6 changed files with 387 additions and 344 deletions

View File

@@ -1,10 +1,11 @@
//! Relay federation — connects to peer relays and bridges rooms with matching names.
//! Relay federation — global room routing between peer relays.
//!
//! Each federated peer is represented as a virtual participant in shared rooms.
//! Media from local participants is forwarded to the peer via room-tagged datagrams.
//! Media from the peer is received, demuxed by room hash, and forwarded to local participants.
//! Each relay maintains a forwarding table per global room. When a local participant
//! sends media in a global room, it's forwarded to all peer relays that have the room
//! active. Incoming federated media is delivered to local participants and optionally
//! forwarded to other active peers (multi-hop).
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
@@ -18,7 +19,7 @@ use wzp_proto::{MediaTransport, SignalMessage};
use wzp_transport::QuinnTransport;
use crate::config::{PeerConfig, TrustedConfig};
use crate::room::{self, ParticipantSender, RoomManager};
use crate::room::{self, FederationMediaOut, RoomEvent, RoomManager};
/// Compute 8-byte room hash for federation datagram tagging.
pub fn room_hash(room_name: &str) -> [u8; 8] {
@@ -28,19 +29,36 @@ pub fn room_hash(room_name: &str) -> [u8; 8] {
out
}
/// Manages federation connections to peer relays.
/// Canonicalize a fingerprint for comparison: drop every `:` separator and
/// lowercase what remains, so `"AB:CD"` and `"abcd"` compare equal.
fn normalize_fp(fp: &str) -> String {
    let without_colons: String = fp.chars().filter(|&ch| ch != ':').collect();
    without_colons.to_lowercase()
}
/// Active link to a peer relay.
///
/// One entry exists per connected peer in `FederationManager::peer_links`,
/// keyed by the peer's normalized fingerprint.
struct PeerLink {
/// Transport for this link; signals and room-hash-tagged datagrams for the
/// peer are sent through it.
transport: Arc<QuinnTransport>,
/// Human-readable peer name: the configured label, or the peer URL when no
/// label is configured.
label: String,
/// Global rooms that this peer has reported as active.
active_rooms: HashSet<String>,
}
/// Manages federation connections and global room forwarding.
pub struct FederationManager {
/// Configured peers; one outbound connection loop is spawned per entry.
peers: Vec<PeerConfig>,
/// Trusted relays, looked up by TLS fingerprint for inbound connections.
trusted: Vec<TrustedConfig>,
/// Names of the rooms configured as global (bridged across federation).
global_rooms: HashSet<String>,
/// Shared room manager; queried for active rooms, local senders and room events.
room_mgr: Arc<Mutex<RoomManager>>,
/// QUIC endpoint used to open outbound connections to peers.
endpoint: quinn::Endpoint,
/// Our own TLS fingerprint, sent to peers in the federation hello.
local_tls_fp: String,
/// Active peer connections, keyed by normalized fingerprint.
peer_links: Arc<Mutex<HashMap<String, PeerLink>>>,
}
impl FederationManager {
pub fn new(
peers: Vec<PeerConfig>,
trusted: Vec<TrustedConfig>,
global_rooms: HashSet<String>,
room_mgr: Arc<Mutex<RoomManager>>,
endpoint: quinn::Endpoint,
local_tls_fp: String,
@@ -48,19 +66,41 @@ impl FederationManager {
Self {
peers,
trusted,
global_rooms,
room_mgr,
endpoint,
local_tls_fp,
peer_links: Arc::new(Mutex::new(HashMap::new())),
}
}
/// Start federation — spawns one task per configured peer.
/// Report whether `room` refers to one of the configured global rooms.
///
/// `room` may be either the plain configured name or its hashed form (the
/// room manager keys rooms by the hashed SNI), so both are accepted.
pub fn is_global_room(&self, room: &str) -> bool {
    // Cheap exact-name lookup first; only fall back to hashing every
    // configured name when the plain lookup misses.
    self.global_rooms.contains(room)
        || self
            .global_rooms
            .iter()
            .any(|configured| wzp_crypto::hash_room_name(configured) == room)
}
/// Start federation — spawns connection loops + event dispatcher.
pub async fn run(self: Arc<Self>) {
if self.peers.is_empty() {
if self.peers.is_empty() && self.global_rooms.is_empty() {
return;
}
info!(peers = self.peers.len(), "federation starting");
info!(
peers = self.peers.len(),
global_rooms = self.global_rooms.len(),
"federation starting"
);
let mut handles = Vec::new();
// Per-peer outbound connection loops
for peer in &self.peers {
let this = self.clone();
let peer = peer.clone();
@@ -68,30 +108,58 @@ impl FederationManager {
run_peer_loop(this, peer).await;
}));
}
// Room event dispatcher
let room_events = {
let mgr = self.room_mgr.lock().await;
mgr.subscribe_events()
};
let this = self.clone();
handles.push(tokio::spawn(async move {
run_room_event_dispatcher(this, room_events).await;
}));
for h in handles {
let _ = h.await;
}
}
/// Handle an inbound federation connection from a peer that we recognize.
/// Handle an inbound federation connection from a recognized peer.
pub async fn handle_inbound(
self: &Arc<Self>,
transport: Arc<QuinnTransport>,
peer_config: PeerConfig,
) {
let addr: SocketAddr = peer_config.url.parse().unwrap_or_else(|_| "0.0.0.0:0".parse().unwrap());
info!(peer = ?peer_config.label, %addr, "inbound federation link active");
if let Err(e) = run_federation_link(self.clone(), transport, addr, &peer_config).await {
warn!(peer = ?peer_config.label, "inbound federation link ended: {e}");
let peer_fp = normalize_fp(&peer_config.fingerprint);
let label = peer_config.label.unwrap_or_else(|| peer_config.url.clone());
info!(peer = %label, "inbound federation link active");
if let Err(e) = run_federation_link(self.clone(), transport, peer_fp, label.clone()).await {
warn!(peer = %label, "inbound federation link ended: {e}");
}
}
/// Find a configured peer by TLS fingerprint.
/// Forward locally-generated media to active peers for a global room.
///
/// Every peer link whose `active_rooms` contains `room_name` receives one
/// datagram consisting of the 8-byte `room_hash` followed by `media_data`.
/// Sending is best-effort: a failed send to one peer is ignored and does not
/// affect the others.
pub async fn forward_to_peers(&self, room_name: &str, room_hash: &[u8; 8], media_data: &Bytes) {
    let links = self.peer_links.lock().await;

    // The tagged datagram is identical for every peer, so build it lazily:
    // no allocation when nobody wants this room, exactly one otherwise.
    let mut tagged: Option<Vec<u8>> = None;
    for link in links.values().filter(|l| l.active_rooms.contains(room_name)) {
        let datagram = tagged.get_or_insert_with(|| {
            let mut buf = Vec::with_capacity(room_hash.len() + media_data.len());
            buf.extend_from_slice(room_hash);
            buf.extend_from_slice(media_data);
            buf
        });
        let _ = link.transport.send_raw_datagram(&*datagram);
    }
}
// ── Trust verification (kept from previous implementation) ──
/// Find a configured peer whose TLS fingerprint matches `fp`.
///
/// Both sides are compared in normalized form (colons removed, lowercase).
/// Returns `None` when no configured peer matches.
pub fn find_peer_by_fingerprint(&self, fp: &str) -> Option<&PeerConfig> {
    // Normalize the needle once instead of once per configured peer.
    let wanted = normalize_fp(fp);
    self.peers
        .iter()
        .find(|p| normalize_fp(&p.fingerprint) == wanted)
}
/// Find a configured peer by source IP address.
pub fn find_peer_by_addr(&self, addr: SocketAddr) -> Option<&PeerConfig> {
let addr_ip = addr.ip();
self.peers.iter().find(|p| {
@@ -101,19 +169,14 @@ impl FederationManager {
})
}
/// Find a trusted relay by TLS fingerprint.
///
/// Both sides are compared in normalized form (colons removed, lowercase).
/// Returns `None` when no trusted entry matches.
pub fn find_trusted_by_fingerprint(&self, fp: &str) -> Option<&TrustedConfig> {
    // Normalize the needle once instead of once per trusted entry.
    let wanted = normalize_fp(fp);
    self.trusted
        .iter()
        .find(|t| normalize_fp(&t.fingerprint) == wanted)
}
/// Check if an inbound federation connection is trusted (by IP match in [[peers]] or fingerprint in [[trusted]]).
/// Returns the label for logging.
pub fn check_inbound_trust(&self, addr: SocketAddr, hello_fp: &str) -> Option<String> {
// Check [[peers]] by IP
if let Some(peer) = self.find_peer_by_addr(addr) {
return Some(peer.label.clone().unwrap_or_else(|| peer.url.clone()));
}
// Check [[trusted]] by fingerprint
if let Some(trusted) = self.find_trusted_by_fingerprint(hello_fp) {
return Some(trusted.label.clone().unwrap_or_else(|| hello_fp[..16].to_string()));
}
@@ -121,11 +184,57 @@ impl FederationManager {
}
}
/// Normalize a fingerprint string (remove colons, lowercase).
fn normalize_fp(fp: &str) -> String {
fp.replace(':', "").to_lowercase()
// ── Outbound media egress task ──
/// Pump the federation media channel: every queued outbound item is handed to
/// `FederationManager::forward_to_peers`. Returns once the channel is closed
/// (i.e. `recv` yields `None`).
pub async fn run_federation_media_egress(
    fm: Arc<FederationManager>,
    mut rx: tokio::sync::mpsc::Receiver<FederationMediaOut>,
) {
    loop {
        let outbound = match rx.recv().await {
            Some(item) => item,
            None => break,
        };
        fm.forward_to_peers(&outbound.room_name, &outbound.room_hash, &outbound.data)
            .await;
    }
}
// ── Room event dispatcher ──
/// Watches RoomManager events and sends GlobalRoomActive/Inactive to peers.
async fn run_room_event_dispatcher(
fm: Arc<FederationManager>,
mut events: tokio::sync::broadcast::Receiver<RoomEvent>,
) {
loop {
match events.recv().await {
Ok(RoomEvent::LocalJoin { room }) => {
if fm.is_global_room(&room) {
info!(room = %room, "global room now active, announcing to peers");
let msg = SignalMessage::GlobalRoomActive { room };
let links = fm.peer_links.lock().await;
for link in links.values() {
let _ = link.transport.send_signal(&msg).await;
}
}
}
Ok(RoomEvent::LocalLeave { room }) => {
if fm.is_global_room(&room) {
info!(room = %room, "global room now inactive, announcing to peers");
let msg = SignalMessage::GlobalRoomInactive { room };
let links = fm.peer_links.lock().await;
for link in links.values() {
let _ = link.transport.send_signal(&msg).await;
}
}
}
Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {
warn!(missed = n, "room event receiver lagged");
}
Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
}
}
}
// ── Peer connection management ──
/// Persistent connection loop for one peer — reconnects with backoff.
async fn run_peer_loop(fm: Arc<FederationManager>, peer: PeerConfig) {
let mut backoff = Duration::from_secs(5);
@@ -133,9 +242,10 @@ async fn run_peer_loop(fm: Arc<FederationManager>, peer: PeerConfig) {
info!(peer_url = %peer.url, label = ?peer.label, "federation: connecting to peer...");
match connect_to_peer(&fm, &peer).await {
Ok(transport) => {
backoff = Duration::from_secs(5); // reset on success
let addr: SocketAddr = peer.url.parse().unwrap_or_else(|_| "0.0.0.0:0".parse().unwrap());
if let Err(e) = run_federation_link(fm.clone(), transport, addr, &peer).await {
backoff = Duration::from_secs(5);
let peer_fp = normalize_fp(&peer.fingerprint);
let label = peer.label.clone().unwrap_or_else(|| peer.url.clone());
if let Err(e) = run_federation_link(fm.clone(), transport, peer_fp, label).await {
warn!(peer_url = %peer.url, "federation link ended: {e}");
}
}
@@ -148,219 +258,201 @@ async fn run_peer_loop(fm: Arc<FederationManager>, peer: PeerConfig) {
}
}
/// Connect to a peer relay.
/// Connect to a peer relay and send hello.
async fn connect_to_peer(fm: &FederationManager, peer: &PeerConfig) -> Result<Arc<QuinnTransport>, anyhow::Error> {
let addr: SocketAddr = peer.url.parse()?;
let client_cfg = wzp_transport::client_config();
let conn = wzp_transport::connect(&fm.endpoint, addr, "_federation", client_cfg).await?;
// TODO: verify peer TLS fingerprint once we have cert access
let transport = Arc::new(QuinnTransport::new(conn));
// Send hello with our TLS fingerprint so the peer can verify us
// Send hello with our TLS fingerprint
let hello = SignalMessage::FederationHello {
tls_fingerprint: fm.local_tls_fp.clone(),
};
transport.send_signal(&hello).await
.map_err(|e| anyhow::anyhow!("federation hello send failed: {e}"))?;
info!(peer_url = %peer.url, label = ?peer.label, "federation: connected to peer (hello sent)");
info!(peer_url = %peer.url, label = ?peer.label, "federation: connected (hello sent)");
Ok(transport)
}
/// Run the federation link: exchange room info and forward media.
// ── Federation link (runs on a single QUIC connection) ──
/// Run the federation link: exchange global room state and forward media.
async fn run_federation_link(
fm: Arc<FederationManager>,
transport: Arc<QuinnTransport>,
peer_addr: SocketAddr,
peer: &PeerConfig,
peer_fp: String,
peer_label: String,
) -> Result<(), anyhow::Error> {
// Announce our active rooms to the peer
let rooms = {
let mgr = fm.room_mgr.lock().await;
mgr.active_rooms()
};
for room_name in &rooms {
let participants = {
let mgr = fm.room_mgr.lock().await;
mgr.local_participants(room_name)
};
let msg = SignalMessage::FederationRoomJoin {
room: room_name.clone(),
participants,
};
transport.send_signal(&msg).await?;
// Register peer link
{
let mut links = fm.peer_links.lock().await;
links.insert(peer_fp.clone(), PeerLink {
transport: transport.clone(),
label: peer_label.clone(),
active_rooms: HashSet::new(),
});
}
// Track virtual participants we create on behalf of this peer
let mut peer_room_participants: HashMap<String, room::ParticipantId> = HashMap::new();
// Map room_hash -> room_name for incoming media demux
let mut hash_to_room: HashMap<[u8; 8], String> = HashMap::new();
// Announce our currently active global rooms
{
let mgr = fm.room_mgr.lock().await;
for room_name in mgr.active_rooms() {
if fm.is_global_room(&room_name) {
let msg = SignalMessage::GlobalRoomActive { room: room_name };
let _ = transport.send_signal(&msg).await;
}
}
}
// Run three tasks: recv signals + recv media + periodic room announcements
// Two concurrent tasks: signal recv + media recv
let signal_transport = transport.clone();
let media_transport = transport.clone();
let announce_transport = transport.clone();
let fm_signal = fm.clone();
let fm_media = fm.clone();
let fm_announce = fm.clone();
let peer_label = peer.label.clone().unwrap_or_else(|| peer.url.clone());
let peer_label2 = peer_label.clone();
let peer_fp_signal = peer_fp.clone();
let peer_fp_media = peer_fp.clone();
let label_signal = peer_label.clone();
let signal_task = async move {
loop {
match signal_transport.recv_signal().await {
Ok(Some(msg)) => {
info!(peer = %peer_label, "federation: received signal {:?}", std::mem::discriminant(&msg));
match msg {
SignalMessage::FederationRoomJoin { room, participants } => {
info!(peer = %peer_label, room = %room, count = participants.len(), "federation: peer room join");
let rh = room_hash(&room);
hash_to_room.insert(rh, room.clone());
let sender = ParticipantSender::Federation {
transport: signal_transport.clone(),
room_hash: rh,
};
let (pid, update, senders) = {
let mut mgr = fm_signal.room_mgr.lock().await;
mgr.join_federated(&room, peer_addr, sender, participants)
};
peer_room_participants.insert(room, pid);
room::broadcast_signal(&senders, &update).await;
}
SignalMessage::FederationRoomLeave { room } => {
info!(peer = %peer_label, room = %room, "federation: peer room leave");
if let Some(pid) = peer_room_participants.remove(&room) {
let result = {
let mut mgr = fm_signal.room_mgr.lock().await;
mgr.leave(&room, pid)
};
if let Some((update, senders)) = result {
room::broadcast_signal(&senders, &update).await;
}
}
hash_to_room.retain(|_, v| v != &room);
}
SignalMessage::FederationParticipantUpdate { room, participants } => {
let result = {
let mut mgr = fm_signal.room_mgr.lock().await;
mgr.update_federated_participants(&room, peer_addr, participants)
};
if let Some((update, senders)) = result {
room::broadcast_signal(&senders, &update).await;
}
}
_ => {} // ignore other signals
}
handle_signal(&fm_signal, &peer_fp_signal, &label_signal, msg).await;
}
Ok(None) => break,
Err(e) => {
error!(peer = %peer_label, "federation signal recv error: {e}");
error!(peer = %label_signal, "federation signal error: {e}");
break;
}
}
}
// Cleanup: remove all virtual participants for this peer
for (room, pid) in &peer_room_participants {
let result = {
let mut mgr = fm_signal.room_mgr.lock().await;
mgr.leave(room, *pid)
};
if let Some((update, senders)) = result {
room::broadcast_signal(&senders, &update).await;
}
}
info!(peer = %peer_label, "federation signal task ended");
};
let media_task = async move {
loop {
match media_transport.connection().read_datagram().await {
Ok(data) => {
if data.len() < 8 + 4 {
continue; // too short (need room_hash + min header)
}
let mut rh = [0u8; 8];
rh.copy_from_slice(&data[..8]);
let media_bytes = &data[8..];
// Deserialize media packet
let pkt = match wzp_proto::MediaPacket::from_bytes(Bytes::copy_from_slice(media_bytes)) {
Some(pkt) => pkt,
None => continue,
};
// Look up room by hash — we need to get the room name from the signal task's hash_to_room
// For simplicity, we forward to all local participants via the room manager
// The virtual participant approach means we don't need the room name here —
// the SFU loop handles it. But since inbound media doesn't go through run_participant,
// we need to manually fan out.
// For now, just use the room manager to find local participants
// This is a simplified approach — full implementation would maintain
// a shared hash_to_room map between signal and media tasks
let mgr = fm_media.room_mgr.lock().await;
for room_name in mgr.active_rooms() {
if room_hash(&room_name) == rh {
// Forward to all local participants in this room
let locals: Vec<_> = mgr.local_senders(&room_name);
drop(mgr); // release lock before sending
for sender in &locals {
if let ParticipantSender::Quic(t) = sender {
let _ = t.send_media(&pkt).await;
}
}
break;
}
}
handle_datagram(&fm_media, &peer_fp_media, data).await;
}
Err(_) => break,
}
}
};
// Periodically announce new local rooms to the peer
let announce_task = async move {
let mut announced: std::collections::HashSet<String> = std::collections::HashSet::new();
loop {
tokio::time::sleep(Duration::from_secs(1)).await;
let rooms = {
let mgr = fm_announce.room_mgr.lock().await;
mgr.active_rooms()
};
for room_name in &rooms {
if !announced.contains(room_name) {
let participants = {
let mgr = fm_announce.room_mgr.lock().await;
mgr.local_participants(room_name)
};
if participants.is_empty() {
continue; // only virtual participants, skip
}
info!(peer = %peer_label2, room = %room_name, local_count = participants.len(), "federation: announcing room to peer");
let msg = SignalMessage::FederationRoomJoin {
room: room_name.clone(),
participants,
};
match announce_transport.send_signal(&msg).await {
Ok(()) => {
info!(peer = %peer_label2, room = %room_name, "federation: room announced successfully");
announced.insert(room_name.clone());
}
Err(e) => {
warn!(peer = %peer_label2, room = %room_name, "federation: announce send failed: {e}");
}
}
}
}
// Remove rooms that no longer exist
announced.retain(|r| rooms.contains(r));
}
};
tokio::select! {
_ = signal_task => {}
_ = media_task => {}
_ = announce_task => {}
}
// Cleanup: remove peer link
{
let mut links = fm.peer_links.lock().await;
links.remove(&peer_fp);
}
info!(peer = %peer_label, "federation link ended");
Ok(())
}
/// Handle an incoming federation signal.
///
/// * `GlobalRoomActive` — for a configured global room, records the room as
///   active on the sending peer's link and, if that is a state change, tells
///   all OTHER peers the room is routable through this relay (multi-hop).
///   Repeated announcements for an already-active room are not re-propagated,
///   so announcements cannot circulate forever in a cyclic peer topology.
/// * `GlobalRoomInactive` — clears the room on the sending peer's link and
///   propagates the inactive state to the other peers only when neither a
///   local participant nor any other peer still has the room active.
/// * Any other signal is ignored.
///
/// `peer_fp` is the normalized fingerprint keying the sender in `peer_links`;
/// `peer_label` is used for logging only. Sends are best-effort and happen
/// after the `peer_links` lock is released so media forwarding is not blocked
/// behind network I/O.
async fn handle_signal(
    fm: &Arc<FederationManager>,
    peer_fp: &str,
    peer_label: &str,
    msg: SignalMessage,
) {
    match msg {
        SignalMessage::GlobalRoomActive { room } => {
            if !fm.is_global_room(&room) {
                return;
            }
            info!(peer = %peer_label, room = %room, "peer has global room active");

            let targets: Vec<_> = {
                let mut links = fm.peer_links.lock().await;
                // `insert` returns false when the room was already recorded,
                // i.e. this announcement is a duplicate. With no link entry we
                // keep the previous behavior and propagate.
                let newly_active = links
                    .get_mut(peer_fp)
                    .map_or(true, |link| link.active_rooms.insert(room.clone()));
                if newly_active {
                    links
                        .iter()
                        .filter(|(fp, _)| fp.as_str() != peer_fp)
                        .map(|(_, link)| Arc::clone(&link.transport))
                        .collect()
                } else {
                    Vec::new()
                }
            };

            // Propagate: A→B→C where B relays A's announcement to C and vice versa.
            let announce = SignalMessage::GlobalRoomActive { room };
            for transport in &targets {
                let _ = transport.send_signal(&announce).await;
            }
        }
        SignalMessage::GlobalRoomInactive { room } => {
            info!(peer = %peer_label, room = %room, "peer global room now inactive");

            // Query the room manager before touching `peer_links` so the two
            // locks are never held at the same time.
            let local_active = {
                let mgr = fm.room_mgr.lock().await;
                mgr.active_rooms().iter().any(|r| r == &room)
            };

            let targets: Vec<_> = {
                let mut links = fm.peer_links.lock().await;
                if let Some(link) = links.get_mut(peer_fp) {
                    link.active_rooms.remove(&room);
                }
                let any_other_active = links
                    .iter()
                    .any(|(fp, l)| fp.as_str() != peer_fp && l.active_rooms.contains(&room));
                if any_other_active || local_active {
                    // Still reachable through us: nothing to retract.
                    Vec::new()
                } else {
                    links
                        .iter()
                        .filter(|(fp, _)| fp.as_str() != peer_fp)
                        .map(|(_, link)| Arc::clone(&link.transport))
                        .collect()
                }
            };

            let retract = SignalMessage::GlobalRoomInactive { room };
            for transport in &targets {
                let _ = transport.send_signal(&retract).await;
            }
        }
        _ => {} // ignore other signals
    }
}
/// Handle an incoming federation datagram (room-hash-tagged media).
///
/// Wire layout: 8-byte room hash followed by a serialized media packet.
/// Datagrams shorter than 12 bytes, or whose payload does not parse as a
/// media packet, are dropped.
///
/// The packet is then
/// 1. delivered to every local participant of the locally-active room whose
///    hash matches (if there is one), and
/// 2. forwarded, unchanged, to every OTHER peer that has the room active —
///    never back to `source_peer_fp` (loop prevention).
///
/// Step 2 does not require the room to be active locally: when this relay has
/// no local participants, the room is resolved from the other peers' active
/// sets so the relay still works as a pure transit hop (A→B→C with B empty).
async fn handle_datagram(
    fm: &Arc<FederationManager>,
    source_peer_fp: &str,
    data: Bytes,
) {
    // Length of the room-hash tag prefixed to every federation datagram.
    const ROOM_HASH_LEN: usize = 8;
    // Minimum media payload accepted after the tag (12 bytes total).
    const MIN_MEDIA_LEN: usize = 4;

    if data.len() < ROOM_HASH_LEN + MIN_MEDIA_LEN {
        return;
    }
    let mut rh = [0u8; ROOM_HASH_LEN];
    rh.copy_from_slice(&data[..ROOM_HASH_LEN]);
    let media_bytes = data.slice(ROOM_HASH_LEN..);
    // Parse up front so malformed payloads are neither delivered nor forwarded.
    let pkt = match wzp_proto::MediaPacket::from_bytes(media_bytes.clone()) {
        Some(pkt) => pkt,
        None => return,
    };

    // Resolve the local room and its senders under a single lock acquisition
    // so the room cannot disappear between the two lookups.
    let local = {
        let mgr = fm.room_mgr.lock().await;
        mgr.active_rooms()
            .into_iter()
            .find(|r| room_hash(r) == rh)
            .map(|name| {
                let senders = mgr.local_senders(&name);
                (name, senders)
            })
    };

    // Deliver to all local participants
    if let Some((_, senders)) = &local {
        for sender in senders {
            match sender {
                room::ParticipantSender::Quic(t) => { let _ = t.send_media(&pkt).await; }
                room::ParticipantSender::WebSocket(_) => { let _ = sender.send_raw(&pkt.payload).await; }
            }
        }
    }

    // Multi-hop: forward to OTHER active peers (not the source)
    let links = fm.peer_links.lock().await;
    let room_name = match local {
        Some((name, _)) => name,
        None => {
            // Not active locally: look for a non-source peer that announced a
            // room with this hash, so we can still act as a transit relay.
            let transit = links
                .iter()
                .filter(|(fp, _)| fp.as_str() != source_peer_fp)
                .flat_map(|(_, link)| link.active_rooms.iter())
                .find(|r| room_hash(r.as_str()) == rh);
            match transit {
                Some(name) => name.clone(),
                None => return, // nobody to deliver or forward to
            }
        }
    };

    // The forwarded datagram is identical for every peer: build it at most once.
    let mut tagged: Option<Vec<u8>> = None;
    for (fp, link) in links.iter() {
        if fp != source_peer_fp && link.active_rooms.contains(&room_name) {
            let datagram = tagged.get_or_insert_with(|| {
                let mut buf = Vec::with_capacity(ROOM_HASH_LEN + media_bytes.len());
                buf.extend_from_slice(&rh);
                buf.extend_from_slice(&media_bytes);
                buf
            });
            let _ = link.transport.send_raw_datagram(&*datagram);
        }
    }
}