feat: Prometheus metrics on relay + web bridge, client JSONL export (T5-S1/S3/S4)
WZP-P2-T5-S1: Relay Prometheus /metrics - RelayMetrics: active_sessions, active_rooms, packets/bytes_forwarded, auth_attempts (ok/fail), handshake_duration histogram - --metrics-port flag spawns HTTP server - Wired into auth, handshake, session, and packet forwarding paths - 2 tests WZP-P2-T5-S3: Web bridge Prometheus /metrics - WebMetrics: active_connections, frames_bridged (up/down), auth_failures, handshake_latency histogram - Added /metrics route to existing axum app - Wired into WS connect/disconnect, auth, handshake, send/recv loops - 2 tests WZP-P2-T5-S4: Client --metrics-file JSONL - ClientMetricsSnapshot with all telemetry fields - MetricsWriter: writes one JSON line per second to file - snapshot_from_stats() converts JitterStats to snapshot - --metrics-file <path> flag - 3 tests 223 tests passing across all crates. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -25,6 +25,9 @@ use tracing::{error, info, warn};
|
||||
use wzp_client::call::{CallConfig, CallDecoder, CallEncoder};
|
||||
use wzp_proto::MediaTransport;
|
||||
|
||||
mod metrics;
|
||||
use metrics::WebMetrics;
|
||||
|
||||
const FRAME_SAMPLES: usize = 960;
|
||||
|
||||
#[derive(Clone)]
|
||||
@@ -32,6 +35,7 @@ struct AppState {
|
||||
relay_addr: SocketAddr,
|
||||
rooms: Arc<Mutex<HashMap<String, RoomSlot>>>,
|
||||
auth_url: Option<String>,
|
||||
metrics: WebMetrics,
|
||||
}
|
||||
|
||||
/// A waiting client in a room.
|
||||
@@ -90,10 +94,12 @@ async fn main() -> anyhow::Result<()> {
|
||||
info!(url, "auth enabled — browsers must send token as first WS message");
|
||||
}
|
||||
|
||||
let web_metrics = WebMetrics::new();
|
||||
let state = AppState {
|
||||
relay_addr,
|
||||
rooms: Arc::new(Mutex::new(HashMap::new())),
|
||||
auth_url,
|
||||
metrics: web_metrics,
|
||||
};
|
||||
|
||||
let static_dir = if std::path::Path::new("crates/wzp-web/static").exists() {
|
||||
@@ -106,6 +112,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
let app = Router::new()
|
||||
.route("/ws/{room}", get(ws_handler))
|
||||
.route("/metrics", get(metrics::metrics_handler))
|
||||
.fallback_service(ServeDir::new(static_dir))
|
||||
.with_state(state);
|
||||
|
||||
@@ -172,6 +179,8 @@ async fn ws_handler(
|
||||
async fn handle_ws(socket: WebSocket, room: String, state: AppState) {
|
||||
info!(room = %room, "client joined room");
|
||||
|
||||
state.metrics.active_connections.inc();
|
||||
|
||||
let (mut ws_sender, mut ws_receiver) = socket.split();
|
||||
|
||||
// Auth: if --auth-url is set, expect a JSON auth message from the browser first
|
||||
@@ -184,6 +193,8 @@ async fn handle_ws(socket: WebSocket, room: String, state: AppState) {
|
||||
let token = v.get("token").and_then(|t| t.as_str()).unwrap_or("").to_string();
|
||||
if token.is_empty() {
|
||||
error!(room = %room, "empty auth token");
|
||||
state.metrics.auth_failures.inc();
|
||||
state.metrics.active_connections.dec();
|
||||
return;
|
||||
}
|
||||
// Validate against featherChat
|
||||
@@ -194,6 +205,8 @@ async fn handle_ws(socket: WebSocket, room: String, state: AppState) {
|
||||
}
|
||||
Err(e) => {
|
||||
error!(room = %room, "browser auth failed: {e}");
|
||||
state.metrics.auth_failures.inc();
|
||||
state.metrics.active_connections.dec();
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -202,12 +215,16 @@ async fn handle_ws(socket: WebSocket, room: String, state: AppState) {
|
||||
}
|
||||
_ => {
|
||||
error!(room = %room, "expected auth JSON, got: {text}");
|
||||
state.metrics.auth_failures.inc();
|
||||
state.metrics.active_connections.dec();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
error!(room = %room, "no auth message from browser");
|
||||
state.metrics.auth_failures.inc();
|
||||
state.metrics.active_connections.dec();
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -257,14 +274,18 @@ async fn handle_ws(socket: WebSocket, room: String, state: AppState) {
|
||||
}
|
||||
|
||||
// Crypto handshake with relay
|
||||
let handshake_start = std::time::Instant::now();
|
||||
let bridge_seed = wzp_crypto::Seed::generate();
|
||||
match wzp_client::handshake::perform_handshake(&*transport, &bridge_seed.0).await {
|
||||
Ok(_session) => {
|
||||
info!(room = %room, "crypto handshake with relay complete");
|
||||
let elapsed = handshake_start.elapsed().as_secs_f64();
|
||||
state.metrics.handshake_latency.observe(elapsed);
|
||||
info!(room = %room, elapsed_ms = %(elapsed * 1000.0), "crypto handshake with relay complete");
|
||||
}
|
||||
Err(e) => {
|
||||
error!(room = %room, "relay handshake failed: {e}");
|
||||
transport.close().await.ok();
|
||||
state.metrics.active_connections.dec();
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -277,6 +298,7 @@ async fn handle_ws(socket: WebSocket, room: String, state: AppState) {
|
||||
let send_transport = transport.clone();
|
||||
let send_encoder = encoder.clone();
|
||||
let send_room = room.clone();
|
||||
let send_metrics = state.metrics.clone();
|
||||
let send_task = tokio::spawn(async move {
|
||||
let mut frames_sent = 0u64;
|
||||
while let Some(Ok(msg)) = ws_receiver.next().await {
|
||||
@@ -302,6 +324,7 @@ async fn handle_ws(socket: WebSocket, room: String, state: AppState) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
send_metrics.frames_bridged.with_label_values(&["up"]).inc();
|
||||
frames_sent += 1;
|
||||
if frames_sent % 500 == 0 {
|
||||
info!(room = %send_room, frames_sent, "browser → relay");
|
||||
@@ -318,6 +341,7 @@ async fn handle_ws(socket: WebSocket, room: String, state: AppState) {
|
||||
let recv_transport = transport.clone();
|
||||
let recv_decoder = decoder.clone();
|
||||
let recv_room = room.clone();
|
||||
let recv_metrics = state.metrics.clone();
|
||||
let recv_task = tokio::spawn(async move {
|
||||
let mut pcm_buf = vec![0i16; FRAME_SAMPLES];
|
||||
let mut frames_recv = 0u64;
|
||||
@@ -336,6 +360,7 @@ async fn handle_ws(socket: WebSocket, room: String, state: AppState) {
|
||||
error!("ws send: {e}");
|
||||
return;
|
||||
}
|
||||
recv_metrics.frames_bridged.with_label_values(&["down"]).inc();
|
||||
frames_recv += 1;
|
||||
if frames_recv % 500 == 0 {
|
||||
info!(room = %recv_room, frames_recv, "relay → browser");
|
||||
@@ -356,5 +381,6 @@ async fn handle_ws(socket: WebSocket, room: String, state: AppState) {
|
||||
}
|
||||
|
||||
transport.close().await.ok();
|
||||
state.metrics.active_connections.dec();
|
||||
info!(room = %room, "session ended");
|
||||
}
|
||||
|
||||
130
crates/wzp-web/src/metrics.rs
Normal file
130
crates/wzp-web/src/metrics.rs
Normal file
@@ -0,0 +1,130 @@
|
||||
//! Prometheus metrics for the WZP web bridge.
|
||||
|
||||
use prometheus::{
|
||||
Encoder, Histogram, HistogramOpts, IntCounter, IntCounterVec, IntGauge, Opts, Registry,
|
||||
TextEncoder,
|
||||
};
|
||||
|
||||
/// Holds all Prometheus metrics for the web bridge.
|
||||
#[derive(Clone)]
|
||||
pub struct WebMetrics {
|
||||
pub active_connections: IntGauge,
|
||||
pub frames_bridged: IntCounterVec,
|
||||
pub auth_failures: IntCounter,
|
||||
pub handshake_latency: Histogram,
|
||||
registry: Registry,
|
||||
}
|
||||
|
||||
impl WebMetrics {
|
||||
/// Create and register all web bridge metrics.
|
||||
pub fn new() -> Self {
|
||||
let registry = Registry::new();
|
||||
|
||||
let active_connections = IntGauge::with_opts(
|
||||
Opts::new("wzp_web_active_connections", "Current WebSocket connections"),
|
||||
)
|
||||
.expect("metric");
|
||||
registry
|
||||
.register(Box::new(active_connections.clone()))
|
||||
.expect("register");
|
||||
|
||||
let frames_bridged = IntCounterVec::new(
|
||||
Opts::new("wzp_web_frames_bridged_total", "Audio frames bridged"),
|
||||
&["direction"],
|
||||
)
|
||||
.expect("metric");
|
||||
registry
|
||||
.register(Box::new(frames_bridged.clone()))
|
||||
.expect("register");
|
||||
|
||||
let auth_failures = IntCounter::with_opts(
|
||||
Opts::new("wzp_web_auth_failures_total", "Browser auth failures"),
|
||||
)
|
||||
.expect("metric");
|
||||
registry
|
||||
.register(Box::new(auth_failures.clone()))
|
||||
.expect("register");
|
||||
|
||||
let handshake_latency = Histogram::with_opts(
|
||||
HistogramOpts::new(
|
||||
"wzp_web_handshake_latency_seconds",
|
||||
"Relay handshake time",
|
||||
)
|
||||
.buckets(vec![0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0]),
|
||||
)
|
||||
.expect("metric");
|
||||
registry
|
||||
.register(Box::new(handshake_latency.clone()))
|
||||
.expect("register");
|
||||
|
||||
Self {
|
||||
active_connections,
|
||||
frames_bridged,
|
||||
auth_failures,
|
||||
handshake_latency,
|
||||
registry,
|
||||
}
|
||||
}
|
||||
|
||||
/// Encode all metrics as Prometheus text exposition format.
|
||||
pub fn gather(&self) -> String {
|
||||
let encoder = TextEncoder::new();
|
||||
let metric_families = self.registry.gather();
|
||||
let mut buf = Vec::new();
|
||||
encoder.encode(&metric_families, &mut buf).unwrap();
|
||||
String::from_utf8(buf).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
/// Axum handler that returns Prometheus text metrics.
|
||||
pub async fn metrics_handler(
|
||||
axum::extract::State(state): axum::extract::State<super::AppState>,
|
||||
) -> String {
|
||||
state.metrics.gather()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn web_metrics_register() {
|
||||
let m = WebMetrics::new();
|
||||
// Touch CounterVec labels so they appear in output
|
||||
m.frames_bridged.with_label_values(&["up"]);
|
||||
m.frames_bridged.with_label_values(&["down"]);
|
||||
let output = m.gather();
|
||||
assert!(
|
||||
output.contains("wzp_web_active_connections"),
|
||||
"missing active_connections"
|
||||
);
|
||||
assert!(
|
||||
output.contains("wzp_web_frames_bridged_total"),
|
||||
"missing frames_bridged"
|
||||
);
|
||||
assert!(
|
||||
output.contains("wzp_web_auth_failures_total"),
|
||||
"missing auth_failures"
|
||||
);
|
||||
assert!(
|
||||
output.contains("wzp_web_handshake_latency_seconds"),
|
||||
"missing handshake_latency"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn web_metrics_track_connections() {
|
||||
let m = WebMetrics::new();
|
||||
assert_eq!(m.active_connections.get(), 0);
|
||||
|
||||
m.active_connections.inc();
|
||||
m.active_connections.inc();
|
||||
assert_eq!(m.active_connections.get(), 2);
|
||||
|
||||
m.active_connections.dec();
|
||||
assert_eq!(m.active_connections.get(), 1);
|
||||
|
||||
let output = m.gather();
|
||||
assert!(output.contains("wzp_web_active_connections 1"));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user