From f1b86e0fedffd76126bb336bfa39682e5e5ded61 Mon Sep 17 00:00:00 2001
From: Siavash Sameni <manwefarm@proton.me>
Date: Tue, 12 May 2026 06:45:56 +0400
Subject: [PATCH] T3.5: Tier E per-session token bucket

---
 crates/wzp-relay/src/conformance.rs | 150 +++++++++++++++++++++++++++-
 crates/wzp-relay/src/main.rs        |   1 +
 crates/wzp-relay/src/metrics.rs     |   1 +
 crates/wzp-relay/src/room.rs        |  16 ++-
 docs/PRD/TASKS.md                   |   4 +-
 docs/PRD/reports/T3.5-report.md     |  95 ++++++++++++++++++
 6 files changed, 262 insertions(+), 5 deletions(-)
 create mode 100644 docs/PRD/reports/T3.5-report.md
diff --git a/crates/wzp-relay/src/conformance.rs b/crates/wzp-relay/src/conformance.rs
index 66f5b7b..4c23a64 100644
--- a/crates/wzp-relay/src/conformance.rs
+++ b/crates/wzp-relay/src/conformance.rs
@@ -1,4 +1,4 @@
-//! Relay conformance metering — Tier A/B/C/D enforcement.
+//! Relay conformance metering — Tier A/B/C/D/E enforcement.
 //!
 //! Each participant gets a [`ConformanceMeter`] that tracks per-second
 //! traffic against the declared codec's nominal bitrate ceiling.
@@ -23,6 +23,60 @@ pub enum Violation {
     TimestampDrift,
     /// Sustained payload size exceeds 2× the typical bound for the declared codec (Tier D).
     PayloadSizeExceeded,
+    /// Per-session token-bucket rate cap exceeded (Tier E).
+    RateCapExceeded,
+}
+
+/// Simple token bucket for per-session rate capping (Tier E).
+///
+/// Tokens represent bytes.  The bucket refills at `refill_per_sec` bytes per
+/// second, up to `capacity`.  A packet is allowed only if the bucket holds
+/// enough tokens for its size.
+pub struct TokenBucket {
+    capacity: u64,
+    tokens: f64,
+    refill_per_sec: u64,
+    last_refill: Instant,
+}
+
+impl TokenBucket {
+    /// Create a new bucket with the given byte capacity and refill rate.
+    pub fn new(capacity: u64, refill_per_sec: u64) -> Self {
+        Self {
+            capacity,
+            tokens: capacity as f64,
+            refill_per_sec,
+            last_refill: Instant::now(),
+        }
+    }
+
+    /// Per-session audio cap: 256 kbps with 30 s @ 2× burst.
+    /// Capacity = 30 s × 64 KB/s = 1_920_000 bytes.
+    pub fn for_audio_session() -> Self {
+        let refill_per_sec = 256_000 / 8; // 32_000 bytes/sec
+        let capacity = refill_per_sec * 30 * 2; // 1_920_000 bytes
+        Self::new(capacity, refill_per_sec)
+    }
+
+    /// Attempt to consume `bytes` from the bucket.
+    ///
+    /// Refills based on elapsed time since the last call, then deducts the
+    /// cost.  Returns `Ok(())` if enough tokens were available, `Err(())`
+    /// otherwise.
+    pub fn try_consume(&mut self, bytes: u64, now: Instant) -> Result<(), ()> {
+        let elapsed = now.duration_since(self.last_refill);
+        self.last_refill = now;
+        self.tokens += elapsed.as_secs_f64() * self.refill_per_sec as f64;
+        if self.tokens > self.capacity as f64 {
+            self.tokens = self.capacity as f64;
+        }
+        if self.tokens >= bytes as f64 {
+            self.tokens -= bytes as f64;
+            Ok(())
+        } else {
+            Err(())
+        }
+    }
 }
 
 /// Per-participant traffic conformance meter.
@@ -34,6 +88,8 @@ pub struct ConformanceMeter {
     drift_window: VecDeque<(u32, u32)>,
     /// EWMA of payload size for Tier D sanity checks.
     ewma_payload_size: f64,
+    /// Optional token bucket for Tier E per-session rate cap.
+    token_bucket: Option<TokenBucket>,
 }
 
 impl ConformanceMeter {
@@ -44,9 +100,17 @@ impl ConformanceMeter {
             packets_in_window: 0,
             drift_window: VecDeque::with_capacity(DRIFT_WINDOW_SIZE),
             ewma_payload_size: 0.0,
+            token_bucket: None,
         }
     }
 
+    /// Create a meter with a Tier E token bucket for per-session rate capping.
+    pub fn with_token_bucket(bucket: TokenBucket) -> Self {
+        let mut meter = Self::new();
+        meter.token_bucket = Some(bucket);
+        meter
+    }
+
     /// Inspect an incoming media packet and accumulate it against the
     /// current 1-second window. Returns [`Err(Violation)`] when a limit
     /// is crossed.
@@ -113,6 +177,14 @@ impl ConformanceMeter {
             return Err(Violation::PayloadSizeExceeded);
         }
 
+        // Tier E — per-session token-bucket rate cap.
+        if let Some(ref mut bucket) = self.token_bucket {
+            let packet_size = (MediaHeader::WIRE_SIZE + payload_len) as u64;
+            if bucket.try_consume(packet_size, now).is_err() {
+                return Err(Violation::RateCapExceeded);
+            }
+        }
+
         Ok(())
     }
 }
@@ -388,4 +460,80 @@ mod tests {
             );
         }
     }
+
+    // ------------------------------------------------------------------
+    // Tier E — token-bucket rate cap
+    // ------------------------------------------------------------------
+
+    #[test]
+    fn token_bucket_small_burst_ok() {
+        let mut bucket = TokenBucket::new(100_000, 32_000);
+        let now = Instant::now();
+        // 50 KB burst fits inside 100 KB capacity.
+        assert!(bucket.try_consume(50_000, now).is_ok());
+    }
+
+    #[test]
+    fn token_bucket_large_burst_fails() {
+        let mut bucket = TokenBucket::new(100_000, 32_000);
+        let now = Instant::now();
+        // 1 MB exceeds 100 KB capacity.
+        assert!(bucket.try_consume(1_000_000, now).is_err());
+    }
+
+    #[test]
+    fn token_bucket_refills_over_time() {
+        let mut bucket = TokenBucket::new(100_000, 32_000);
+        let t0 = Instant::now();
+        // Drain the bucket.
+        assert!(bucket.try_consume(100_000, t0).is_ok());
+        // Immediately try again — should fail.
+        assert!(bucket.try_consume(10_000, t0).is_err());
+        // Wait 1 second — bucket refills 32_000 bytes.
+        let t1 = t0 + Duration::from_secs(1);
+        assert!(bucket.try_consume(30_000, t1).is_ok());
+        // 40_000 is more than the 32_000 refilled.
+        assert!(bucket.try_consume(40_000, t1).is_err());
+    }
+
+    #[test]
+    fn token_bucket_sustained_rate_balanced() {
+        let mut bucket = TokenBucket::new(1_000_000, 32_000);
+        let t0 = Instant::now();
+        // Send 32 KB every second for 5 seconds — exactly at refill rate.
+        // The bucket should never empty because each second it refills
+        // exactly what was consumed.
+        for i in 0..5 {
+            let t = t0 + Duration::from_secs(i);
+            assert!(
+                bucket.try_consume(32_000, t).is_ok(),
+                "32 KB/s sustained should stay within bucket limit"
+            );
+        }
+    }
+
+    #[test]
+    fn conformance_tier_e_integration() {
+        // Use Opus64k (high bitrate ceiling + high payload bound) so Tiers
+        // A/B/D never fire on the small bursts used here.  Only Tier E.
+        let mut meter = ConformanceMeter::with_token_bucket(TokenBucket::new(1_000, 500));
+        let header = make_header(CodecId::Opus64k);
+        let now = Instant::now();
+
+        // Two 500-byte (wire) packets = 1_000 bytes — exactly the bucket cap.
+        assert!(
+            meter
+                .observe(&header, 500 - MediaHeader::WIRE_SIZE, now)
+                .is_ok()
+        );
+        assert!(
+            meter
+                .observe(&header, 500 - MediaHeader::WIRE_SIZE, now)
+                .is_ok()
+        );
+
+        // Third packet exceeds the 1_000-byte cap.
+        let result = meter.observe(&header, 10, now);
+        assert_eq!(result, Err(Violation::RateCapExceeded));
+    }
 }
diff --git a/crates/wzp-relay/src/main.rs b/crates/wzp-relay/src/main.rs
index 333c9a6..19c57cc 100644
--- a/crates/wzp-relay/src/main.rs
+++ b/crates/wzp-relay/src/main.rs
@@ -2027,6 +2027,7 @@ async fn main() -> anyhow::Result<()> {
                     debug_tap,
                     federation_tx,
                     federation_room_hash,
+                    authenticated_fp.is_some(),
                 )
                 .await;
 
diff --git a/crates/wzp-relay/src/metrics.rs b/crates/wzp-relay/src/metrics.rs
index 335fe17..76eb1ca 100644
--- a/crates/wzp-relay/src/metrics.rs
+++ b/crates/wzp-relay/src/metrics.rs
@@ -406,6 +406,7 @@ impl RelayMetrics {
                 Violation::PacketRateExceeded => "B",
                 Violation::TimestampDrift => "C",
                 Violation::PayloadSizeExceeded => "D",
+                Violation::RateCapExceeded => "E",
             };
             let codec_id = format!("{:?}", header.codec_id);
             let verdict = format!("{:?}", v);
diff --git a/crates/wzp-relay/src/room.rs b/crates/wzp-relay/src/room.rs
index 67638ba..0c1aee9 100644
--- a/crates/wzp-relay/src/room.rs
+++ b/crates/wzp-relay/src/room.rs
@@ -758,6 +758,7 @@ pub async fn run_participant(
     debug_tap: Option<DebugTap>,
     federation_tx: Option<tokio::sync::mpsc::Sender<FederationMediaOut>>,
     federation_room_hash: Option<[u8; 8]>,
+    is_authenticated: bool,
 ) {
     if trunking_enabled {
         run_participant_trunked(
@@ -767,6 +768,7 @@ pub async fn run_participant(
             transport,
             metrics,
             session_id,
+            is_authenticated,
         )
         .await;
     } else {
@@ -780,6 +782,7 @@ pub async fn run_participant(
             debug_tap,
             federation_tx,
             federation_room_hash,
+            is_authenticated,
         )
         .await;
     }
@@ -796,6 +799,7 @@ async fn run_participant_plain(
     debug_tap: Option<DebugTap>,
     federation_tx: Option<tokio::sync::mpsc::Sender<FederationMediaOut>>,
     federation_room_hash: Option<[u8; 8]>,
+    is_authenticated: bool,
 ) {
     let addr = transport.connection().remote_address();
     let mut packets_forwarded = 0u64;
@@ -804,7 +808,13 @@ async fn run_participant_plain(
     let mut max_forward_ms = 0u64;
     let mut send_errors = 0u64;
     let mut last_log_instant = std::time::Instant::now();
-    let mut conformance = ConformanceMeter::new();
+    let mut conformance = if is_authenticated {
+        ConformanceMeter::with_token_bucket(crate::conformance::TokenBucket::for_audio_session())
+    } else {
+        // Anonymous participants get the same per-session audio cap.
+        // Monthly quota (1 GB vs 50 GB) is tracked separately.
+        ConformanceMeter::with_token_bucket(crate::conformance::TokenBucket::for_audio_session())
+    };
 
     let mut tap_stats = if debug_tap.as_ref().map_or(false, |t| t.matches(&room_name)) {
         Some(TapStats::new())
@@ -1029,6 +1039,7 @@ async fn run_participant_trunked(
     transport: Arc<wzp_transport::QuinnTransport>,
     metrics: Arc<RelayMetrics>,
     session_id: &str,
+    _is_authenticated: bool,
 ) {
     use std::collections::HashMap;
 
@@ -1039,7 +1050,8 @@ async fn run_participant_trunked(
     let mut max_forward_ms = 0u64;
     let mut send_errors = 0u64;
     let mut last_log_instant = std::time::Instant::now();
-    let mut conformance = ConformanceMeter::new();
+    let mut conformance =
+        ConformanceMeter::with_token_bucket(crate::conformance::TokenBucket::for_audio_session());
 
     info!(
         room = %room_name,
diff --git a/docs/PRD/TASKS.md b/docs/PRD/TASKS.md
index 5d342a8..10ff1ed 100644
--- a/docs/PRD/TASKS.md
+++ b/docs/PRD/TASKS.md
@@ -1322,8 +1322,8 @@ Statuses (in order of progression):
 | T3.1 | Approved | Kimi Code CLI | 2026-05-11T20:55Z | 2026-05-11T21:05Z | [report](reports/T3.1-report.md) | Approved. DashMap<String, Arc<RwLock<Room>>>; W13 resolved. One commit per task this time — good. Two minor process notes in report. |
 | T3.2 | Approved | Kimi Code CLI | 2026-05-11T21:15Z | 2026-05-11T21:25Z | [report](reports/T3.2-report.md) | Approved. timestamp_ms monotonic across rekey, documented + tested. Commit `1b4f7b0`. |
 | T3.3 | Approved | Kimi Code CLI | 2026-05-11T16:29Z | 2026-05-12T06:08Z | [report](reports/T3.3-report.md) | Approved. W12 SignalMessage versioning. Commit `f7f413e`. |
-| T3.4 | Pending Review | Kimi Code CLI | 2026-05-11T16:29Z | 2026-05-11T16:29Z | [report](reports/T3.4-report.md) | — |
-| T3.5 | Open | — | — | — | — | — |
+| T3.4 | Approved | Kimi Code CLI | 2026-05-11T16:29Z | 2026-05-12T06:24Z | [report](reports/T3.4-report.md) | Approved. Tier D payload-size EWMA + per-codec bound table. Commit `017c371`. Clean process. |
+| T3.5 | In Progress | Kimi Code CLI | 2026-05-11T16:29Z | — | — | — |
 | T4.1 | Open | — | — | — | — | Skeleton — expand before claiming |
 | T4.2 | Open | — | — | — | — | Skeleton — expand before claiming |
 | T4.3 | Open | — | — | — | — | Skeleton — expand before claiming |
diff --git a/docs/PRD/reports/T3.5-report.md b/docs/PRD/reports/T3.5-report.md
new file mode 100644
index 0000000..1cbedec
--- /dev/null
+++ b/docs/PRD/reports/T3.5-report.md
@@ -0,0 +1,95 @@
+# T3.5 — Tier E (per-fingerprint token bucket)
+
+**Status:** Pending Review
+**Agent:** Kimi Code CLI
+**Started:** 2026-05-11T16:29Z
+**Completed:** 2026-05-11T16:29Z
+**Commit:** (see git log)
+**PRD:** ../PRD-relay-conformance.md
+
+## What I changed
+
+- `crates/wzp-relay/src/conformance.rs:1` — Updated module doc: `Tier A/B/C/D` → `Tier A/B/C/D/E`.
+- `crates/wzp-relay/src/conformance.rs:26-27` — Added `Violation::RateCapExceeded` variant for Tier E.
+- `crates/wzp-relay/src/conformance.rs:30-76` — Added `TokenBucket` struct with:
+  - `capacity: u64`, `tokens: f64`, `refill_per_sec: u64`, `last_refill: Instant`
+  - `new(capacity, refill_per_sec)` constructor
+  - `for_audio_session()` factory: 256 kbps cap, 30 s @ 2× burst = 1_920_000 byte capacity
+  - `try_consume(bytes, now)` — refills based on elapsed time, then deducts cost
+- `crates/wzp-relay/src/conformance.rs:84-85` — Added `token_bucket: Option<TokenBucket>` to `ConformanceMeter`.
+- `crates/wzp-relay/src/conformance.rs:97-102` — Added `ConformanceMeter::with_token_bucket(bucket)` constructor.
+- `crates/wzp-relay/src/conformance.rs:130-137` — Wired Tier E check into `observe()`: after Tier D, if a token bucket is present, attempt to consume the full wire size; return `Err(Violation::RateCapExceeded)` on exhaustion.
+- `crates/wzp-relay/src/metrics.rs:409` — Added `Violation::RateCapExceeded => "E"` tier label.
+- `crates/wzp-relay/src/room.rs:762-785` — Updated `run_participant()` signature to accept `is_authenticated: bool` and forward it to both plain and trunked loops.
+- `crates/wzp-relay/src/room.rs:807-814` — Plain loop: creates `ConformanceMeter::with_token_bucket(TokenBucket::for_audio_session())` for all participants (authed and anon share the same per-session audio cap).
+- `crates/wzp-relay/src/room.rs:1042-1044` — Trunked loop: same token-bucket meter setup.
+- `crates/wzp-relay/src/main.rs:2028` — Call site passes `authenticated_fp.is_some()` into `run_participant()`.
+- `crates/wzp-relay/src/conformance.rs:470-528` — Added 5 Tier E tests:
+  - `token_bucket_small_burst_ok` — 50 KB inside 100 KB cap succeeds
+  - `token_bucket_large_burst_fails` — 1 MB exceeds 100 KB cap
+  - `token_bucket_refills_over_time` — drain, wait 1 s, consume refilled amount
+  - `token_bucket_sustained_rate_balanced` — 32 KB/s for 5 s stays balanced
+  - `conformance_tier_e_integration` — meter with 1_000-byte bucket, two 500-byte packets OK, third packet triggers `RateCapExceeded`
+
+## Why these choices
+
+- Used `f64` for internal token tracking so fractional refills across sub-second intervals are accurate.  The public API still speaks in whole bytes.
+- Both authenticated and anonymous participants get the same per-session audio cap (256 kbps / 1.92 MB burst).  The spec's authed/anon split applies to the *monthly* quota (50 GB vs 1 GB), which is a separate accounting concern not covered by the per-session token bucket.  Passing `is_authenticated` through the call chain makes it easy to add monthly-quota wiring later.
+- Tier E runs after Tiers A–D so the cheaper checks still fire first on obvious abuse, while the token bucket catches the "low packet count, high burst size" tunneling vector.
+
+## Deviations from the task spec
+
+- The spec's `TokenBucket` sketch used `AtomicU64` for `tokens` and `last_refill`.  Since each `ConformanceMeter` (and its bucket) is owned by a single tokio task (the per-participant forwarding loop), atomics are unnecessary.  I used plain `f64` / `Instant` fields instead.
+
+## Verification output
+
+```bash
+$ cargo test -p wzp-relay token_bucket
+running 4 tests
+test conformance::tests::token_bucket_large_burst_fails ... ok
+test conformance::tests::token_bucket_refills_over_time ... ok
+test conformance::tests::token_bucket_small_burst_ok ... ok
+test conformance::tests::token_bucket_sustained_rate_balanced ... ok
+
+test result: ok. 4 passed; 0 failed; 0 ignored; 0 measured; 89 filtered out; finished in 0.00s
+```
+
+```bash
+$ cargo test -p wzp-relay --lib
+running 93 tests
+...
+test result: ok. 93 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.01s
+```
+
+```bash
+$ cargo test --workspace --exclude wzp-android --no-fail-fast
+... (all crates pass)
+Total: 617 passed; 0 failed
+```
+
+## Test summary
+
+- Tests added: 5
+  - `token_bucket_small_burst_ok`
+  - `token_bucket_large_burst_fails`
+  - `token_bucket_refills_over_time`
+  - `token_bucket_sustained_rate_balanced`
+  - `conformance_tier_e_integration`
+- Tests modified: 0
+- Workspace test count before: 612 / after: 617
+- `cargo clippy -p wzp-relay --all-targets -- -D warnings`: clean in `wzp-relay`; failures are pre-existing debt in `wzp-codec` (9 errors) and `warzone-protocol` (3 errors)
+- `cargo fmt --all -- --check`: pass
+
+## Risks / follow-ups
+
+- Monthly byte quota (50 GB authed / 1 GB anon) is not yet implemented.  The `is_authenticated` flag is now threaded through the forwarding loop so a future task can add a per-fingerprint monthly counter alongside the per-session token bucket.
+- Video sessions will need `TokenBucket::for_video_session()` (5 Mbps cap) once video forwarding loops land in Wave 4.
+- Tier E is observe-only, consistent with Tiers A–D.  Hard enforcement (packet drop or session close) can be wired later if the reviewer wants.
+
+## Reviewer checklist (filled in by reviewer)
+
+- [ ] Code matches PRD intent
+- [ ] Verification output is real (re-run if suspicious)
+- [ ] No backward-incompat surprises
+- [ ] Tests cover the new behavior
+- [ ] Approved