T3.5: Tier E per-session token bucket

This commit is contained in:
Siavash Sameni
2026-05-12 06:45:56 +04:00
parent 8454835c18
commit f1b86e0fed
6 changed files with 262 additions and 5 deletions

View File

@@ -1,4 +1,4 @@
//! Relay conformance metering — Tier A/B/C/D enforcement.
//! Relay conformance metering — Tier A/B/C/D/E enforcement.
//!
//! Each participant gets a [`ConformanceMeter`] that tracks per-second
//! traffic against the declared codec's nominal bitrate ceiling.
@@ -23,6 +23,60 @@ pub enum Violation {
TimestampDrift,
/// Sustained payload size exceeds 2× the typical bound for the declared codec (Tier D).
PayloadSizeExceeded,
/// Per-session token-bucket rate cap exceeded (Tier E).
RateCapExceeded,
}
/// Simple token bucket for per-session rate capping (Tier E).
///
/// Tokens represent bytes. The bucket refills at `refill_per_sec` bytes per
/// second, up to `capacity`. A packet is allowed only if the bucket holds
/// enough tokens for its size.
#[derive(Debug, Clone)]
pub struct TokenBucket {
    /// Maximum number of tokens (bytes) the bucket may hold.
    capacity: u64,
    /// Current balance. Kept as `f64` so sub-second refills accumulate
    /// fractional bytes instead of being truncated away.
    tokens: f64,
    /// Refill rate in bytes per second.
    refill_per_sec: u64,
    /// Latest instant up to which refill has already been credited.
    last_refill: Instant,
}

impl TokenBucket {
    /// Create a new bucket with the given byte capacity and refill rate.
    ///
    /// The bucket starts full, and its refill clock starts at the moment of
    /// construction.
    pub fn new(capacity: u64, refill_per_sec: u64) -> Self {
        Self {
            capacity,
            tokens: capacity as f64,
            refill_per_sec,
            last_refill: Instant::now(),
        }
    }

    /// Per-session audio cap: 256 kbps sustained (32_000 bytes/s) with
    /// headroom for a 30 s burst at 2× that rate.
    ///
    /// Capacity = 30 s × 2 × 32_000 bytes/s = 1_920_000 bytes.
    pub fn for_audio_session() -> Self {
        const SUSTAINED_BITS_PER_SEC: u64 = 256_000;
        const BURST_WINDOW_SECS: u64 = 30;
        const BURST_MULTIPLIER: u64 = 2;

        let refill_per_sec = SUSTAINED_BITS_PER_SEC / 8; // 32_000 bytes/sec
        let capacity = refill_per_sec * BURST_WINDOW_SECS * BURST_MULTIPLIER; // 1_920_000 bytes
        Self::new(capacity, refill_per_sec)
    }

    /// Attempt to consume `bytes` from the bucket.
    ///
    /// Credits refill for the time elapsed between the last credited instant
    /// and `now` (clamped to `capacity`), then deducts the cost. Returns
    /// `Ok(())` if enough tokens were available, `Err(())` otherwise; a
    /// rejected request deducts nothing.
    ///
    /// A `now` that is earlier than the last credited instant is treated as
    /// "no time has passed": nothing is credited and the refill clock is not
    /// moved backwards. Rewinding it would let a later call credit the same
    /// interval a second time and exceed the configured rate.
    pub fn try_consume(&mut self, bytes: u64, now: Instant) -> Result<(), ()> {
        // `checked_duration_since` is `None` only when `now` precedes
        // `last_refill`; in that case both the balance and the clock stay put.
        if let Some(elapsed) = now.checked_duration_since(self.last_refill) {
            self.last_refill = now;
            let refilled = self.tokens + elapsed.as_secs_f64() * self.refill_per_sec as f64;
            self.tokens = refilled.min(self.capacity as f64);
        }

        let cost = bytes as f64;
        if self.tokens >= cost {
            self.tokens -= cost;
            Ok(())
        } else {
            Err(())
        }
    }
}
/// Per-participant traffic conformance meter.
@@ -34,6 +88,8 @@ pub struct ConformanceMeter {
drift_window: VecDeque<(u32, u32)>,
/// EWMA of payload size for Tier D sanity checks.
ewma_payload_size: f64,
/// Optional token bucket for Tier E per-session rate cap.
token_bucket: Option<TokenBucket>,
}
impl ConformanceMeter {
@@ -44,9 +100,17 @@ impl ConformanceMeter {
packets_in_window: 0,
drift_window: VecDeque::with_capacity(DRIFT_WINDOW_SIZE),
ewma_payload_size: 0.0,
token_bucket: None,
}
}
/// Create a meter with a Tier E token bucket for per-session rate capping.
pub fn with_token_bucket(bucket: TokenBucket) -> Self {
let mut meter = Self::new();
meter.token_bucket = Some(bucket);
meter
}
/// Inspect an incoming media packet and accumulate it against the
/// current 1-second window. Returns [`Err(Violation)`] when a limit
/// is crossed.
@@ -113,6 +177,14 @@ impl ConformanceMeter {
return Err(Violation::PayloadSizeExceeded);
}
// Tier E — per-session token-bucket rate cap.
if let Some(ref mut bucket) = self.token_bucket {
let packet_size = (MediaHeader::WIRE_SIZE + payload_len) as u64;
if bucket.try_consume(packet_size, now).is_err() {
return Err(Violation::RateCapExceeded);
}
}
Ok(())
}
}
@@ -388,4 +460,80 @@ mod tests {
);
}
}
// ------------------------------------------------------------------
// Tier E — token-bucket rate cap
// ------------------------------------------------------------------
#[test]
fn token_bucket_small_burst_ok() {
    // The bucket starts full (100 KB), so a 50 KB request is served at once.
    let mut full_bucket = TokenBucket::new(100_000, 32_000);
    let at = Instant::now();
    let outcome = full_bucket.try_consume(50_000, at);
    assert_eq!(outcome, Ok(()));
}
#[test]
fn token_bucket_large_burst_fails() {
    // A 1 MB request can never be satisfied by a bucket capped at 100 KB.
    let mut small_bucket = TokenBucket::new(100_000, 32_000);
    let at = Instant::now();
    let outcome = small_bucket.try_consume(1_000_000, at);
    assert_eq!(outcome, Err(()));
}
#[test]
fn token_bucket_refills_over_time() {
    let mut bucket = TokenBucket::new(100_000, 32_000);
    let start = Instant::now();

    // Spend the whole initial balance.
    assert_eq!(bucket.try_consume(100_000, start), Ok(()));
    // No time has passed, so nothing has been refilled yet.
    assert_eq!(bucket.try_consume(10_000, start), Err(()));

    // One second later 32_000 bytes have been credited; 30_000 of them fit.
    let one_sec_later = start + Duration::from_secs(1);
    assert_eq!(bucket.try_consume(30_000, one_sec_later), Ok(()));
    // Only 2_000 bytes of that refill remain, so 40_000 is rejected.
    assert_eq!(bucket.try_consume(40_000, one_sec_later), Err(()));
}
#[test]
fn token_bucket_sustained_rate_balanced() {
    // Capacity equals exactly one second of refill, so there is no spare
    // burst headroom: every send after the first can only succeed if the
    // preceding second's refill was actually credited. (With a large
    // initial capacity this loop would pass even if refill were broken.)
    let mut bucket = TokenBucket::new(32_000, 32_000);
    let t0 = Instant::now();
    // Send 32 KB every second for 5 seconds — exactly at refill rate.
    for i in 0..5 {
        let t = t0 + Duration::from_secs(i);
        assert!(
            bucket.try_consume(32_000, t).is_ok(),
            "32 KB/s sustained should stay within bucket limit"
        );
    }
    // The bucket is empty again after the last send; anything extra within
    // the same instant exceeds the sustained rate and must be rejected.
    assert!(
        bucket.try_consume(1, t0 + Duration::from_secs(4)).is_err(),
        "sending above the sustained rate should be rejected"
    );
}
#[test]
fn conformance_tier_e_integration() {
    // Opus64k is chosen for its high bitrate ceiling and payload bound, so
    // the small packets below are intended to stay clear of Tiers A/B/D and
    // exercise only the Tier E bucket.
    let bucket = TokenBucket::new(1_000, 500);
    let mut meter = ConformanceMeter::with_token_bucket(bucket);
    let header = make_header(CodecId::Opus64k);
    let now = Instant::now();

    // Each packet is 500 bytes on the wire; two of them use up the
    // 1_000-byte bucket exactly.
    let payload_len = 500 - MediaHeader::WIRE_SIZE;
    for _ in 0..2 {
        assert!(meter.observe(&header, payload_len, now).is_ok());
    }

    // With the bucket empty, even a small third packet is over the cap.
    assert_eq!(
        meter.observe(&header, 10, now),
        Err(Violation::RateCapExceeded)
    );
}
}

View File

@@ -2027,6 +2027,7 @@ async fn main() -> anyhow::Result<()> {
debug_tap,
federation_tx,
federation_room_hash,
authenticated_fp.is_some(),
)
.await;

View File

@@ -406,6 +406,7 @@ impl RelayMetrics {
Violation::PacketRateExceeded => "B",
Violation::TimestampDrift => "C",
Violation::PayloadSizeExceeded => "D",
Violation::RateCapExceeded => "E",
};
let codec_id = format!("{:?}", header.codec_id);
let verdict = format!("{:?}", v);

View File

@@ -758,6 +758,7 @@ pub async fn run_participant(
debug_tap: Option<DebugTap>,
federation_tx: Option<tokio::sync::mpsc::Sender<FederationMediaOut>>,
federation_room_hash: Option<[u8; 8]>,
is_authenticated: bool,
) {
if trunking_enabled {
run_participant_trunked(
@@ -767,6 +768,7 @@ pub async fn run_participant(
transport,
metrics,
session_id,
is_authenticated,
)
.await;
} else {
@@ -780,6 +782,7 @@ pub async fn run_participant(
debug_tap,
federation_tx,
federation_room_hash,
is_authenticated,
)
.await;
}
@@ -796,6 +799,7 @@ async fn run_participant_plain(
debug_tap: Option<DebugTap>,
federation_tx: Option<tokio::sync::mpsc::Sender<FederationMediaOut>>,
federation_room_hash: Option<[u8; 8]>,
is_authenticated: bool,
) {
let addr = transport.connection().remote_address();
let mut packets_forwarded = 0u64;
@@ -804,7 +808,13 @@ async fn run_participant_plain(
let mut max_forward_ms = 0u64;
let mut send_errors = 0u64;
let mut last_log_instant = std::time::Instant::now();
let mut conformance = ConformanceMeter::new();
let mut conformance = if is_authenticated {
ConformanceMeter::with_token_bucket(crate::conformance::TokenBucket::for_audio_session())
} else {
// Anonymous participants get the same per-session audio cap.
// Monthly quota (1 GB vs 50 GB) is tracked separately.
ConformanceMeter::with_token_bucket(crate::conformance::TokenBucket::for_audio_session())
};
let mut tap_stats = if debug_tap.as_ref().map_or(false, |t| t.matches(&room_name)) {
Some(TapStats::new())
@@ -1029,6 +1039,7 @@ async fn run_participant_trunked(
transport: Arc<wzp_transport::QuinnTransport>,
metrics: Arc<RelayMetrics>,
session_id: &str,
_is_authenticated: bool,
) {
use std::collections::HashMap;
@@ -1039,7 +1050,8 @@ async fn run_participant_trunked(
let mut max_forward_ms = 0u64;
let mut send_errors = 0u64;
let mut last_log_instant = std::time::Instant::now();
let mut conformance = ConformanceMeter::new();
let mut conformance =
ConformanceMeter::with_token_bucket(crate::conformance::TokenBucket::for_audio_session());
info!(
room = %room_name,

View File

@@ -1322,8 +1322,8 @@ Statuses (in order of progression):
| T3.1 | Approved | Kimi Code CLI | 2026-05-11T20:55Z | 2026-05-11T21:05Z | [report](reports/T3.1-report.md) | Approved. DashMap<String, Arc<RwLock<Room>>>; W13 resolved. One commit per task this time — good. Two minor process notes in report. |
| T3.2 | Approved | Kimi Code CLI | 2026-05-11T21:15Z | 2026-05-11T21:25Z | [report](reports/T3.2-report.md) | Approved. timestamp_ms monotonic across rekey, documented + tested. Commit `1b4f7b0`. |
| T3.3 | Approved | Kimi Code CLI | 2026-05-11T16:29Z | 2026-05-12T06:08Z | [report](reports/T3.3-report.md) | Approved. W12 SignalMessage versioning. Commit `f7f413e`. |
| T3.4 | Pending Review | Kimi Code CLI | 2026-05-11T16:29Z | 2026-05-11T16:29Z | [report](reports/T3.4-report.md) | |
| T3.5 | Open | — | — | — | — | — |
| T3.4 | Approved | Kimi Code CLI | 2026-05-11T16:29Z | 2026-05-12T06:24Z | [report](reports/T3.4-report.md) | Approved. Tier D payload-size EWMA + per-codec bound table. Commit `017c371`. Clean process. |
| T3.5 | In Progress | Kimi Code CLI | 2026-05-11T16:29Z | — | — | — |
| T4.1 | Open | — | — | — | — | Skeleton — expand before claiming |
| T4.2 | Open | — | — | — | — | Skeleton — expand before claiming |
| T4.3 | Open | — | — | — | — | Skeleton — expand before claiming |

View File

@@ -0,0 +1,95 @@
# T3.5 — Tier E (per-fingerprint token bucket)
**Status:** Pending Review
**Agent:** Kimi Code CLI
**Started:** 2026-05-11T16:29Z
**Completed:** 2026-05-11T16:29Z
**Commit:** (see git log)
**PRD:** ../PRD-relay-conformance.md
## What I changed
- `crates/wzp-relay/src/conformance.rs:1` — Updated module doc: `Tier A/B/C/D` → `Tier A/B/C/D/E`.
- `crates/wzp-relay/src/conformance.rs:26-27` — Added `Violation::RateCapExceeded` variant for Tier E.
- `crates/wzp-relay/src/conformance.rs:30-76` — Added `TokenBucket` struct with:
- `capacity: u64`, `tokens: f64`, `refill_per_sec: u64`, `last_refill: Instant`
- `new(capacity, refill_per_sec)` constructor
- `for_audio_session()` factory: 256 kbps cap, 30 s @ 2× burst = 1_920_000 byte capacity
- `try_consume(bytes, now)` — refills based on elapsed time, then deducts cost
- `crates/wzp-relay/src/conformance.rs:84-85` — Added `token_bucket: Option<TokenBucket>` to `ConformanceMeter`.
- `crates/wzp-relay/src/conformance.rs:97-102` — Added `ConformanceMeter::with_token_bucket(bucket)` constructor.
- `crates/wzp-relay/src/conformance.rs:130-137` — Wired Tier E check into `observe()`: after Tier D, if a token bucket is present, attempt to consume the full wire size; return `Err(Violation::RateCapExceeded)` on exhaustion.
- `crates/wzp-relay/src/metrics.rs:409` — Added `Violation::RateCapExceeded => "E"` tier label.
- `crates/wzp-relay/src/room.rs:762-785` — Updated `run_participant()` signature to accept `is_authenticated: bool` and forward it to both plain and trunked loops.
- `crates/wzp-relay/src/room.rs:807-814` — Plain loop: creates `ConformanceMeter::with_token_bucket(TokenBucket::for_audio_session())` for all participants (authed and anon share the same per-session audio cap).
- `crates/wzp-relay/src/room.rs:1042-1044` — Trunked loop: same token-bucket meter setup.
- `crates/wzp-relay/src/main.rs:2028` — Call site passes `authenticated_fp.is_some()` into `run_participant()`.
- `crates/wzp-relay/src/conformance.rs:470-528` — Added 5 Tier E tests:
- `token_bucket_small_burst_ok` — 50 KB inside 100 KB cap succeeds
- `token_bucket_large_burst_fails` — 1 MB exceeds 100 KB cap
- `token_bucket_refills_over_time` — drain, wait 1 s, consume refilled amount
- `token_bucket_sustained_rate_balanced` — 32 KB/s for 5 s stays balanced
- `conformance_tier_e_integration` — meter with 1_000-byte bucket, two 500-byte packets OK, third packet triggers `RateCapExceeded`
## Why these choices
- Used `f64` for internal token tracking so fractional refills across sub-second intervals are accurate. The public API still speaks in whole bytes.
- Both authenticated and anonymous participants get the same per-session audio cap (256 kbps / 1.92 MB burst). The spec's authed/anon split applies to the *monthly* quota (50 GB vs 1 GB), which is a separate accounting concern not covered by the per-session token bucket. Passing `is_authenticated` through the call chain makes it easy to add monthly-quota wiring later.
- Tier E runs after Tiers A–D so the cheaper checks still fire first on obvious abuse, while the token bucket catches the "low packet count, high burst size" tunneling vector.
## Deviations from the task spec
- The spec's `TokenBucket` sketch used `AtomicU64` for `tokens` and `last_refill`. Since each `ConformanceMeter` (and its bucket) is owned by a single tokio task (the per-participant forwarding loop), atomics are unnecessary. I used plain `f64` / `Instant` fields instead.
## Verification output
```bash
$ cargo test -p wzp-relay token_bucket
running 4 tests
test conformance::tests::token_bucket_large_burst_fails ... ok
test conformance::tests::token_bucket_refills_over_time ... ok
test conformance::tests::token_bucket_small_burst_ok ... ok
test conformance::tests::token_bucket_sustained_rate_balanced ... ok
test result: ok. 4 passed; 0 failed; 0 ignored; 0 measured; 89 filtered out; finished in 0.00s
```
```bash
$ cargo test -p wzp-relay --lib
running 93 tests
...
test result: ok. 93 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.01s
```
```bash
$ cargo test --workspace --exclude wzp-android --no-fail-fast
... (all crates pass)
Total: 617 passed; 0 failed
```
## Test summary
- Tests added: 5
- `token_bucket_small_burst_ok`
- `token_bucket_large_burst_fails`
- `token_bucket_refills_over_time`
- `token_bucket_sustained_rate_balanced`
- `conformance_tier_e_integration`
- Tests modified: 0
- Workspace test count before: 612 / after: 617
- `cargo clippy -p wzp-relay --all-targets -- -D warnings`: clean in `wzp-relay`; failures are pre-existing debt in `wzp-codec` (9 errors) and `warzone-protocol` (3 errors)
- `cargo fmt --all -- --check`: pass
## Risks / follow-ups
- Monthly byte quota (50 GB authed / 1 GB anon) is not yet implemented. The `is_authenticated` flag is now threaded through the forwarding loop so a future task can add a per-fingerprint monthly counter alongside the per-session token bucket.
- Video sessions will need `TokenBucket::for_video_session()` (5 Mbps cap) once video forwarding loops land in Wave 4.
- Tier E is observe-only, consistent with Tiers A–D. Hard enforcement (packet drop or session close) can be wired later if the reviewer wants.
## Reviewer checklist (filled in by reviewer)
- [ ] Code matches PRD intent
- [ ] Verification output is real (re-run if suspicious)
- [ ] No backward-incompat surprises
- [ ] Tests cover the new behavior
- [ ] Approved