fix(video): add frame metadata and Android encode diagnostics
Some checks failed
Mirror to GitHub / mirror (push) Failing after 41s
Build Release Binaries / build-amd64 (push) Failing after 4m7s

This commit is contained in:
Siavash Sameni
2026-05-26 11:28:17 +04:00
parent 9a7745978b
commit 112472609e
8 changed files with 574 additions and 85 deletions

View File

@@ -243,6 +243,7 @@ struct VideoStreamProbe {
last_frame: Option<Instant>, last_frame: Option<Instant>,
reassembler: VideoReassembler, reassembler: VideoReassembler,
decoder: Option<Box<dyn VideoDecoder>>, decoder: Option<Box<dyn VideoDecoder>>,
decoder_key: Option<(CodecId, u32, u32)>,
decode_ok: u64, decode_ok: u64,
decode_pending: u64, decode_pending: u64,
decode_err: u64, decode_err: u64,
@@ -275,6 +276,7 @@ impl VideoStreamProbe {
last_frame: None, last_frame: None,
reassembler: VideoReassembler::new(), reassembler: VideoReassembler::new(),
decoder, decoder,
decoder_key: decode.then_some((codec, 1280, 720)),
decode_ok: 0, decode_ok: 0,
decode_pending: 0, decode_pending: 0,
decode_err: 0, decode_err: 0,
@@ -294,6 +296,7 @@ impl VideoStreamProbe {
.is_some() .is_some()
.then(|| create_video_decoder(self.codec, 1280, 720).ok()) .then(|| create_video_decoder(self.codec, 1280, 720).ok())
.flatten(); .flatten();
self.decoder_key = self.decoder.as_ref().map(|_| (self.codec, 1280, 720));
} }
if self.seq_initialized { if self.seq_initialized {
let expected = self.last_seq.wrapping_add(1); let expected = self.last_seq.wrapping_add(1);
@@ -305,19 +308,26 @@ impl VideoStreamProbe {
self.last_seq = pkt.header.seq; self.last_seq = pkt.header.seq;
self.seq_initialized = true; self.seq_initialized = true;
if let Some((codec, keyframe, frame)) = self.reassembler.push(pkt) { if let Some(frame) = self.reassembler.push(pkt) {
self.frames += 1; self.frames += 1;
self.bytes += frame.len() as u64; self.bytes += frame.data.len() as u64;
self.max_frame_bytes = self.max_frame_bytes.max(frame.len()); self.max_frame_bytes = self.max_frame_bytes.max(frame.data.len());
self.last_frame = Some(now); self.last_frame = Some(now);
if keyframe { if frame.is_keyframe {
self.keyframes += 1; self.keyframes += 1;
} }
if codec != self.codec { if frame.codec_id != self.codec {
self.codec = codec; self.codec = frame.codec_id;
}
let frame_width = frame.width.unwrap_or(1280) as u32;
let frame_height = frame.height.unwrap_or(720) as u32;
let decoder_key = (self.codec, frame_width, frame_height);
if self.decoder.is_some() && self.decoder_key != Some(decoder_key) {
self.decoder = create_video_decoder(self.codec, frame_width, frame_height).ok();
self.decoder_key = self.decoder.as_ref().map(|_| decoder_key);
} }
if let Some(decoder) = self.decoder.as_mut() { if let Some(decoder) = self.decoder.as_mut() {
match decoder.decode(&frame) { match decoder.decode(&frame.data) {
Ok(Some(decoded)) => { Ok(Some(decoded)) => {
self.decode_ok += 1; self.decode_ok += 1;
self.last_decode_debug = decoder.debug_snapshot(); self.last_decode_debug = decoder.debug_snapshot();

View File

@@ -49,6 +49,11 @@ pub trait VideoEncoder: Send {
/// ///
/// Default implementation is a no-op. /// Default implementation is a no-op.
fn set_mode(&mut self, _mode: crate::EncoderMode) {} fn set_mode(&mut self, _mode: crate::EncoderMode) {}
/// Optional platform-specific encoder state for debug logs.
fn debug_snapshot(&self) -> Option<String> {
None
}
} }
/// Raw video frame input for encoding. /// Raw video frame input for encoding.

View File

@@ -27,6 +27,8 @@ pub struct MediaCodecEncoder {
width: u32, width: u32,
#[cfg(target_os = "android")] #[cfg(target_os = "android")]
height: u32, height: u32,
#[cfg(target_os = "android")]
input_format_logged: bool,
force_keyframe: bool, force_keyframe: bool,
#[cfg(not(target_os = "android"))] #[cfg(not(target_os = "android"))]
_width: u32, _width: u32,
@@ -86,6 +88,7 @@ impl MediaCodecEncoder {
codec, codec,
width, width,
height, height,
input_format_logged: false,
force_keyframe: false, force_keyframe: false,
}) })
} }
@@ -123,8 +126,34 @@ impl VideoEncoder for MediaCodecEncoder {
if self.force_keyframe { if self.force_keyframe {
self.request_sync_frame(); self.request_sync_frame();
} }
let input = let layout = encoder_input_layout(&self.codec, self.width, self.height);
i420_to_nv12(&frame.data, self.width as usize, self.height as usize)?; if !self.input_format_logged {
self.input_format_logged = true;
log_media_codec_input_format("h264_encoder_input", &self.codec, &layout);
}
let input_capacity = { buffer.buffer_mut().len() };
let mut input = i420_to_padded_nv12(
&frame.data,
self.width as usize,
self.height as usize,
layout.stride,
layout.slice_height,
)?;
if input.len() > input_capacity {
tracing::warn!(
target: "wzp_video::mediacodec",
padded_len = input.len(),
input_capacity,
"MediaCodec H.264 input buffer smaller than padded layout; falling back to tight NV12"
);
input = i420_to_padded_nv12(
&frame.data,
self.width as usize,
self.height as usize,
self.width as usize,
self.height as usize,
)?;
}
let to_copy = { let to_copy = {
let buf = buffer.buffer_mut(); let buf = buffer.buffer_mut();
let n = input.len().min(buf.len()); let n = input.len().min(buf.len());
@@ -398,6 +427,8 @@ pub struct MediaCodecHevcEncoder {
width: u32, width: u32,
#[cfg(target_os = "android")] #[cfg(target_os = "android")]
height: u32, height: u32,
#[cfg(target_os = "android")]
input_format_logged: bool,
force_keyframe: bool, force_keyframe: bool,
#[cfg(not(target_os = "android"))] #[cfg(not(target_os = "android"))]
_width: u32, _width: u32,
@@ -439,6 +470,7 @@ impl MediaCodecHevcEncoder {
codec, codec,
width, width,
height, height,
input_format_logged: false,
force_keyframe: false, force_keyframe: false,
}) })
} }
@@ -476,10 +508,38 @@ impl VideoEncoder for MediaCodecHevcEncoder {
} else { } else {
0 0
}; };
let layout = encoder_input_layout(&self.codec, self.width, self.height);
if !self.input_format_logged {
self.input_format_logged = true;
log_media_codec_input_format("hevc_encoder_input", &self.codec, &layout);
}
let input_capacity = { buffer.buffer_mut().len() };
let mut input = i420_to_padded_planar(
&frame.data,
self.width as usize,
self.height as usize,
layout.stride,
layout.slice_height,
)?;
if input.len() > input_capacity {
tracing::warn!(
target: "wzp_video::mediacodec",
padded_len = input.len(),
input_capacity,
"MediaCodec HEVC input buffer smaller than padded layout; falling back to tight I420"
);
input = i420_to_padded_planar(
&frame.data,
self.width as usize,
self.height as usize,
self.width as usize,
self.height as usize,
)?;
}
let to_copy = { let to_copy = {
let buf = buffer.buffer_mut(); let buf = buffer.buffer_mut();
let n = frame.data.len().min(buf.len()); let n = input.len().min(buf.len());
for (d, &s) in buf[..n].iter_mut().zip(frame.data[..n].iter()) { for (d, &s) in buf[..n].iter_mut().zip(input[..n].iter()) {
d.write(s); d.write(s);
} }
n n
@@ -1153,6 +1213,46 @@ fn positive_format_usize(format: &MediaFormat, key: &str) -> Option<usize> {
(value > 0).then_some(value as usize) (value > 0).then_some(value as usize)
} }
#[cfg(target_os = "android")]
#[derive(Clone, Copy, Debug)]
struct EncoderInputLayout {
stride: usize,
slice_height: usize,
}
#[cfg(target_os = "android")]
fn encoder_input_layout(codec: &MediaCodec, width: u32, height: u32) -> EncoderInputLayout {
// ndk 0.9 exposes AMediaCodec_getInputFormat only behind API 28, while
// this app still targets API 26. Keep encoder input tight until we can
// query the actual input format. Guessing padded rows here is dangerous:
// when the encoder actually reads tight input, padding bytes become pixels
// from the next row and show up as diagonal green bands.
let _ = codec;
let width = width as usize;
let height = height as usize;
EncoderInputLayout {
stride: width,
slice_height: height,
}
}
#[cfg(target_os = "android")]
fn log_media_codec_input_format(label: &str, codec: &MediaCodec, layout: &EncoderInputLayout) {
let format = codec.output_format();
tracing::info!(
target: "wzp_video::mediacodec",
label,
color_format = format.i32("color-format"),
width = format.i32("width"),
height = format.i32("height"),
stride = format.i32("stride"),
slice_height = format.i32("slice-height"),
effective_stride = layout.stride,
effective_slice_height = layout.slice_height,
"MediaCodec input format"
);
}
#[cfg(target_os = "android")] #[cfg(target_os = "android")]
fn log_media_codec_format(label: &str, codec: &MediaCodec) { fn log_media_codec_format(label: &str, codec: &MediaCodec) {
let format = codec.output_format(); let format = codec.output_format();
@@ -1183,7 +1283,13 @@ fn i420_len(width: usize, height: usize) -> Result<usize, VideoError> {
} }
#[cfg(target_os = "android")] #[cfg(target_os = "android")]
fn i420_to_nv12(src: &[u8], width: usize, height: usize) -> Result<Vec<u8>, VideoError> { fn i420_to_padded_nv12(
src: &[u8],
width: usize,
height: usize,
stride: usize,
slice_height: usize,
) -> Result<Vec<u8>, VideoError> {
let y_size = width.checked_mul(height).ok_or_else(|| { let y_size = width.checked_mul(height).ok_or_else(|| {
VideoError::InvalidInput(format!("invalid frame dimensions {width}x{height}")) VideoError::InvalidInput(format!("invalid frame dimensions {width}x{height}"))
})?; })?;
@@ -1196,14 +1302,124 @@ fn i420_to_nv12(src: &[u8], width: usize, height: usize) -> Result<Vec<u8>, Vide
))); )));
} }
let mut out = vec![0u8; expected]; if stride < width || slice_height < height {
out[..y_size].copy_from_slice(&src[..y_size]); return Err(VideoError::InvalidInput(format!(
"invalid encoder input layout {stride}x{slice_height} for {width}x{height}"
)));
}
let chroma_width = width / 2;
let chroma_height = height / 2;
let y_stride = stride;
let uv_stride = stride;
let y_slice_height = slice_height;
let uv_slice_height = (slice_height / 2).max(chroma_height);
let y_padded_size = y_stride.checked_mul(y_slice_height).ok_or_else(|| {
VideoError::InvalidInput(format!(
"invalid padded Y layout {y_stride}x{y_slice_height}"
))
})?;
let uv_padded_size = uv_stride.checked_mul(uv_slice_height).ok_or_else(|| {
VideoError::InvalidInput(format!(
"invalid padded UV layout {uv_stride}x{uv_slice_height}"
))
})?;
let total = y_padded_size
.checked_add(uv_padded_size)
.ok_or_else(|| VideoError::InvalidInput("padded NV12 size overflow".into()))?;
let mut out = vec![0u8; total];
out[y_padded_size..].fill(128);
for row in 0..height {
let src_off = row * width;
let dst_off = row * y_stride;
out[dst_off..dst_off + width].copy_from_slice(&src[src_off..src_off + width]);
}
let u = &src[y_size..y_size + uv_size]; let u = &src[y_size..y_size + uv_size];
let v = &src[y_size + uv_size..y_size + uv_size * 2]; let v = &src[y_size + uv_size..y_size + uv_size * 2];
for i in 0..uv_size { for row in 0..chroma_height {
out[y_size + i * 2] = u[i]; let src_row = row * chroma_width;
out[y_size + i * 2 + 1] = v[i]; let dst_row = y_padded_size + row * uv_stride;
for col in 0..chroma_width {
out[dst_row + col * 2] = u[src_row + col];
out[dst_row + col * 2 + 1] = v[src_row + col];
} }
}
Ok(out)
}
#[cfg(target_os = "android")]
fn i420_to_padded_planar(
src: &[u8],
width: usize,
height: usize,
stride: usize,
slice_height: usize,
) -> Result<Vec<u8>, VideoError> {
let y_size = width.checked_mul(height).ok_or_else(|| {
VideoError::InvalidInput(format!("invalid frame dimensions {width}x{height}"))
})?;
let uv_size = y_size / 4;
let expected = y_size + uv_size * 2;
if src.len() < expected {
return Err(VideoError::InvalidInput(format!(
"I420 frame too small for padded planar copy: {} bytes, expected {expected}",
src.len()
)));
}
if stride < width || slice_height < height {
return Err(VideoError::InvalidInput(format!(
"invalid encoder input layout {stride}x{slice_height} for {width}x{height}"
)));
}
let chroma_width = width / 2;
let chroma_height = height / 2;
let y_stride = stride;
let chroma_stride = (stride / 2).max(chroma_width);
let y_slice_height = slice_height;
let chroma_slice_height = (slice_height / 2).max(chroma_height);
let y_padded_size = y_stride.checked_mul(y_slice_height).ok_or_else(|| {
VideoError::InvalidInput(format!(
"invalid padded Y layout {y_stride}x{y_slice_height}"
))
})?;
let chroma_padded_size = chroma_stride
.checked_mul(chroma_slice_height)
.ok_or_else(|| {
VideoError::InvalidInput(format!(
"invalid padded chroma layout {chroma_stride}x{chroma_slice_height}"
))
})?;
let chroma_total = chroma_padded_size
.checked_mul(2)
.ok_or_else(|| VideoError::InvalidInput("padded I420 chroma size overflow".into()))?;
let total = y_padded_size
.checked_add(chroma_total)
.ok_or_else(|| VideoError::InvalidInput("padded I420 size overflow".into()))?;
let mut out = vec![0u8; total];
out[y_padded_size..].fill(128);
for row in 0..height {
let src_off = row * width;
let dst_off = row * y_stride;
out[dst_off..dst_off + width].copy_from_slice(&src[src_off..src_off + width]);
}
let src_u = y_size;
let src_v = y_size + uv_size;
let dst_u = y_padded_size;
let dst_v = y_padded_size + chroma_padded_size;
for row in 0..chroma_height {
let src_off = row * chroma_width;
let dst_off = row * chroma_stride;
out[dst_u + dst_off..dst_u + dst_off + chroma_width]
.copy_from_slice(&src[src_u + src_off..src_u + src_off + chroma_width]);
out[dst_v + dst_off..dst_v + dst_off + chroma_width]
.copy_from_slice(&src[src_v + src_off..src_v + src_off + chroma_width]);
}
Ok(out) Ok(out)
} }
@@ -1410,7 +1626,7 @@ fn split_annex_b(data: &[u8]) -> Vec<&[u8]> {
/// Android MediaCodec `csd-0`. /// Android MediaCodec `csd-0`.
#[allow(dead_code)] #[allow(dead_code)]
fn extract_sequence_header_obu(data: &[u8]) -> Option<Vec<u8>> { fn extract_sequence_header_obu(data: &[u8]) -> Option<Vec<u8>> {
use crate::av1_obu::{ObuHeader, read_leb128}; use crate::av1_obu::{read_leb128, ObuHeader};
let mut i = 0usize; let mut i = 0usize;
while i < data.len() { while i < data.len() {
let header = ObuHeader::from_byte(data[i]); let header = ObuHeader::from_byte(data[i]);

View File

@@ -28,6 +28,24 @@ use wzp_proto::{CodecId, MediaHeaderV2, MediaPacket, MediaType};
/// 1200 (QUIC MTU) 16 (MediaHeaderV2) 16 (AEAD tag) = 1168. /// 1200 (QUIC MTU) 16 (MediaHeaderV2) 16 (AEAD tag) = 1168.
pub const VIDEO_MAX_PAYLOAD: usize = 1168; pub const VIDEO_MAX_PAYLOAD: usize = 1168;
const VIDEO_FRAME_META_MAGIC: [u8; 4] = *b"WZV1";
const VIDEO_FRAME_META_LEN: usize = 8;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct VideoFrameMeta {
pub width: u16,
pub height: u16,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ReassembledVideoFrame {
pub codec_id: CodecId,
pub is_keyframe: bool,
pub width: Option<u16>,
pub height: Option<u16>,
pub data: Vec<u8>,
}
/// Fragments one encoded video frame into a sequence of [`MediaPacket`]s. /// Fragments one encoded video frame into a sequence of [`MediaPacket`]s.
/// ///
/// Pass each `MediaPacket` to `transport.send_media()`. /// Pass each `MediaPacket` to `transport.send_media()`.
@@ -37,12 +55,20 @@ pub fn packetize_video_frame(
is_keyframe: bool, is_keyframe: bool,
seq: &mut u32, seq: &mut u32,
timestamp_ms: u32, timestamp_ms: u32,
width: u32,
height: u32,
) -> Vec<MediaPacket> { ) -> Vec<MediaPacket> {
if frame.is_empty() { if frame.is_empty() {
return vec![]; return vec![];
} }
let chunks: Vec<&[u8]> = frame.chunks(VIDEO_MAX_PAYLOAD).collect(); let mut framed = Vec::with_capacity(VIDEO_FRAME_META_LEN + frame.len());
framed.extend_from_slice(&VIDEO_FRAME_META_MAGIC);
framed.extend_from_slice(&(width.min(u16::MAX as u32) as u16).to_be_bytes());
framed.extend_from_slice(&(height.min(u16::MAX as u32) as u16).to_be_bytes());
framed.extend_from_slice(frame);
let chunks: Vec<&[u8]> = framed.chunks(VIDEO_MAX_PAYLOAD).collect();
let total = chunks.len().min(255); let total = chunks.len().min(255);
let mut packets = Vec::with_capacity(total); let mut packets = Vec::with_capacity(total);
@@ -118,9 +144,8 @@ impl VideoReassembler {
/// Push one received video packet. /// Push one received video packet.
/// ///
/// Returns `Some((codec_id, is_keyframe, frame_bytes))` when a complete /// Returns `Some(frame)` when a complete frame is ready, `None` otherwise.
/// frame is ready, `None` otherwise. pub fn push(&mut self, pkt: &MediaPacket) -> Option<ReassembledVideoFrame> {
pub fn push(&mut self, pkt: &MediaPacket) -> Option<(CodecId, bool, Vec<u8>)> {
let hdr = &pkt.header; let hdr = &pkt.header;
let fragment_index = (hdr.fec_block >> 8) as u8; let fragment_index = (hdr.fec_block >> 8) as u8;
let fragment_count = (hdr.fec_block & 0xFF) as u8; let fragment_count = (hdr.fec_block & 0xFF) as u8;
@@ -161,7 +186,14 @@ impl VideoReassembler {
for i in 0..total as u8 { for i in 0..total as u8 {
frame.extend_from_slice(pending.fragments.get(&i)?); frame.extend_from_slice(pending.fragments.get(&i)?);
} }
Some((codec_id, pending.is_keyframe, frame)) let (meta, data) = split_video_frame_payload(frame);
Some(ReassembledVideoFrame {
codec_id,
is_keyframe: pending.is_keyframe,
width: meta.map(|m| m.width),
height: meta.map(|m| m.height),
data,
})
} }
/// Evict stale pending frames older than `max_age_ms` milliseconds. /// Evict stale pending frames older than `max_age_ms` milliseconds.
@@ -169,12 +201,22 @@ impl VideoReassembler {
/// Call periodically (e.g. every 2s) to prevent accumulation of frames /// Call periodically (e.g. every 2s) to prevent accumulation of frames
/// whose first or middle fragments were lost. /// whose first or middle fragments were lost.
pub fn evict_stale(&mut self, current_timestamp_ms: u32, max_age_ms: u32) { pub fn evict_stale(&mut self, current_timestamp_ms: u32, max_age_ms: u32) {
self.pending.retain(|&ts, _| { self.pending
current_timestamp_ms.wrapping_sub(ts) <= max_age_ms .retain(|&ts, _| current_timestamp_ms.wrapping_sub(ts) <= max_age_ms);
});
} }
} }
fn split_video_frame_payload(mut frame: Vec<u8>) -> (Option<VideoFrameMeta>, Vec<u8>) {
if frame.len() < VIDEO_FRAME_META_LEN || frame[..4] != VIDEO_FRAME_META_MAGIC {
return (None, frame);
}
let width = u16::from_be_bytes([frame[4], frame[5]]);
let height = u16::from_be_bytes([frame[6], frame[7]]);
frame.drain(..VIDEO_FRAME_META_LEN);
(Some(VideoFrameMeta { width, height }), frame)
}
impl Default for VideoReassembler { impl Default for VideoReassembler {
fn default() -> Self { fn default() -> Self {
Self::new() Self::new()
@@ -193,7 +235,7 @@ mod tests {
fn single_fragment_roundtrip() { fn single_fragment_roundtrip() {
let frame = make_frame(100); let frame = make_frame(100);
let mut seq = 0u32; let mut seq = 0u32;
let pkts = packetize_video_frame(&frame, CodecId::Av1Main, true, &mut seq, 1000); let pkts = packetize_video_frame(&frame, CodecId::Av1Main, true, &mut seq, 1000, 640, 480);
assert_eq!(pkts.len(), 1); assert_eq!(pkts.len(), 1);
assert!(pkts[0].header.is_keyframe()); assert!(pkts[0].header.is_keyframe());
assert!(pkts[0].header.is_frame_end()); assert!(pkts[0].header.is_frame_end());
@@ -203,17 +245,27 @@ mod tests {
let mut reassembler = VideoReassembler::new(); let mut reassembler = VideoReassembler::new();
let result = reassembler.push(&pkts[0]); let result = reassembler.push(&pkts[0]);
assert!(result.is_some()); assert!(result.is_some());
let (codec, is_kf, data) = result.unwrap(); let result = result.unwrap();
assert_eq!(codec, CodecId::Av1Main); assert_eq!(result.codec_id, CodecId::Av1Main);
assert!(is_kf); assert!(result.is_keyframe);
assert_eq!(data, frame); assert_eq!(result.width, Some(640));
assert_eq!(result.height, Some(480));
assert_eq!(result.data, frame);
} }
#[test] #[test]
fn multi_fragment_roundtrip() { fn multi_fragment_roundtrip() {
let frame = make_frame(VIDEO_MAX_PAYLOAD * 3 + 50); let frame = make_frame(VIDEO_MAX_PAYLOAD * 3 + 50);
let mut seq = 0u32; let mut seq = 0u32;
let pkts = packetize_video_frame(&frame, CodecId::H264Baseline, false, &mut seq, 2000); let pkts = packetize_video_frame(
&frame,
CodecId::H264Baseline,
false,
&mut seq,
2000,
960,
540,
);
assert_eq!(pkts.len(), 4); assert_eq!(pkts.len(), 4);
assert!(!pkts[0].header.is_frame_end()); assert!(!pkts[0].header.is_frame_end());
assert!(pkts[3].header.is_frame_end()); assert!(pkts[3].header.is_frame_end());
@@ -224,33 +276,66 @@ mod tests {
for pkt in &pkts { for pkt in &pkts {
result = reassembler.push(pkt); result = reassembler.push(pkt);
} }
let (codec, is_kf, data) = result.unwrap(); let result = result.unwrap();
assert_eq!(codec, CodecId::H264Baseline); assert_eq!(result.codec_id, CodecId::H264Baseline);
assert!(!is_kf); assert!(!result.is_keyframe);
assert_eq!(data, frame); assert_eq!(result.width, Some(960));
assert_eq!(result.height, Some(540));
assert_eq!(result.data, frame);
} }
#[test] #[test]
fn out_of_order_delivery() { fn out_of_order_delivery() {
let frame = make_frame(VIDEO_MAX_PAYLOAD * 2 + 100); let frame = make_frame(VIDEO_MAX_PAYLOAD * 2 + 100);
let mut seq = 0u32; let mut seq = 0u32;
let pkts = packetize_video_frame(&frame, CodecId::Av1Main, false, &mut seq, 3000); let pkts = packetize_video_frame(&frame, CodecId::Av1Main, false, &mut seq, 3000, 320, 240);
assert_eq!(pkts.len(), 3); assert_eq!(pkts.len(), 3);
let mut reassembler = VideoReassembler::new(); let mut reassembler = VideoReassembler::new();
// Deliver out of order: 2, 0, 1 // Deliver out of order: 2, 0, 1
assert!(reassembler.push(&pkts[2]).is_none()); // last arrives first — no total_fragments yet assert!(reassembler.push(&pkts[2]).is_none()); // last arrives first — no total_fragments yet
assert!(reassembler.push(&pkts[0]).is_none()); assert!(reassembler.push(&pkts[0]).is_none());
let result = reassembler.push(&pkts[1]).expect("last missing fragment completes frame"); let result = reassembler
assert_eq!(result.0, CodecId::Av1Main); .push(&pkts[1])
assert!(!result.1); .expect("last missing fragment completes frame");
assert_eq!(result.2, frame); assert_eq!(result.codec_id, CodecId::Av1Main);
assert!(!result.is_keyframe);
assert_eq!(result.width, Some(320));
assert_eq!(result.height, Some(240));
assert_eq!(result.data, frame);
} }
#[test] #[test]
fn empty_frame_produces_no_packets() { fn empty_frame_produces_no_packets() {
let mut seq = 0u32; let mut seq = 0u32;
let pkts = packetize_video_frame(&[], CodecId::Av1Main, false, &mut seq, 0); let pkts = packetize_video_frame(&[], CodecId::Av1Main, false, &mut seq, 0, 640, 480);
assert!(pkts.is_empty()); assert!(pkts.is_empty());
} }
#[test]
fn old_payload_without_meta_still_reassembles() {
let payload = Bytes::copy_from_slice(&[0x00, 0x00, 0x00, 0x01, 0x65]);
let pkt = MediaPacket {
header: MediaHeaderV2 {
version: MediaHeaderV2::VERSION,
flags: MediaHeaderV2::FLAG_KEYFRAME | MediaHeaderV2::FLAG_FRAME_END,
media_type: MediaType::Video,
codec_id: CodecId::H264Baseline,
stream_id: 0,
fec_ratio: 0,
seq: 7,
timestamp: 123,
fec_block: 1,
},
payload: payload.clone(),
quality_report: None,
};
let mut reassembler = VideoReassembler::new();
let frame = reassembler.push(&pkt).unwrap();
assert_eq!(frame.codec_id, CodecId::H264Baseline);
assert_eq!(frame.width, None);
assert_eq!(frame.height, None);
assert_eq!(frame.data, payload.to_vec());
}
} }

View File

@@ -16,9 +16,9 @@
use std::sync::Mutex; use std::sync::Mutex;
use wzp_proto::CodecId; use wzp_proto::CodecId;
use wzp_video::{ use wzp_video::{
VideoFrame,
factory::{create_video_decoder, create_video_encoder}, factory::{create_video_decoder, create_video_encoder},
transport::{VideoReassembler, packetize_video_frame}, transport::{packetize_video_frame, VideoReassembler},
VideoFrame,
}; };
/// VideoToolbox has global session registry state — serialise integration tests /// VideoToolbox has global session registry state — serialise integration tests
@@ -42,7 +42,12 @@ fn synthetic_i420(width: u32, height: u32, frame_idx: u32) -> VideoFrame {
data[y_size..y_size + uv_size].fill(128); data[y_size..y_size + uv_size].fill(128);
data[y_size + uv_size..].fill(128); data[y_size + uv_size..].fill(128);
VideoFrame { width, height, data, timestamp_ms: frame_idx as u64 * 33 } VideoFrame {
width,
height,
data,
timestamp_ms: frame_idx as u64 * 33,
}
} }
// ── tests ───────────────────────────────────────────────────────────────────── // ── tests ─────────────────────────────────────────────────────────────────────
@@ -53,10 +58,10 @@ fn h264_pipeline_roundtrip() {
let _g = VT_LOCK.lock().unwrap(); let _g = VT_LOCK.lock().unwrap();
let (w, h) = (640, 360); let (w, h) = (640, 360);
let mut encoder = create_video_encoder(CodecId::H264Baseline, w, h, 1_500_000) let mut encoder =
.expect("H264Baseline encoder"); create_video_encoder(CodecId::H264Baseline, w, h, 1_500_000).expect("H264Baseline encoder");
let mut decoder = create_video_decoder(CodecId::H264Baseline, w, h) let mut decoder =
.expect("H264Baseline decoder"); create_video_decoder(CodecId::H264Baseline, w, h).expect("H264Baseline decoder");
let mut seq = 0u32; let mut seq = 0u32;
let mut decoded_count = 0usize; let mut decoded_count = 0usize;
@@ -71,32 +76,60 @@ fn h264_pipeline_roundtrip() {
} }
let is_keyframe = encoder.is_keyframe(&encoded); let is_keyframe = encoder.is_keyframe(&encoded);
let pkts = packetize_video_frame(&encoded, CodecId::H264Baseline, is_keyframe, &mut seq, i * 33); let pkts = packetize_video_frame(
assert!(!pkts.is_empty(), "packetize must produce at least one packet"); &encoded,
CodecId::H264Baseline,
is_keyframe,
&mut seq,
i * 33,
w,
h,
);
assert!(
!pkts.is_empty(),
"packetize must produce at least one packet"
);
// All fragments for this frame share the same timestamp. // All fragments for this frame share the same timestamp.
let ts = pkts[0].header.timestamp; let ts = pkts[0].header.timestamp;
let total_frags = pkts.len(); let total_frags = pkts.len();
for (idx, pkt) in pkts.iter().enumerate() { for (idx, pkt) in pkts.iter().enumerate() {
assert_eq!(pkt.header.timestamp, ts, "all fragments of one frame share timestamp"); assert_eq!(
pkt.header.timestamp, ts,
"all fragments of one frame share timestamp"
);
let frag_idx = (pkt.header.fec_block >> 8) as usize; let frag_idx = (pkt.header.fec_block >> 8) as usize;
let frag_total = (pkt.header.fec_block & 0xFF) as usize; let frag_total = (pkt.header.fec_block & 0xFF) as usize;
assert_eq!(frag_idx, idx, "fragment index must match packet position"); assert_eq!(frag_idx, idx, "fragment index must match packet position");
assert_eq!(frag_total, total_frags, "all fragments carry the correct total count"); assert_eq!(
frag_total, total_frags,
"all fragments carry the correct total count"
);
} }
assert!(pkts.last().unwrap().header.is_frame_end(), "last packet must have FLAG_FRAME_END"); assert!(
pkts.last().unwrap().header.is_frame_end(),
"last packet must have FLAG_FRAME_END"
);
// Push through reassembler — only the last packet should yield a frame. // Push through reassembler — only the last packet should yield a frame.
let mut reassembler = VideoReassembler::new(); let mut reassembler = VideoReassembler::new();
for (j, pkt) in pkts.iter().enumerate() { for (j, pkt) in pkts.iter().enumerate() {
let result = reassembler.push(pkt); let result = reassembler.push(pkt);
if j + 1 < pkts.len() { if j + 1 < pkts.len() {
assert!(result.is_none(), "intermediate fragments must not yield a complete frame"); assert!(
result.is_none(),
"intermediate fragments must not yield a complete frame"
);
} else { } else {
let (codec, kf, data) = result.expect("last fragment must complete the frame"); let frame = result.expect("last fragment must complete the frame");
assert_eq!(codec, CodecId::H264Baseline); assert_eq!(frame.codec_id, CodecId::H264Baseline);
assert_eq!(kf, is_keyframe); assert_eq!(frame.is_keyframe, is_keyframe);
assert_eq!(data, encoded, "reassembled bytes must match original encoded bytes"); assert_eq!(frame.width, Some(w as u16));
assert_eq!(frame.height, Some(h as u16));
assert_eq!(
frame.data, encoded,
"reassembled bytes must match original encoded bytes"
);
} }
} }
@@ -118,7 +151,10 @@ fn h264_pipeline_roundtrip() {
} }
} }
assert!(decoded_count > 0, "at least one frame must have been decoded"); assert!(
decoded_count > 0,
"at least one frame must have been decoded"
);
} }
/// Fragmentation: a frame larger than VIDEO_MAX_PAYLOAD splits into multiple packets, /// Fragmentation: a frame larger than VIDEO_MAX_PAYLOAD splits into multiple packets,
@@ -134,13 +170,28 @@ fn large_frame_fragments_and_reassembles() {
let mut seq = 0u32; let mut seq = 0u32;
let pkts = packetize_video_frame( let pkts = packetize_video_frame(
&synthetic_encoded, CodecId::H264Baseline, true, &mut seq, 9000, &synthetic_encoded,
CodecId::H264Baseline,
true,
&mut seq,
9000,
1280,
720,
); );
assert!(pkts.len() >= 4, "large frame must produce ≥4 fragments"); assert!(pkts.len() >= 4, "large frame must produce ≥4 fragments");
assert!(pkts[0].header.is_keyframe(), "keyframe flag propagates to all fragments"); assert!(
assert!(!pkts[0].header.is_frame_end(), "first packet is not frame end"); pkts[0].header.is_keyframe(),
assert!(pkts.last().unwrap().header.is_frame_end(), "last packet is frame end"); "keyframe flag propagates to all fragments"
);
assert!(
!pkts[0].header.is_frame_end(),
"first packet is not frame end"
);
assert!(
pkts.last().unwrap().header.is_frame_end(),
"last packet is frame end"
);
let mut reassembler = VideoReassembler::new(); let mut reassembler = VideoReassembler::new();
let mut result = None; let mut result = None;
@@ -148,8 +199,13 @@ fn large_frame_fragments_and_reassembles() {
result = reassembler.push(pkt); result = reassembler.push(pkt);
} }
let (_, _, data) = result.expect("all fragments delivered → complete frame"); let frame = result.expect("all fragments delivered → complete frame");
assert_eq!(data, synthetic_encoded, "reassembled bytes must match input exactly"); assert_eq!(frame.width, Some(1280));
assert_eq!(frame.height, Some(720));
assert_eq!(
frame.data, synthetic_encoded,
"reassembled bytes must match input exactly"
);
} }
/// Packet loss: if the first fragment is missing, reassembly cannot complete. /// Packet loss: if the first fragment is missing, reassembly cannot complete.
@@ -159,7 +215,7 @@ fn missing_fragment_blocks_reassembly() {
let frame: Vec<u8> = vec![0xAB; VIDEO_MAX_PAYLOAD * 2 + 50]; let frame: Vec<u8> = vec![0xAB; VIDEO_MAX_PAYLOAD * 2 + 50];
let mut seq = 0u32; let mut seq = 0u32;
let pkts = packetize_video_frame(&frame, CodecId::Av1Main, false, &mut seq, 1234); let pkts = packetize_video_frame(&frame, CodecId::Av1Main, false, &mut seq, 1234, 640, 480);
assert!(pkts.len() >= 3); assert!(pkts.len() >= 3);
let mut reassembler = VideoReassembler::new(); let mut reassembler = VideoReassembler::new();
@@ -193,7 +249,15 @@ fn evict_stale_removes_aged_frames() {
let frame: Vec<u8> = vec![0x55; VIDEO_MAX_PAYLOAD * 2]; let frame: Vec<u8> = vec![0x55; VIDEO_MAX_PAYLOAD * 2];
let mut seq = 0u32; let mut seq = 0u32;
let pkts = packetize_video_frame(&frame, CodecId::H264Baseline, false, &mut seq, 500); let pkts = packetize_video_frame(
&frame,
CodecId::H264Baseline,
false,
&mut seq,
500,
640,
480,
);
let mut reassembler = VideoReassembler::new(); let mut reassembler = VideoReassembler::new();
// Push only first packet — frame is incomplete. // Push only first packet — frame is incomplete.

View File

@@ -1385,7 +1385,7 @@ impl CallEngine {
// Video pipeline state — mirror of the desktop recv task. // Video pipeline state — mirror of the desktop recv task.
let mut video_reassembler = wzp_video::transport::VideoReassembler::new(); let mut video_reassembler = wzp_video::transport::VideoReassembler::new();
let mut video_decoder: Option<Box<dyn wzp_video::decoder::VideoDecoder>> = None; let mut video_decoder: Option<Box<dyn wzp_video::decoder::VideoDecoder>> = None;
let mut video_decoder_codec: Option<wzp_proto::CodecId> = None; let mut video_decoder_key: Option<(wzp_proto::CodecId, u32, u32)> = None;
let mut video_first_recv_logged = false; let mut video_first_recv_logged = false;
let mut video_first_reassembled_logged = false; let mut video_first_reassembled_logged = false;
let mut video_reassembled_samples: u64 = 0; let mut video_reassembled_samples: u64 = 0;
@@ -1466,7 +1466,14 @@ impl CallEngine {
}), }),
); );
} }
if let Some((codec_id, is_kf, frame)) = video_reassembler.push(&pkt) { if let Some(reassembled) = video_reassembler.push(&pkt) {
let codec_id = reassembled.codec_id;
let is_kf = reassembled.is_keyframe;
let frame_width =
reassembled.width.unwrap_or(video_width as u16) as u32;
let frame_height =
reassembled.height.unwrap_or(video_height as u16) as u32;
let frame = reassembled.data;
video_reassembled_samples += 1; video_reassembled_samples += 1;
if !video_first_reassembled_logged { if !video_first_reassembled_logged {
video_first_reassembled_logged = true; video_first_reassembled_logged = true;
@@ -1484,6 +1491,14 @@ impl CallEngine {
); );
} }
if should_log_video_sample(video_reassembled_samples, is_kf) { if should_log_video_sample(video_reassembled_samples, is_kf) {
crate::maybe_dump_video_bytes(
&recv_app,
"remote_encoded_reassembled",
"android",
video_reassembled_samples,
&frame,
codec_id,
);
crate::emit_call_debug( crate::emit_call_debug(
&recv_app, &recv_app,
"video:reassembled_frame", "video:reassembled_frame",
@@ -1517,22 +1532,23 @@ impl CallEngine {
video_reassembler.evict_stale(pkt.header.timestamp, 5_000); video_reassembler.evict_stale(pkt.header.timestamp, 5_000);
continue; continue;
} }
if video_decoder_codec != Some(codec_id) { let decoder_key = (codec_id, frame_width, frame_height);
if video_decoder_key != Some(decoder_key) {
crate::emit_call_debug( crate::emit_call_debug(
&recv_app, &recv_app,
"video:decoder_init_start", "video:decoder_init_start",
serde_json::json!({ serde_json::json!({
"t_ms": recv_t0.elapsed().as_millis() as u64, "t_ms": recv_t0.elapsed().as_millis() as u64,
"codec": format!("{:?}", codec_id), "codec": format!("{:?}", codec_id),
"width": video_width, "width": frame_width,
"height": video_height, "height": frame_height,
"platform": "android", "platform": "android",
}), }),
); );
match wzp_video::factory::create_video_decoder( match wzp_video::factory::create_video_decoder(
codec_id, codec_id,
video_width, frame_width,
video_height, frame_height,
) { ) {
Ok(d) => { Ok(d) => {
info!(codec = ?codec_id, "video decoder created (android)"); info!(codec = ?codec_id, "video decoder created (android)");
@@ -1546,7 +1562,7 @@ impl CallEngine {
}), }),
); );
video_decoder = Some(d); video_decoder = Some(d);
video_decoder_codec = Some(codec_id); video_decoder_key = Some(decoder_key);
} }
Err(e) => { Err(e) => {
error!("video decoder init failed: {e}"); error!("video decoder init failed: {e}");
@@ -2304,11 +2320,21 @@ impl CallEngine {
is_keyframe, is_keyframe,
&mut seq, &mut seq,
ts_ms, ts_ms,
frame.width,
frame.height,
); );
video_packets_total += pkts.len() as u64; video_packets_total += pkts.len() as u64;
video_bytes_total += encoded.len() as u64; video_bytes_total += encoded.len() as u64;
if encoded_frame_samples < 5 { if encoded_frame_samples < 5 {
encoded_frame_samples += 1; encoded_frame_samples += 1;
crate::maybe_dump_video_bytes(
&vid_app,
"local_encoded",
"android",
encoded_frame_samples,
&encoded,
vid_codec,
);
let packet_payload_bytes: usize = let packet_payload_bytes: usize =
pkts.iter().map(|pkt| pkt.payload.len()).sum(); pkts.iter().map(|pkt| pkt.payload.len()).sum();
crate::emit_call_debug( crate::emit_call_debug(
@@ -2870,7 +2896,7 @@ impl CallEngine {
let mut first_packet_logged = false; let mut first_packet_logged = false;
let mut video_reassembler = wzp_video::transport::VideoReassembler::new(); let mut video_reassembler = wzp_video::transport::VideoReassembler::new();
let mut video_decoder: Option<Box<dyn wzp_video::decoder::VideoDecoder>> = None; let mut video_decoder: Option<Box<dyn wzp_video::decoder::VideoDecoder>> = None;
let mut video_decoder_codec: Option<wzp_proto::CodecId> = None; let mut video_decoder_key: Option<(wzp_proto::CodecId, u32, u32)> = None;
let mut video_first_recv_logged_desktop = false; let mut video_first_recv_logged_desktop = false;
let mut video_first_reassembled_logged = false; let mut video_first_reassembled_logged = false;
let mut video_reassembled_samples: u64 = 0; let mut video_reassembled_samples: u64 = 0;
@@ -2956,7 +2982,14 @@ impl CallEngine {
}), }),
); );
} }
if let Some((codec_id, is_kf, frame)) = video_reassembler.push(&pkt) { if let Some(reassembled) = video_reassembler.push(&pkt) {
let codec_id = reassembled.codec_id;
let is_kf = reassembled.is_keyframe;
let frame_width =
reassembled.width.unwrap_or(video_width as u16) as u32;
let frame_height =
reassembled.height.unwrap_or(video_height as u16) as u32;
let frame = reassembled.data;
video_reassembled_samples += 1; video_reassembled_samples += 1;
if !video_first_reassembled_logged { if !video_first_reassembled_logged {
video_first_reassembled_logged = true; video_first_reassembled_logged = true;
@@ -2974,6 +3007,14 @@ impl CallEngine {
); );
} }
if should_log_video_sample(video_reassembled_samples, is_kf) { if should_log_video_sample(video_reassembled_samples, is_kf) {
crate::maybe_dump_video_bytes(
&recv_app,
"remote_encoded_reassembled",
"desktop",
video_reassembled_samples,
&frame,
codec_id,
);
crate::emit_call_debug( crate::emit_call_debug(
&recv_app, &recv_app,
"video:reassembled_frame", "video:reassembled_frame",
@@ -3008,22 +3049,23 @@ impl CallEngine {
continue; continue;
} }
// Lazy-init or switch decoder on codec change. // Lazy-init or switch decoder on codec change.
if video_decoder_codec != Some(codec_id) { let decoder_key = (codec_id, frame_width, frame_height);
if video_decoder_key != Some(decoder_key) {
crate::emit_call_debug( crate::emit_call_debug(
&recv_app, &recv_app,
"video:decoder_init_start", "video:decoder_init_start",
serde_json::json!({ serde_json::json!({
"t_ms": recv_t0.elapsed().as_millis() as u64, "t_ms": recv_t0.elapsed().as_millis() as u64,
"codec": format!("{:?}", codec_id), "codec": format!("{:?}", codec_id),
"width": video_width, "width": frame_width,
"height": video_height, "height": frame_height,
"platform": "desktop", "platform": "desktop",
}), }),
); );
match wzp_video::factory::create_video_decoder( match wzp_video::factory::create_video_decoder(
codec_id, codec_id,
video_width, frame_width,
video_height, frame_height,
) { ) {
Ok(d) => { Ok(d) => {
info!(codec = ?codec_id, "video decoder created"); info!(codec = ?codec_id, "video decoder created");
@@ -3037,7 +3079,7 @@ impl CallEngine {
}), }),
); );
video_decoder = Some(d); video_decoder = Some(d);
video_decoder_codec = Some(codec_id); video_decoder_key = Some(decoder_key);
} }
Err(e) => { Err(e) => {
error!("video decoder init failed: {e}"); error!("video decoder init failed: {e}");
@@ -3640,11 +3682,21 @@ impl CallEngine {
is_keyframe, is_keyframe,
&mut seq, &mut seq,
ts_ms, ts_ms,
frame.width,
frame.height,
); );
video_packets_total += pkts.len() as u64; video_packets_total += pkts.len() as u64;
video_bytes_total += encoded.len() as u64; video_bytes_total += encoded.len() as u64;
if encoded_frame_samples < 5 { if encoded_frame_samples < 5 {
encoded_frame_samples += 1; encoded_frame_samples += 1;
crate::maybe_dump_video_bytes(
&vid_app,
"local_encoded",
"desktop",
encoded_frame_samples,
&encoded,
vid_codec,
);
let packet_payload_bytes: usize = let packet_payload_bytes: usize =
pkts.iter().map(|pkt| pkt.payload.len()).sum(); pkts.iter().map(|pkt| pkt.payload.len()).sum();
crate::emit_call_debug( crate::emit_call_debug(

View File

@@ -205,6 +205,61 @@ pub(crate) fn maybe_dump_video_jpeg(
} }
} }
pub(crate) fn maybe_dump_video_bytes(
app: &tauri::AppHandle,
stage: &str,
platform: &str,
frame_no: u64,
bytes: &[u8],
codec: wzp_proto::CodecId,
) {
if !should_dump_frame(frame_no) || bytes.is_empty() {
return;
}
let ext = match codec {
wzp_proto::CodecId::H265Main => "h265",
wzp_proto::CodecId::Av1Main => "obu",
_ => "h264",
};
let seq = FRAME_DUMP_WRITES.fetch_add(1, Ordering::Relaxed) + 1;
let dir = identity_dir().join("frame-dumps");
let file_name = format!("{seq:06}_{platform}_{stage}_f{frame_no:06}.{ext}");
let path = dir.join(file_name);
let result = std::fs::create_dir_all(&dir).and_then(|_| std::fs::write(&path, bytes));
match result {
Ok(()) => emit_call_debug(
app,
"video:byte_dump",
serde_json::json!({
"stage": stage,
"platform": platform,
"frame_no": frame_no,
"codec": format!("{:?}", codec),
"bytes": bytes.len(),
"path": path,
}),
),
Err(e) => {
if seq <= 5 || seq % 30 == 0 {
emit_call_debug(
app,
"video:byte_dump_failed",
serde_json::json!({
"stage": stage,
"platform": platform,
"frame_no": frame_no,
"codec": format!("{:?}", codec),
"error": e.to_string(),
"path": path,
}),
);
}
}
}
}
/// RGB24 → I420 (planar 4:2:0). Layout: Y(w×h) | U(w/2×h/2) | V(w/2×h/2). /// RGB24 → I420 (planar 4:2:0). Layout: Y(w×h) | U(w/2×h/2) | V(w/2×h/2).
fn rgb_to_i420(rgb: &[u8], w: usize, h: usize) -> Vec<u8> { fn rgb_to_i420(rgb: &[u8], w: usize, h: usize) -> Vec<u8> {
let y_size = w * h; let y_size = w * h;

View File

@@ -607,7 +607,7 @@ function drawCameraFrameForSend() {
const vh = vdLocalVideo.videoHeight || camCaptureCanvas.height; const vh = vdLocalVideo.videoHeight || camCaptureCanvas.height;
if (!vw || !vh) return; if (!vw || !vh) return;
const scale = Math.max(cameraSendWidth / vw, cameraSendHeight / vh); const scale = Math.min(cameraSendWidth / vw, cameraSendHeight / vh);
const dw = vw * scale; const dw = vw * scale;
const dh = vh * scale; const dh = vh * scale;
const dx = (cameraSendWidth - dw) / 2; const dx = (cameraSendWidth - dw) / 2;
@@ -631,6 +631,8 @@ async function captureAndPushCameraFrame() {
frame_no: cameraCaptureFrameNo, frame_no: cameraCaptureFrameNo,
width: camCaptureCanvas.width, width: camCaptureCanvas.width,
height: camCaptureCanvas.height, height: camCaptureCanvas.height,
source_width: vdLocalVideo.videoWidth || null,
source_height: vdLocalVideo.videoHeight || null,
jpeg_b64_len: b64.length, jpeg_b64_len: b64.length,
capture_clock: getVideoFrameCallbackApi() ? "video_frame_callback" : "interval", capture_clock: getVideoFrameCallbackApi() ? "video_frame_callback" : "interval",
}); });