diff --git a/crates/wzp-client/src/analyzer.rs b/crates/wzp-client/src/analyzer.rs index fd1f4bd..cc9d206 100644 --- a/crates/wzp-client/src/analyzer.rs +++ b/crates/wzp-client/src/analyzer.rs @@ -243,6 +243,7 @@ struct VideoStreamProbe { last_frame: Option, reassembler: VideoReassembler, decoder: Option>, + decoder_key: Option<(CodecId, u32, u32)>, decode_ok: u64, decode_pending: u64, decode_err: u64, @@ -275,6 +276,7 @@ impl VideoStreamProbe { last_frame: None, reassembler: VideoReassembler::new(), decoder, + decoder_key: decode.then_some((codec, 1280, 720)), decode_ok: 0, decode_pending: 0, decode_err: 0, @@ -294,6 +296,7 @@ impl VideoStreamProbe { .is_some() .then(|| create_video_decoder(self.codec, 1280, 720).ok()) .flatten(); + self.decoder_key = self.decoder.as_ref().map(|_| (self.codec, 1280, 720)); } if self.seq_initialized { let expected = self.last_seq.wrapping_add(1); @@ -305,19 +308,26 @@ impl VideoStreamProbe { self.last_seq = pkt.header.seq; self.seq_initialized = true; - if let Some((codec, keyframe, frame)) = self.reassembler.push(pkt) { + if let Some(frame) = self.reassembler.push(pkt) { self.frames += 1; - self.bytes += frame.len() as u64; - self.max_frame_bytes = self.max_frame_bytes.max(frame.len()); + self.bytes += frame.data.len() as u64; + self.max_frame_bytes = self.max_frame_bytes.max(frame.data.len()); self.last_frame = Some(now); - if keyframe { + if frame.is_keyframe { self.keyframes += 1; } - if codec != self.codec { - self.codec = codec; + if frame.codec_id != self.codec { + self.codec = frame.codec_id; + } + let frame_width = frame.width.unwrap_or(1280) as u32; + let frame_height = frame.height.unwrap_or(720) as u32; + let decoder_key = (self.codec, frame_width, frame_height); + if self.decoder.is_some() && self.decoder_key != Some(decoder_key) { + self.decoder = create_video_decoder(self.codec, frame_width, frame_height).ok(); + self.decoder_key = self.decoder.as_ref().map(|_| decoder_key); } if let Some(decoder) = self.decoder.as_mut() { - match decoder.decode(&frame) { + match decoder.decode(&frame.data) { Ok(Some(decoded)) => { self.decode_ok += 1; self.last_decode_debug = decoder.debug_snapshot(); diff --git a/crates/wzp-video/src/encoder.rs b/crates/wzp-video/src/encoder.rs index efa00b5..945d55c 100644 --- a/crates/wzp-video/src/encoder.rs +++ b/crates/wzp-video/src/encoder.rs @@ -49,6 +49,11 @@ pub trait VideoEncoder: Send { /// /// Default implementation is a no-op. fn set_mode(&mut self, _mode: crate::EncoderMode) {} + + /// Optional platform-specific encoder state for debug logs. + fn debug_snapshot(&self) -> Option { + None + } } /// Raw video frame input for encoding. diff --git a/crates/wzp-video/src/mediacodec.rs b/crates/wzp-video/src/mediacodec.rs index 5c5d6c2..e0f9a15 100644 --- a/crates/wzp-video/src/mediacodec.rs +++ b/crates/wzp-video/src/mediacodec.rs @@ -27,6 +27,8 @@ pub struct MediaCodecEncoder { width: u32, #[cfg(target_os = "android")] height: u32, + #[cfg(target_os = "android")] + input_format_logged: bool, force_keyframe: bool, #[cfg(not(target_os = "android"))] _width: u32, @@ -86,6 +88,7 @@ impl MediaCodecEncoder { codec, width, height, + input_format_logged: false, force_keyframe: false, }) } @@ -123,8 +126,34 @@ impl VideoEncoder for MediaCodecEncoder { if self.force_keyframe { self.request_sync_frame(); } - let input = - i420_to_nv12(&frame.data, self.width as usize, self.height as usize)?; + let layout = encoder_input_layout(&self.codec, self.width, self.height); + if !self.input_format_logged { + self.input_format_logged = true; + log_media_codec_input_format("h264_encoder_input", &self.codec, &layout); + } + let input_capacity = { buffer.buffer_mut().len() }; + let mut input = i420_to_padded_nv12( + &frame.data, + self.width as usize, + self.height as usize, + layout.stride, + layout.slice_height, + )?; + if input.len() > input_capacity { + tracing::warn!( + target: "wzp_video::mediacodec", + padded_len = input.len(), + input_capacity, + "MediaCodec H.264 input buffer smaller than padded layout; falling back to tight NV12" + ); + input = i420_to_padded_nv12( + &frame.data, + self.width as usize, + self.height as usize, + self.width as usize, + self.height as usize, + )?; + } let to_copy = { let buf = buffer.buffer_mut(); let n = input.len().min(buf.len()); @@ -398,6 +427,8 @@ pub struct MediaCodecHevcEncoder { width: u32, #[cfg(target_os = "android")] height: u32, + #[cfg(target_os = "android")] + input_format_logged: bool, force_keyframe: bool, #[cfg(not(target_os = "android"))] _width: u32, @@ -439,6 +470,7 @@ impl MediaCodecHevcEncoder { codec, width, height, + input_format_logged: false, force_keyframe: false, }) } @@ -476,10 +508,38 @@ impl VideoEncoder for MediaCodecHevcEncoder { } else { 0 }; + let layout = encoder_input_layout(&self.codec, self.width, self.height); + if !self.input_format_logged { + self.input_format_logged = true; + log_media_codec_input_format("hevc_encoder_input", &self.codec, &layout); + } + let input_capacity = { buffer.buffer_mut().len() }; + let mut input = i420_to_padded_planar( + &frame.data, + self.width as usize, + self.height as usize, + layout.stride, + layout.slice_height, + )?; + if input.len() > input_capacity { + tracing::warn!( + target: "wzp_video::mediacodec", + padded_len = input.len(), + input_capacity, + "MediaCodec HEVC input buffer smaller than padded layout; falling back to tight I420" + ); + input = i420_to_padded_planar( + &frame.data, + self.width as usize, + self.height as usize, + self.width as usize, + self.height as usize, + )?; + } let to_copy = { let buf = buffer.buffer_mut(); - let n = frame.data.len().min(buf.len()); - for (d, &s) in buf[..n].iter_mut().zip(frame.data[..n].iter()) { + let n = input.len().min(buf.len()); + for (d, &s) in buf[..n].iter_mut().zip(input[..n].iter()) { d.write(s); } n @@ -1153,6 +1213,46 @@ fn positive_format_usize(format: &MediaFormat, key: &str) -> Option { (value > 0).then_some(value as usize) } +#[cfg(target_os = "android")] +#[derive(Clone, Copy, Debug)] +struct EncoderInputLayout { + stride: usize, + slice_height: usize, +} + +#[cfg(target_os = "android")] +fn encoder_input_layout(codec: &MediaCodec, width: u32, height: u32) -> EncoderInputLayout { + // ndk 0.9 exposes AMediaCodec_getInputFormat only behind API 28, while + // this app still targets API 26. Keep encoder input tight until we can + // query the actual input format. Guessing padded rows here is dangerous: + // when the encoder actually reads tight input, padding bytes become pixels + // from the next row and show up as diagonal green bands. + let _ = codec; + let width = width as usize; + let height = height as usize; + EncoderInputLayout { + stride: width, + slice_height: height, + } +} + +#[cfg(target_os = "android")] +fn log_media_codec_input_format(label: &str, codec: &MediaCodec, layout: &EncoderInputLayout) { + let format = codec.output_format(); + tracing::info!( + target: "wzp_video::mediacodec", + label, + color_format = format.i32("color-format"), + width = format.i32("width"), + height = format.i32("height"), + stride = format.i32("stride"), + slice_height = format.i32("slice-height"), + effective_stride = layout.stride, + effective_slice_height = layout.slice_height, + "MediaCodec input format" + ); +} + #[cfg(target_os = "android")] fn log_media_codec_format(label: &str, codec: &MediaCodec) { let format = codec.output_format(); @@ -1183,7 +1283,13 @@ fn i420_len(width: usize, height: usize) -> Result { } #[cfg(target_os = "android")] -fn i420_to_nv12(src: &[u8], width: usize, height: usize) -> Result, VideoError> { +fn i420_to_padded_nv12( + src: &[u8], + width: usize, + height: usize, + stride: usize, + slice_height: usize, +) -> Result, VideoError> { let y_size = width.checked_mul(height).ok_or_else(|| { VideoError::InvalidInput(format!("invalid frame dimensions {width}x{height}")) })?; @@ -1196,17 +1302,127 @@ fn i420_to_nv12(src: &[u8], width: usize, height: usize) -> Result, Vide ))); } - let mut out = vec![0u8; expected]; - out[..y_size].copy_from_slice(&src[..y_size]); + if stride < width || slice_height < height { + return Err(VideoError::InvalidInput(format!( + "invalid encoder input layout {stride}x{slice_height} for {width}x{height}" + ))); + } + + let chroma_width = width / 2; + let chroma_height = height / 2; + let y_stride = stride; + let uv_stride = stride; + let y_slice_height = slice_height; + let uv_slice_height = (slice_height / 2).max(chroma_height); + let y_padded_size = y_stride.checked_mul(y_slice_height).ok_or_else(|| { + VideoError::InvalidInput(format!( + "invalid padded Y layout {y_stride}x{y_slice_height}" + )) + })?; + let uv_padded_size = uv_stride.checked_mul(uv_slice_height).ok_or_else(|| { + VideoError::InvalidInput(format!( + "invalid padded UV layout {uv_stride}x{uv_slice_height}" + )) + })?; + let total = y_padded_size + .checked_add(uv_padded_size) + .ok_or_else(|| VideoError::InvalidInput("padded NV12 size overflow".into()))?; + + let mut out = vec![0u8; total]; + out[y_padded_size..].fill(128); + for row in 0..height { + let src_off = row * width; + let dst_off = row * y_stride; + out[dst_off..dst_off + width].copy_from_slice(&src[src_off..src_off + width]); + } + let u = &src[y_size..y_size + uv_size]; let v = &src[y_size + uv_size..y_size + uv_size * 2]; - for i in 0..uv_size { - out[y_size + i * 2] = u[i]; - out[y_size + i * 2 + 1] = v[i]; + for row in 0..chroma_height { + let src_row = row * chroma_width; + let dst_row = y_padded_size + row * uv_stride; + for col in 0..chroma_width { + out[dst_row + col * 2] = u[src_row + col]; + out[dst_row + col * 2 + 1] = v[src_row + col]; + } } Ok(out) } +#[cfg(target_os = "android")] +fn i420_to_padded_planar( + src: &[u8], + width: usize, + height: usize, + stride: usize, + slice_height: usize, +) -> Result, VideoError> { + let y_size = width.checked_mul(height).ok_or_else(|| { + VideoError::InvalidInput(format!("invalid frame dimensions {width}x{height}")) + })?; + let uv_size = y_size / 4; + let expected = y_size + uv_size * 2; + if src.len() < expected { + return Err(VideoError::InvalidInput(format!( + "I420 frame too small for padded planar copy: {} bytes, expected {expected}", + src.len() + ))); + } + if stride < width || slice_height < height { + return Err(VideoError::InvalidInput(format!( + "invalid encoder input layout {stride}x{slice_height} for {width}x{height}" + ))); + } + + let chroma_width = width / 2; + let chroma_height = height / 2; + let y_stride = stride; + let chroma_stride = (stride / 2).max(chroma_width); + let y_slice_height = slice_height; + let chroma_slice_height = (slice_height / 2).max(chroma_height); + let y_padded_size = y_stride.checked_mul(y_slice_height).ok_or_else(|| { + VideoError::InvalidInput(format!( + "invalid padded Y layout {y_stride}x{y_slice_height}" + )) + })?; + let chroma_padded_size = chroma_stride + .checked_mul(chroma_slice_height) + .ok_or_else(|| { + VideoError::InvalidInput(format!( + "invalid padded chroma layout {chroma_stride}x{chroma_slice_height}" + )) + })?; + let chroma_total = chroma_padded_size + .checked_mul(2) + .ok_or_else(|| VideoError::InvalidInput("padded I420 chroma size overflow".into()))?; + let total = y_padded_size + .checked_add(chroma_total) + .ok_or_else(|| VideoError::InvalidInput("padded I420 size overflow".into()))?; + + let mut out = vec![0u8; total]; + out[y_padded_size..].fill(128); + for row in 0..height { + let src_off = row * width; + let dst_off = row * y_stride; + out[dst_off..dst_off + width].copy_from_slice(&src[src_off..src_off + width]); + } + + let src_u = y_size; + let src_v = y_size + uv_size; + let dst_u = y_padded_size; + let dst_v = y_padded_size + chroma_padded_size; + for row in 0..chroma_height { + let src_off = row * chroma_width; + let dst_off = row * chroma_stride; + out[dst_u + dst_off..dst_u + dst_off + chroma_width] + .copy_from_slice(&src[src_u + src_off..src_u + src_off + chroma_width]); + out[dst_v + dst_off..dst_v + dst_off + chroma_width] + .copy_from_slice(&src[src_v + src_off..src_v + src_off + chroma_width]); + } + + Ok(out) +} + #[cfg(target_os = "android")] fn yuv420_planar_to_tight_i420( src: &[u8], @@ -1410,7 +1626,7 @@ fn split_annex_b(data: &[u8]) -> Vec<&[u8]> { /// Android MediaCodec `csd-0`. #[allow(dead_code)] fn extract_sequence_header_obu(data: &[u8]) -> Option> { - use crate::av1_obu::{ObuHeader, read_leb128}; + use crate::av1_obu::{read_leb128, ObuHeader}; let mut i = 0usize; while i < data.len() { let header = ObuHeader::from_byte(data[i]); diff --git a/crates/wzp-video/src/transport.rs b/crates/wzp-video/src/transport.rs index c36d40a..8134b13 100644 --- a/crates/wzp-video/src/transport.rs +++ b/crates/wzp-video/src/transport.rs @@ -28,6 +28,24 @@ use wzp_proto::{CodecId, MediaHeaderV2, MediaPacket, MediaType}; /// 1200 (QUIC MTU) − 16 (MediaHeaderV2) − 16 (AEAD tag) = 1168. pub const VIDEO_MAX_PAYLOAD: usize = 1168; +const VIDEO_FRAME_META_MAGIC: [u8; 4] = *b"WZV1"; +const VIDEO_FRAME_META_LEN: usize = 8; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct VideoFrameMeta { + pub width: u16, + pub height: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ReassembledVideoFrame { + pub codec_id: CodecId, + pub is_keyframe: bool, + pub width: Option, + pub height: Option, + pub data: Vec, +} + /// Fragments one encoded video frame into a sequence of [`MediaPacket`]s. /// /// Pass each `MediaPacket` to `transport.send_media()`. @@ -37,12 +55,20 @@ pub fn packetize_video_frame( is_keyframe: bool, seq: &mut u32, timestamp_ms: u32, + width: u32, + height: u32, ) -> Vec { if frame.is_empty() { return vec![]; } - let chunks: Vec<&[u8]> = frame.chunks(VIDEO_MAX_PAYLOAD).collect(); + let mut framed = Vec::with_capacity(VIDEO_FRAME_META_LEN + frame.len()); + framed.extend_from_slice(&VIDEO_FRAME_META_MAGIC); + framed.extend_from_slice(&(width.min(u16::MAX as u32) as u16).to_be_bytes()); + framed.extend_from_slice(&(height.min(u16::MAX as u32) as u16).to_be_bytes()); + framed.extend_from_slice(frame); + + let chunks: Vec<&[u8]> = framed.chunks(VIDEO_MAX_PAYLOAD).collect(); let total = chunks.len().min(255); let mut packets = Vec::with_capacity(total); @@ -118,9 +144,8 @@ impl VideoReassembler { /// Push one received video packet. /// - /// Returns `Some((codec_id, is_keyframe, frame_bytes))` when a complete - /// frame is ready, `None` otherwise. - pub fn push(&mut self, pkt: &MediaPacket) -> Option<(CodecId, bool, Vec)> { + /// Returns `Some(frame)` when a complete frame is ready, `None` otherwise. + pub fn push(&mut self, pkt: &MediaPacket) -> Option { let hdr = &pkt.header; let fragment_index = (hdr.fec_block >> 8) as u8; let fragment_count = (hdr.fec_block & 0xFF) as u8; @@ -161,7 +186,14 @@ impl VideoReassembler { for i in 0..total as u8 { frame.extend_from_slice(pending.fragments.get(&i)?); } - Some((codec_id, pending.is_keyframe, frame)) + let (meta, data) = split_video_frame_payload(frame); + Some(ReassembledVideoFrame { + codec_id, + is_keyframe: pending.is_keyframe, + width: meta.map(|m| m.width), + height: meta.map(|m| m.height), + data, + }) } /// Evict stale pending frames older than `max_age_ms` milliseconds. @@ -169,12 +201,22 @@ impl VideoReassembler { /// Call periodically (e.g. every 2s) to prevent accumulation of frames /// whose first or middle fragments were lost. pub fn evict_stale(&mut self, current_timestamp_ms: u32, max_age_ms: u32) { - self.pending.retain(|&ts, _| { - current_timestamp_ms.wrapping_sub(ts) <= max_age_ms - }); + self.pending + .retain(|&ts, _| current_timestamp_ms.wrapping_sub(ts) <= max_age_ms); } } +fn split_video_frame_payload(mut frame: Vec) -> (Option, Vec) { + if frame.len() < VIDEO_FRAME_META_LEN || frame[..4] != VIDEO_FRAME_META_MAGIC { + return (None, frame); + } + + let width = u16::from_be_bytes([frame[4], frame[5]]); + let height = u16::from_be_bytes([frame[6], frame[7]]); + frame.drain(..VIDEO_FRAME_META_LEN); + (Some(VideoFrameMeta { width, height }), frame) +} + impl Default for VideoReassembler { fn default() -> Self { Self::new() @@ -193,7 +235,7 @@ mod tests { fn single_fragment_roundtrip() { let frame = make_frame(100); let mut seq = 0u32; - let pkts = packetize_video_frame(&frame, CodecId::Av1Main, true, &mut seq, 1000); + let pkts = packetize_video_frame(&frame, CodecId::Av1Main, true, &mut seq, 1000, 640, 480); assert_eq!(pkts.len(), 1); assert!(pkts[0].header.is_keyframe()); assert!(pkts[0].header.is_frame_end()); @@ -203,17 +245,27 @@ mod tests { let mut reassembler = VideoReassembler::new(); let result = reassembler.push(&pkts[0]); assert!(result.is_some()); - let (codec, is_kf, data) = result.unwrap(); - assert_eq!(codec, CodecId::Av1Main); - assert!(is_kf); - assert_eq!(data, frame); + let result = result.unwrap(); + assert_eq!(result.codec_id, CodecId::Av1Main); + assert!(result.is_keyframe); + assert_eq!(result.width, Some(640)); + assert_eq!(result.height, Some(480)); + assert_eq!(result.data, frame); } #[test] fn multi_fragment_roundtrip() { let frame = make_frame(VIDEO_MAX_PAYLOAD * 3 + 50); let mut seq = 0u32; - let pkts = packetize_video_frame(&frame, CodecId::H264Baseline, false, &mut seq, 2000); + let pkts = packetize_video_frame( + &frame, + CodecId::H264Baseline, + false, + &mut seq, + 2000, + 960, + 540, + ); assert_eq!(pkts.len(), 4); assert!(!pkts[0].header.is_frame_end()); assert!(pkts[3].header.is_frame_end()); @@ -224,33 +276,66 @@ mod tests { for pkt in &pkts { result = reassembler.push(pkt); } - let (codec, is_kf, data) = result.unwrap(); - assert_eq!(codec, CodecId::H264Baseline); - assert!(!is_kf); - assert_eq!(data, frame); + let result = result.unwrap(); + assert_eq!(result.codec_id, CodecId::H264Baseline); + assert!(!result.is_keyframe); + assert_eq!(result.width, Some(960)); + assert_eq!(result.height, Some(540)); + assert_eq!(result.data, frame); } #[test] fn out_of_order_delivery() { let frame = make_frame(VIDEO_MAX_PAYLOAD * 2 + 100); let mut seq = 0u32; - let pkts = packetize_video_frame(&frame, CodecId::Av1Main, false, &mut seq, 3000); + let pkts = packetize_video_frame(&frame, CodecId::Av1Main, false, &mut seq, 3000, 320, 240); assert_eq!(pkts.len(), 3); let mut reassembler = VideoReassembler::new(); // Deliver out of order: 2, 0, 1 assert!(reassembler.push(&pkts[2]).is_none()); // last arrives first — no total_fragments yet assert!(reassembler.push(&pkts[0]).is_none()); - let result = reassembler.push(&pkts[1]).expect("last missing fragment completes frame"); - assert_eq!(result.0, CodecId::Av1Main); - assert!(!result.1); - assert_eq!(result.2, frame); + let result = reassembler + .push(&pkts[1]) + .expect("last missing fragment completes frame"); + assert_eq!(result.codec_id, CodecId::Av1Main); + assert!(!result.is_keyframe); + assert_eq!(result.width, Some(320)); + assert_eq!(result.height, Some(240)); + assert_eq!(result.data, frame); } #[test] fn empty_frame_produces_no_packets() { let mut seq = 0u32; - let pkts = packetize_video_frame(&[], CodecId::Av1Main, false, &mut seq, 0); + let pkts = packetize_video_frame(&[], CodecId::Av1Main, false, &mut seq, 0, 640, 480); assert!(pkts.is_empty()); } + + #[test] + fn old_payload_without_meta_still_reassembles() { + let payload = Bytes::copy_from_slice(&[0x00, 0x00, 0x00, 0x01, 0x65]); + let pkt = MediaPacket { + header: MediaHeaderV2 { + version: MediaHeaderV2::VERSION, + flags: MediaHeaderV2::FLAG_KEYFRAME | MediaHeaderV2::FLAG_FRAME_END, + media_type: MediaType::Video, + codec_id: CodecId::H264Baseline, + stream_id: 0, + fec_ratio: 0, + seq: 7, + timestamp: 123, + fec_block: 1, + }, + payload: payload.clone(), + quality_report: None, + }; + + let mut reassembler = VideoReassembler::new(); + let frame = reassembler.push(&pkt).unwrap(); + assert_eq!(frame.codec_id, CodecId::H264Baseline); + assert_eq!(frame.width, None); + assert_eq!(frame.height, None); + assert_eq!(frame.data, payload.to_vec()); + } } diff --git a/crates/wzp-video/tests/pipeline_roundtrip.rs b/crates/wzp-video/tests/pipeline_roundtrip.rs index 5146bd6..6683fa2 100644 --- a/crates/wzp-video/tests/pipeline_roundtrip.rs +++ b/crates/wzp-video/tests/pipeline_roundtrip.rs @@ -16,9 +16,9 @@ use std::sync::Mutex; use wzp_proto::CodecId; use wzp_video::{ - VideoFrame, factory::{create_video_decoder, create_video_encoder}, - transport::{VideoReassembler, packetize_video_frame}, + transport::{packetize_video_frame, VideoReassembler}, + VideoFrame, }; /// VideoToolbox has global session registry state — serialise integration tests @@ -42,7 +42,12 @@ fn synthetic_i420(width: u32, height: u32, frame_idx: u32) -> VideoFrame { data[y_size..y_size + uv_size].fill(128); data[y_size + uv_size..].fill(128); - VideoFrame { width, height, data, timestamp_ms: frame_idx as u64 * 33 } + VideoFrame { + width, + height, + data, + timestamp_ms: frame_idx as u64 * 33, + } } // ── tests ───────────────────────────────────────────────────────────────────── @@ -53,10 +58,10 @@ fn h264_pipeline_roundtrip() { let _g = VT_LOCK.lock().unwrap(); let (w, h) = (640, 360); - let mut encoder = create_video_encoder(CodecId::H264Baseline, w, h, 1_500_000) - .expect("H264Baseline encoder"); - let mut decoder = create_video_decoder(CodecId::H264Baseline, w, h) - .expect("H264Baseline decoder"); + let mut encoder = + create_video_encoder(CodecId::H264Baseline, w, h, 1_500_000).expect("H264Baseline encoder"); + let mut decoder = + create_video_decoder(CodecId::H264Baseline, w, h).expect("H264Baseline decoder"); let mut seq = 0u32; let mut decoded_count = 0usize; @@ -71,32 +76,60 @@ fn h264_pipeline_roundtrip() { } let is_keyframe = encoder.is_keyframe(&encoded); - let pkts = packetize_video_frame(&encoded, CodecId::H264Baseline, is_keyframe, &mut seq, i * 33); - assert!(!pkts.is_empty(), "packetize must produce at least one packet"); + let pkts = packetize_video_frame( + &encoded, + CodecId::H264Baseline, + is_keyframe, + &mut seq, + i * 33, + w, + h, + ); + assert!( + !pkts.is_empty(), + "packetize must produce at least one packet" + ); // All fragments for this frame share the same timestamp. let ts = pkts[0].header.timestamp; let total_frags = pkts.len(); for (idx, pkt) in pkts.iter().enumerate() { - assert_eq!(pkt.header.timestamp, ts, "all fragments of one frame share timestamp"); + assert_eq!( + pkt.header.timestamp, ts, + "all fragments of one frame share timestamp" + ); let frag_idx = (pkt.header.fec_block >> 8) as usize; let frag_total = (pkt.header.fec_block & 0xFF) as usize; assert_eq!(frag_idx, idx, "fragment index must match packet position"); - assert_eq!(frag_total, total_frags, "all fragments carry the correct total count"); + assert_eq!( + frag_total, total_frags, + "all fragments carry the correct total count" + ); } - assert!(pkts.last().unwrap().header.is_frame_end(), "last packet must have FLAG_FRAME_END"); + assert!( + pkts.last().unwrap().header.is_frame_end(), + "last packet must have FLAG_FRAME_END" + ); // Push through reassembler — only the last packet should yield a frame. let mut reassembler = VideoReassembler::new(); for (j, pkt) in pkts.iter().enumerate() { let result = reassembler.push(pkt); if j + 1 < pkts.len() { - assert!(result.is_none(), "intermediate fragments must not yield a complete frame"); + assert!( + result.is_none(), + "intermediate fragments must not yield a complete frame" + ); } else { - let (codec, kf, data) = result.expect("last fragment must complete the frame"); - assert_eq!(codec, CodecId::H264Baseline); - assert_eq!(kf, is_keyframe); - assert_eq!(data, encoded, "reassembled bytes must match original encoded bytes"); + let frame = result.expect("last fragment must complete the frame"); + assert_eq!(frame.codec_id, CodecId::H264Baseline); + assert_eq!(frame.is_keyframe, is_keyframe); + assert_eq!(frame.width, Some(w as u16)); + assert_eq!(frame.height, Some(h as u16)); + assert_eq!( + frame.data, encoded, + "reassembled bytes must match original encoded bytes" + ); } } @@ -118,7 +151,10 @@ fn h264_pipeline_roundtrip() { } } - assert!(decoded_count > 0, "at least one frame must have been decoded"); + assert!( + decoded_count > 0, + "at least one frame must have been decoded" + ); } /// Fragmentation: a frame larger than VIDEO_MAX_PAYLOAD splits into multiple packets, @@ -134,13 +170,28 @@ fn large_frame_fragments_and_reassembles() { let mut seq = 0u32; let pkts = packetize_video_frame( - &synthetic_encoded, CodecId::H264Baseline, true, &mut seq, 9000, + &synthetic_encoded, + CodecId::H264Baseline, + true, + &mut seq, + 9000, + 1280, + 720, ); assert!(pkts.len() >= 4, "large frame must produce ≥4 fragments"); - assert!(pkts[0].header.is_keyframe(), "keyframe flag propagates to all fragments"); - assert!(!pkts[0].header.is_frame_end(), "first packet is not frame end"); - assert!(pkts.last().unwrap().header.is_frame_end(), "last packet is frame end"); + assert!( + pkts[0].header.is_keyframe(), + "keyframe flag propagates to all fragments" + ); + assert!( + !pkts[0].header.is_frame_end(), + "first packet is not frame end" + ); + assert!( + pkts.last().unwrap().header.is_frame_end(), + "last packet is frame end" + ); let mut reassembler = VideoReassembler::new(); let mut result = None; @@ -148,8 +199,13 @@ fn large_frame_fragments_and_reassembles() { result = reassembler.push(pkt); } - let (_, _, data) = result.expect("all fragments delivered → complete frame"); - assert_eq!(data, synthetic_encoded, "reassembled bytes must match input exactly"); + let frame = result.expect("all fragments delivered → complete frame"); + assert_eq!(frame.width, Some(1280)); + assert_eq!(frame.height, Some(720)); + assert_eq!( + frame.data, synthetic_encoded, + "reassembled bytes must match input exactly" + ); } /// Packet loss: if the first fragment is missing, reassembly cannot complete. @@ -159,7 +215,7 @@ fn missing_fragment_blocks_reassembly() { let frame: Vec = vec![0xAB; VIDEO_MAX_PAYLOAD * 2 + 50]; let mut seq = 0u32; - let pkts = packetize_video_frame(&frame, CodecId::Av1Main, false, &mut seq, 1234); + let pkts = packetize_video_frame(&frame, CodecId::Av1Main, false, &mut seq, 1234, 640, 480); assert!(pkts.len() >= 3); let mut reassembler = VideoReassembler::new(); @@ -193,7 +249,15 @@ fn evict_stale_removes_aged_frames() { let frame: Vec = vec![0x55; VIDEO_MAX_PAYLOAD * 2]; let mut seq = 0u32; - let pkts = packetize_video_frame(&frame, CodecId::H264Baseline, false, &mut seq, 500); + let pkts = packetize_video_frame( + &frame, + CodecId::H264Baseline, + false, + &mut seq, + 500, + 640, + 480, + ); let mut reassembler = VideoReassembler::new(); // Push only first packet — frame is incomplete. diff --git a/desktop/src-tauri/src/engine.rs b/desktop/src-tauri/src/engine.rs index c4b95f0..dc68bcb 100644 --- a/desktop/src-tauri/src/engine.rs +++ b/desktop/src-tauri/src/engine.rs @@ -1385,7 +1385,7 @@ impl CallEngine { // Video pipeline state — mirror of the desktop recv task. let mut video_reassembler = wzp_video::transport::VideoReassembler::new(); let mut video_decoder: Option> = None; - let mut video_decoder_codec: Option = None; + let mut video_decoder_key: Option<(wzp_proto::CodecId, u32, u32)> = None; let mut video_first_recv_logged = false; let mut video_first_reassembled_logged = false; let mut video_reassembled_samples: u64 = 0; @@ -1466,7 +1466,14 @@ impl CallEngine { }), ); } - if let Some((codec_id, is_kf, frame)) = video_reassembler.push(&pkt) { + if let Some(reassembled) = video_reassembler.push(&pkt) { + let codec_id = reassembled.codec_id; + let is_kf = reassembled.is_keyframe; + let frame_width = + reassembled.width.unwrap_or(video_width as u16) as u32; + let frame_height = + reassembled.height.unwrap_or(video_height as u16) as u32; + let frame = reassembled.data; video_reassembled_samples += 1; if !video_first_reassembled_logged { video_first_reassembled_logged = true; @@ -1484,6 +1491,14 @@ impl CallEngine { ); } if should_log_video_sample(video_reassembled_samples, is_kf) { + crate::maybe_dump_video_bytes( + &recv_app, + "remote_encoded_reassembled", + "android", + video_reassembled_samples, + &frame, + codec_id, + ); crate::emit_call_debug( &recv_app, "video:reassembled_frame", @@ -1517,22 +1532,23 @@ impl CallEngine { video_reassembler.evict_stale(pkt.header.timestamp, 5_000); continue; } - if video_decoder_codec != Some(codec_id) { + let decoder_key = (codec_id, frame_width, frame_height); + if video_decoder_key != Some(decoder_key) { crate::emit_call_debug( &recv_app, "video:decoder_init_start", serde_json::json!({ "t_ms": recv_t0.elapsed().as_millis() as u64, "codec": format!("{:?}", codec_id), - "width": video_width, - "height": video_height, + "width": frame_width, + "height": frame_height, "platform": "android", }), ); match wzp_video::factory::create_video_decoder( codec_id, - video_width, - video_height, + frame_width, + frame_height, ) { Ok(d) => { info!(codec = ?codec_id, "video decoder created (android)"); @@ -1546,7 +1562,7 @@ impl CallEngine { }), ); video_decoder = Some(d); - video_decoder_codec = Some(codec_id); + video_decoder_key = Some(decoder_key); } Err(e) => { error!("video decoder init failed: {e}"); @@ -2304,11 +2320,21 @@ impl CallEngine { is_keyframe, &mut seq, ts_ms, + frame.width, + frame.height, ); video_packets_total += pkts.len() as u64; video_bytes_total += encoded.len() as u64; if encoded_frame_samples < 5 { encoded_frame_samples += 1; + crate::maybe_dump_video_bytes( + &vid_app, + "local_encoded", + "android", + encoded_frame_samples, + &encoded, + vid_codec, + ); let packet_payload_bytes: usize = pkts.iter().map(|pkt| pkt.payload.len()).sum(); crate::emit_call_debug( @@ -2870,7 +2896,7 @@ impl CallEngine { let mut first_packet_logged = false; let mut video_reassembler = wzp_video::transport::VideoReassembler::new(); let mut video_decoder: Option> = None; - let mut video_decoder_codec: Option = None; + let mut video_decoder_key: Option<(wzp_proto::CodecId, u32, u32)> = None; let mut video_first_recv_logged_desktop = false; let mut video_first_reassembled_logged = false; let mut video_reassembled_samples: u64 = 0; @@ -2956,7 +2982,14 @@ impl CallEngine { }), ); } - if let Some((codec_id, is_kf, frame)) = video_reassembler.push(&pkt) { + if let Some(reassembled) = video_reassembler.push(&pkt) { + let codec_id = reassembled.codec_id; + let is_kf = reassembled.is_keyframe; + let frame_width = + reassembled.width.unwrap_or(video_width as u16) as u32; + let frame_height = + reassembled.height.unwrap_or(video_height as u16) as u32; + let frame = reassembled.data; video_reassembled_samples += 1; if !video_first_reassembled_logged { video_first_reassembled_logged = true; @@ -2974,6 +3007,14 @@ impl CallEngine { ); } if should_log_video_sample(video_reassembled_samples, is_kf) { + crate::maybe_dump_video_bytes( + &recv_app, + "remote_encoded_reassembled", + "desktop", + video_reassembled_samples, + &frame, + codec_id, + ); crate::emit_call_debug( &recv_app, "video:reassembled_frame", @@ -3008,22 +3049,23 @@ impl CallEngine { continue; } // Lazy-init or switch decoder on codec change. - if video_decoder_codec != Some(codec_id) { + let decoder_key = (codec_id, frame_width, frame_height); + if video_decoder_key != Some(decoder_key) { crate::emit_call_debug( &recv_app, "video:decoder_init_start", serde_json::json!({ "t_ms": recv_t0.elapsed().as_millis() as u64, "codec": format!("{:?}", codec_id), - "width": video_width, - "height": video_height, + "width": frame_width, + "height": frame_height, "platform": "desktop", }), ); match wzp_video::factory::create_video_decoder( codec_id, - video_width, - video_height, + frame_width, + frame_height, ) { Ok(d) => { info!(codec = ?codec_id, "video decoder created"); @@ -3037,7 +3079,7 @@ impl CallEngine { }), ); video_decoder = Some(d); - video_decoder_codec = Some(codec_id); + video_decoder_key = Some(decoder_key); } Err(e) => { error!("video decoder init failed: {e}"); @@ -3640,11 +3682,21 @@ impl CallEngine { is_keyframe, &mut seq, ts_ms, + frame.width, + frame.height, ); video_packets_total += pkts.len() as u64; video_bytes_total += encoded.len() as u64; if encoded_frame_samples < 5 { encoded_frame_samples += 1; + crate::maybe_dump_video_bytes( + &vid_app, + "local_encoded", + "desktop", + encoded_frame_samples, + &encoded, + vid_codec, + ); let packet_payload_bytes: usize = pkts.iter().map(|pkt| pkt.payload.len()).sum(); crate::emit_call_debug( diff --git a/desktop/src-tauri/src/lib.rs b/desktop/src-tauri/src/lib.rs index aa6ed39..6896d37 100644 --- a/desktop/src-tauri/src/lib.rs +++ b/desktop/src-tauri/src/lib.rs @@ -205,6 +205,61 @@ pub(crate) fn maybe_dump_video_jpeg( } } +pub(crate) fn maybe_dump_video_bytes( + app: &tauri::AppHandle, + stage: &str, + platform: &str, + frame_no: u64, + bytes: &[u8], + codec: wzp_proto::CodecId, +) { + if !should_dump_frame(frame_no) || bytes.is_empty() { + return; + } + + let ext = match codec { + wzp_proto::CodecId::H265Main => "h265", + wzp_proto::CodecId::Av1Main => "obu", + _ => "h264", + }; + let seq = FRAME_DUMP_WRITES.fetch_add(1, Ordering::Relaxed) + 1; + let dir = identity_dir().join("frame-dumps"); + let file_name = format!("{seq:06}_{platform}_{stage}_f{frame_no:06}.{ext}"); + let path = dir.join(file_name); + let result = std::fs::create_dir_all(&dir).and_then(|_| std::fs::write(&path, bytes)); + + match result { + Ok(()) => emit_call_debug( + app, + "video:byte_dump", + serde_json::json!({ + "stage": stage, + "platform": platform, + "frame_no": frame_no, + "codec": format!("{:?}", codec), + "bytes": bytes.len(), + "path": path, + }), + ), + Err(e) => { + if seq <= 5 || seq % 30 == 0 { + emit_call_debug( + app, + "video:byte_dump_failed", + serde_json::json!({ + "stage": stage, + "platform": platform, + "frame_no": frame_no, + "codec": format!("{:?}", codec), + "error": e.to_string(), + "path": path, + }), + ); + } + } + } +} + /// RGB24 → I420 (planar 4:2:0). Layout: Y(w×h) | U(w/2×h/2) | V(w/2×h/2). fn rgb_to_i420(rgb: &[u8], w: usize, h: usize) -> Vec { let y_size = w * h; diff --git a/desktop/src/main.ts b/desktop/src/main.ts index 1be7f57..4378047 100644 --- a/desktop/src/main.ts +++ b/desktop/src/main.ts @@ -607,7 +607,7 @@ function drawCameraFrameForSend() { const vh = vdLocalVideo.videoHeight || camCaptureCanvas.height; if (!vw || !vh) return; - const scale = Math.max(cameraSendWidth / vw, cameraSendHeight / vh); + const scale = Math.min(cameraSendWidth / vw, cameraSendHeight / vh); const dw = vw * scale; const dh = vh * scale; const dx = (cameraSendWidth - dw) / 2; @@ -631,6 +631,8 @@ async function captureAndPushCameraFrame() { frame_no: cameraCaptureFrameNo, width: camCaptureCanvas.width, height: camCaptureCanvas.height, + source_width: vdLocalVideo.videoWidth || null, + source_height: vdLocalVideo.videoHeight || null, jpeg_b64_len: b64.length, capture_clock: getVideoFrameCallbackApi() ? "video_frame_callback" : "interval", });