Skip to content

Make duration handling of video more robust #10646

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 61 additions & 27 deletions crates/utils/re_video/src/demux/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,27 @@ pub type GopIndex = usize;
/// Index used for referencing into [`VideoDataDescription::samples`].
pub type SampleIndex = usize;

/// Distinguishes finite videos from video streams.
#[derive(Clone, Debug)]
pub enum VideoUpdateType {
    /// A finite video with a known duration which won't be updated further.
    NoUpdates {
        /// Total duration of the video, expressed in time units.
        duration: Time,
    },

    /// A stream that may be periodically updated.
    ///
    /// Video streams may drop samples at the beginning and add new samples at the end.
    /// The last sample's duration is treated as unknown.
    /// However, it is typically assumed to be as long as the average sample duration.
    Stream {
        /// Last time we added/removed samples from the [`VideoDataDescription`].
        ///
        /// This is used solely as a heuristic input for how the player schedules work to decoders.
        /// For live streams, even those that stopped, this is expected to be the wallclock time of
        /// when a sample was added to this data structure. *Not* when the sample was first recorded.
        last_time_updated_samples: Instant,
    },
}

/// Description of video data.
///
/// Store various metadata about a video.
Expand All @@ -138,10 +159,8 @@ pub struct VideoDataDescription {
/// This happens for streams logged on a non-temporal timeline.
pub timescale: Option<Timescale>,

/// Duration of the video, in time units if known.
///
/// For open ended video streams rather than video files this is generally unknown.
pub duration: Option<Time>,
/// Whether this is a finite video or a stream.
pub update_type: VideoUpdateType,

/// We split video into GOPs, each beginning with a key frame,
/// followed by any number of delta frames.
Expand All @@ -166,14 +185,6 @@ pub struct VideoDataDescription {
/// Meta information about the samples.
pub samples_statistics: SamplesStatistics,

/// If this is potentially a live stream, the last time we added/removed samples from this data description.
///
/// This is used solely as a heuristic input for how the player schedules work to decoders.
/// For static video data this is expected to be `None`.
/// For live streams, even those that stopped, this is expected to be wallclock time of when a sample was
/// added to this data structure. *Not* when the sample was first recorded.
pub last_time_updated_samples: Option<Instant>,

/// All the tracks in the mp4; not just the video track.
///
/// Can be nice to show in a UI.
Expand All @@ -186,12 +197,11 @@ impl re_byte_size::SizeBytes for VideoDataDescription {
codec: _,
encoding_details: _,
timescale: _,
duration: _,
update_type: _,
gops,
samples,
samples_statistics,
mp4_tracks,
last_time_updated_samples: _,
} = self;

gops.heap_size_bytes()
Expand Down Expand Up @@ -474,18 +484,6 @@ impl VideoDataDescription {
}
}

/// Total length of the video, if it is known.
///
/// NOTE: The duration of the final frame is part of this value!
///
/// Video streams (unlike video files) generally have no known length.
/// `None` is returned whenever either the timescale or the duration is missing.
#[inline]
pub fn duration(&self) -> Option<std::time::Duration> {
    self.timescale
        .zip(self.duration)
        .map(|(scale, length)| length.duration(scale))
}

/// The codec used to encode the video.
#[inline]
pub fn human_readable_codec_string(&self) -> String {
Expand Down Expand Up @@ -515,6 +513,43 @@ impl VideoDataDescription {
self.samples.num_elements()
}

/// Duration of the video.
///
/// For videos that may still grow this is an estimate of the video length so far.
/// (we don't know for sure how long the last sample is going to be)
pub fn duration(&self) -> Option<std::time::Duration> {
match &self.update_type {
VideoUpdateType::NoUpdates { duration } => Some(duration.duration(self.timescale?)),

VideoUpdateType::Stream { .. } => {
if self.samples.num_elements() < 2 {
return None;
}

// TODO(#10090): This is only correct because there's no b-frames on streams right now.
// If there are b-frames determining the last timestamp is a bit more complicated.
let first_sample = self.samples.front()?;
let last_sample = self.samples.back()?;
let timescale = self.timescale?;

// Estimate length of the last sample:
let second_to_last_sample = self
.samples
.get(self.samples.next_index().saturating_sub(2))?;
let estimated_average_sample_duration = (last_sample.presentation_timestamp
- second_to_last_sample.presentation_timestamp)
.duration(timescale)
/ (self.num_samples() - 1) as u32;

Some(
(last_sample.presentation_timestamp - first_sample.presentation_timestamp)
.duration(timescale)
+ estimated_average_sample_duration,
)
}
}
}

/// `num_frames / duration`.
///
/// Note that the video could have a variable framerate!
Expand Down Expand Up @@ -835,7 +870,6 @@ impl std::fmt::Debug for VideoDataDescription {
.field("codec", &self.codec)
.field("encoding_details", &self.encoding_details)
.field("timescale", &self.timescale)
.field("duration", &self.duration)
.field("gops", &self.gops)
.field("samples", &self.samples.iter_indexed().collect::<Vec<_>>())
.finish()
Expand Down
8 changes: 4 additions & 4 deletions crates/utils/re_video/src/demux/mp4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use super::{GroupOfPictures, SampleMetadata, VideoDataDescription, VideoLoadErro

use crate::{
StableIndexDeque, Time, Timescale,
demux::{ChromaSubsamplingModes, SamplesStatistics, VideoEncodingDetails},
demux::{ChromaSubsamplingModes, SamplesStatistics, VideoEncodingDetails, VideoUpdateType},
h264::encoding_details_from_h264_sps,
};

Expand All @@ -29,7 +29,6 @@ impl VideoDataDescription {
let stsd = track.trak(&mp4).mdia.minf.stbl.stsd.clone();

let timescale = Timescale::new(track.timescale);
let duration = Time::new(track.duration as i64);
let mut samples = StableIndexDeque::<SampleMetadata>::with_capacity(track.samples.len());
let mut gops = StableIndexDeque::<GroupOfPictures>::new();
let mut gop_sample_start_index = 0;
Expand Down Expand Up @@ -138,9 +137,10 @@ impl VideoDataDescription {
codec,
encoding_details: Some(codec_details_from_stds(track, stsd)?),
timescale: Some(timescale),
duration: Some(duration),
update_type: VideoUpdateType::NoUpdates {
duration: Time::new(track.duration as i64),
},
samples_statistics,
last_time_updated_samples: None,
gops,
samples,
mp4_tracks,
Expand Down
1 change: 1 addition & 0 deletions crates/utils/re_video/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ pub use self::{
demux::{
ChromaSubsamplingModes, GopIndex, GroupOfPictures, SampleIndex, SampleMetadata,
SamplesStatistics, VideoCodec, VideoDataDescription, VideoEncodingDetails, VideoLoadError,
VideoUpdateType,
},
};

Expand Down
4 changes: 2 additions & 2 deletions crates/viewer/re_data_ui/src/blob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,8 @@ pub fn blob_preview_and_save_ui(
if let Some(duration) = video.data_descr().duration() {
VideoTimestamp::from_secs(time % duration.as_secs_f64())
} else {
// TODO(#7484): show something more useful here
VideoTimestamp::from_nanos(i64::MAX)
// Invalid video or unknown timescale.
VideoTimestamp::from_nanos(0)
}
});
let video_time = re_viewer_context::video_timestamp_component_to_video_time(
Expand Down
17 changes: 7 additions & 10 deletions crates/viewer/re_renderer/src/video/player.rs
Original file line number Diff line number Diff line change
Expand Up @@ -581,10 +581,7 @@ impl VideoPlayer {
// * we don't want to show the spinner too eagerly and rather give the impression of a delayed stream
// * some decoders need a certain amount of samples in the queue to produce a frame.
// See AsyncDecoder::min_num_samples_to_enqueue_ahead for more details about decoder peculiarities.
let recently_updated_video = video_description
.last_time_updated_samples
.is_some_and(|t| t.elapsed() < self.config.time_until_video_assumed_ended);
if recently_updated_video {
if !treat_video_as_finite(&self.config, video_description) {
let min_num_samples_to_enqueue_ahead =
self.sample_decoder.min_num_samples_to_enqueue_ahead();
let allowed_delay =
Expand Down Expand Up @@ -635,12 +632,12 @@ fn treat_video_as_finite(
// If this is a potentially live stream, signal the end of the video after a certain amount of time.
// This helps decoders to flush out any pending frames.
// (in particular the ffmpeg-executable based decoder profits from this as it tends to not emit the last 5~10 frames otherwise)
video_description.duration.is_some()
|| video_description
.last_time_updated_samples
.is_some_and(|last_time_updated_samples| {
last_time_updated_samples.elapsed() > config.time_until_video_assumed_ended
})
match &video_description.update_type {
    // A video that receives no further updates is finite by definition.
    re_video::VideoUpdateType::NoUpdates { .. } => true,

    // A stream is only treated as finite once it hasn't been updated for a while.
    re_video::VideoUpdateType::Stream {
        last_time_updated_samples,
    } => last_time_updated_samples.elapsed() > config.time_until_video_assumed_ended,
}
}

/// Determine whether the decoder is catching up with the requested frame within a certain tolerance.
Expand Down
17 changes: 11 additions & 6 deletions crates/viewer/re_viewer_context/src/cache/video_stream_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,12 +250,13 @@ fn load_video_data_from_chunks(
codec,
encoding_details: None, // Unknown so far, we'll find out later.
timescale: timescale_for_timeline(store, timeline),
duration: None, // Streams have to be assumed to be open ended, so we don't have a duration.
update_type: re_video::VideoUpdateType::Stream {
last_time_updated_samples: Instant::now(),
},
gops: StableIndexDeque::new(),
samples: StableIndexDeque::with_capacity(sample_chunks.len()), // Number of video chunks is minimum number of samples.
samples_statistics: re_video::SamplesStatistics::NO_BFRAMES, // TODO(#10090): No b-frames for now.
mp4_tracks: Default::default(),
last_time_updated_samples: Some(Instant::now()),
};

for chunk in sample_chunks {
Expand Down Expand Up @@ -565,7 +566,9 @@ impl Cache for VideoStreamCache {
video_sample_buffers,
} = &mut *video_stream;
let video_data = video_renderer.data_descr_mut();
video_data.last_time_updated_samples = Some(Instant::now());
video_data.update_type = re_video::VideoUpdateType::Stream {
last_time_updated_samples: Instant::now(),
};

match event.kind {
re_chunk_store::ChunkStoreDiffKind::Addition => {
Expand Down Expand Up @@ -759,17 +762,19 @@ mod tests {
codec,
encoding_details,
timescale,
duration,
update_type,
gops,
samples,
samples_statistics,
mp4_tracks,
last_time_updated_samples: _,
} = data_descr.clone();

assert_eq!(codec, re_video::VideoCodec::H264);
assert_eq!(timescale, None); // Sequence timeline doesn't have a timescale.
assert_eq!(duration, None); // Open ended video.
assert!(matches!(
update_type,
re_video::VideoUpdateType::Stream { .. }
)); // Open ended video.
assert_eq!(samples_statistics, re_video::SamplesStatistics::NO_BFRAMES);
assert!(mp4_tracks.is_empty());

Expand Down
Loading