From 814869759c59ed3ce16604837fbf55e4f5ff7392 Mon Sep 17 00:00:00 2001
From: Aman Gupta <aman@tmm1.net>
Date: Wed, 2 May 2018 19:29:11 -0700
Subject: demux, player: fix playback of sparse video streams (w/ still images)

Fixes several issues when playing back MPEG-TS files whose video stream
is flagged as containing "still images". For example, this sample has a
video frame only every 6s: https://s3.amazonaws.com/tmm1/music-choice.ts

Changes include:
- start playback right away, without waiting for first video frame
- do not consider the sparse video stream in demuxer underrun detection
- do not require multiple video frames for the VO
- use audio as the master stream for demuxer metadata events
- use audio stream for playback time

Signed-off-by: Aman Gupta <aman@tmm1.net>
---
 demux/demux.c      |  6 ++++--
 demux/demux_lavf.c |  5 +++++
 demux/stheader.h   |  1 +
 player/core.h      |  2 ++
 player/playloop.c  | 15 +++++++++++++--
 player/video.c     | 12 +++++++++++-
 6 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/demux/demux.c b/demux/demux.c
index afe8d2b51f..40a68eacae 100644
--- a/demux/demux.c
+++ b/demux/demux.c
@@ -282,6 +282,7 @@ struct demux_stream {
     bool eager;             // try to keep at least 1 packet queued
                             // if false, this stream is disabled, or passively
                             // read (like subtitles)
+    bool still_image;       // stream has still video images
     bool refreshing;        // finding old position after track switches
     bool eof;               // end of demuxed stream? (true if no more packets)
 
@@ -703,8 +704,9 @@ static void update_stream_selection_state(struct demux_internal *in,
     for (int n = 0; n < in->num_streams; n++) {
         struct demux_stream *s = in->streams[n]->ds;
 
+        s->still_image = s->sh->still_image;
         s->eager = s->selected && !s->sh->attached_picture;
-        if (s->eager) {
+        if (s->eager && !s->still_image) {
             any_av_streams |= s->type != STREAM_SUB;
             if (!master ||
                 (master->type == STREAM_VIDEO && s->type == STREAM_AUDIO))
@@ -2994,7 +2996,7 @@ static int cached_demux_control(struct demux_internal *in, int cmd, void *arg)
             struct demux_stream *ds = in->streams[n]->ds;
             if (ds->eager && !(!ds->queue->head && ds->eof) && !ds->ignore_eof)
             {
-                r->underrun |= !ds->reader_head && !ds->eof;
+                r->underrun |= !ds->reader_head && !ds->eof && !ds->still_image;
                 r->ts_reader = MP_PTS_MAX(r->ts_reader, ds->base_ts);
                 r->ts_end = MP_PTS_MAX(r->ts_end, ds->queue->last_ts);
                 any_packets |= !!ds->reader_head;
diff --git a/demux/demux_lavf.c b/demux/demux_lavf.c
index 2049cf6c47..555df9406d 100644
--- a/demux/demux_lavf.c
+++ b/demux/demux_lavf.c
@@ -53,6 +53,9 @@
 #ifndef AV_DISPOSITION_TIMED_THUMBNAILS
 #define AV_DISPOSITION_TIMED_THUMBNAILS 0
 #endif
+#ifndef AV_DISPOSITION_STILL_IMAGE
+#define AV_DISPOSITION_STILL_IMAGE 0
+#endif
 
 #define INITIAL_PROBE_SIZE STREAM_BUFFER_SIZE
 #define PROBE_BUF_SIZE FFMIN(STREAM_MAX_BUFFER_SIZE, 2 * 1024 * 1024)
@@ -717,6 +720,8 @@ static void handle_new_stream(demuxer_t *demuxer, int i)
             sh->forced_track = true;
         if (st->disposition & AV_DISPOSITION_DEPENDENT)
             sh->dependent_track = true;
+        if (st->disposition & AV_DISPOSITION_STILL_IMAGE)
+            sh->still_image = true;
         if (priv->format_hack.use_stream_ids)
             sh->demuxer_id = st->id;
         AVDictionaryEntry *title = av_dict_get(st->metadata, "title", NULL, 0);
diff --git a/demux/stheader.h b/demux/stheader.h
index 700ded89fa..63744487bf 100644
--- a/demux/stheader.h
+++ b/demux/stheader.h
@@ -46,6 +46,7 @@ struct sh_stream {
     bool default_track;         // container default track flag
     bool forced_track;          // container forced track flag
     bool dependent_track;       // container dependent track flag
+    bool still_image;           // video stream contains still images
     int hls_bitrate;
 
     struct mp_tags *tags;
diff --git a/player/core.h b/player/core.h
index bc6cf28ff0..71c39dcaa5 100644
--- a/player/core.h
+++ b/player/core.h
@@ -180,6 +180,8 @@ struct vo_chain {
     // - video consists of a single picture, which should be shown only once
     // - do not sync audio to video in any way
     bool is_coverart;
+    // - video consists of sparse still images
+    bool is_sparse;
 };
 
 // Like vo_chain, for audio.
diff --git a/player/playloop.c b/player/playloop.c
index 8845b58cef..f5c1fde0ef 100644
--- a/player/playloop.c
+++ b/player/playloop.c
@@ -950,7 +950,9 @@ static void handle_dummy_ticks(struct MPContext *mpctx)
 // Update current playback time.
 static void handle_playback_time(struct MPContext *mpctx)
 {
-    if (mpctx->vo_chain && !mpctx->vo_chain->is_coverart &&
+    if (mpctx->vo_chain &&
+        !mpctx->vo_chain->is_coverart &&
+        !mpctx->vo_chain->is_sparse &&
         mpctx->video_status >= STATUS_PLAYING &&
         mpctx->video_status < STATUS_EOF)
     {
@@ -986,6 +988,13 @@ static void handle_playback_restart(struct MPContext *mpctx)
 {
     struct MPOpts *opts = mpctx->opts;
 
+    // Do not wait for video stream if it only has sparse frames.
+    if (mpctx->vo_chain &&
+        mpctx->vo_chain->is_sparse &&
+        mpctx->video_status < STATUS_READY) {
+        mpctx->video_status = STATUS_READY;
+    }
+
     if (mpctx->audio_status < STATUS_READY ||
         mpctx->video_status < STATUS_READY)
         return;
@@ -1008,7 +1017,9 @@ static void handle_playback_restart(struct MPContext *mpctx)
         }
 
         // Video needed, but not started yet -> wait.
-        if (mpctx->vo_chain && !mpctx->vo_chain->is_coverart &&
+        if (mpctx->vo_chain &&
+            !mpctx->vo_chain->is_coverart &&
+            !mpctx->vo_chain->is_sparse &&
             mpctx->video_status <= STATUS_READY)
             return;
 
diff --git a/player/video.c b/player/video.c
index 17dff84984..fde92851a1 100644
--- a/player/video.c
+++ b/player/video.c
@@ -256,6 +256,7 @@ void reinit_video_chain_src(struct MPContext *mpctx, struct track *track)
         vo_c->dec_src = track->dec->f->pins[0];
         vo_c->filter->container_fps = track->dec->fps;
         vo_c->is_coverart = !!track->stream->attached_picture;
+        vo_c->is_sparse = track->stream->still_image;
 
         track->vo_c = vo_c;
         vo_c->track = track;
@@ -365,9 +366,12 @@ static void handle_new_frame(struct MPContext *mpctx)
 
     double frame_time = 0;
     double pts = mpctx->next_frames[0]->pts;
+    bool is_sparse = mpctx->vo_chain && mpctx->vo_chain->is_sparse;
+
     if (mpctx->video_pts != MP_NOPTS_VALUE) {
         frame_time = pts - mpctx->video_pts;
-        double tolerance = mpctx->demuxer->ts_resets_possible ? 5 : 1e4;
+        double tolerance = mpctx->demuxer->ts_resets_possible &&
+                           !is_sparse ? 5 : 1e4;
         if (frame_time <= 0 || frame_time >= tolerance) {
             // Assume a discontinuity.
             MP_WARN(mpctx, "Invalid video timestamp: %f -> %f\n",
@@ -403,6 +407,9 @@ static int get_req_frames(struct MPContext *mpctx, bool eof)
     if (mpctx->video_out->driver->caps & VO_CAP_NORETAIN)
         return 1;
 
+    if (mpctx->vo_chain && mpctx->vo_chain->is_sparse)
+        return 1;
+
     if (mpctx->opts->untimed || mpctx->video_out->driver->untimed)
         return 1;
 
@@ -594,6 +601,9 @@ static void update_av_diff(struct MPContext *mpctx, double offset)
         mpctx->video_status != STATUS_PLAYING)
         return;
 
+    if (mpctx->vo_chain && mpctx->vo_chain->is_sparse)
+        return;
+
     double a_pos = playing_audio_pts(mpctx);
     if (a_pos != MP_NOPTS_VALUE && mpctx->video_pts != MP_NOPTS_VALUE) {
         mpctx->last_av_difference = a_pos - mpctx->video_pts
-- 
cgit v1.2.3