From 3c2f93aec850b279d47ccbb02f30f129a458d6c6 Mon Sep 17 00:00:00 2001 From: wm4 Date: Tue, 14 Jan 2014 17:38:21 +0100 Subject: demux_mkv: improve robustness by explicitly checking for level 1 elements Matroska makes it pretty hard to resync correctly on broken files: random data returns "valid" EBML IDs with a high probability, and when trying to skip them it's likely that you skip a random amount of data (instead of considering the element length invalid). Improve upon this by skipping known level 1 elements only. Consider everything else invalid and call the resync code. This might result in annoying behavior when Matroska adds new level 1 elements, although it won't be particularly harmful. Matroska doesn't really allow us to do better (even mkvtoolnix explicitly checks for known level 1 elements). Since we now don't always want to combine EBML element skipping and resyncing, remove ebml_read_skip_or_resync_cluster(), and make ebml_read_skip() more tolerant against skipping broken elements. Also, don't resync when reading sub-elements, and instead do resyncing when reading them results in an error. --- demux/ebml.c | 73 ++++++++++++++++++++++++++++++------------------------------ 1 file changed, 36 insertions(+), 37 deletions(-) (limited to 'demux/ebml.c') diff --git a/demux/ebml.c b/demux/ebml.c index c875ef564c..2c540ad093 100644 --- a/demux/ebml.c +++ b/demux/ebml.c @@ -41,6 +41,26 @@ #define SIZE_MAX ((size_t)-1) #endif +// Whether the id is a known Matroska level 1 element (allowed as element on +// global file level, after the level 0 MATROSKA_ID_SEGMENT). +// This (intentionally) doesn't include "global" elements. +bool ebml_is_mkv_level1_id(uint32_t id) +{ + switch (id) { + case MATROSKA_ID_SEEKHEAD: + case MATROSKA_ID_INFO: + case MATROSKA_ID_CLUSTER: + case MATROSKA_ID_TRACKS: + case MATROSKA_ID_CUES: + case MATROSKA_ID_ATTACHMENTS: + case MATROSKA_ID_CHAPTERS: + case MATROSKA_ID_TAGS: + return true; + default: + return false; + } +} + /* * Read: the element content data ID. * Return: the ID. @@ -252,21 +272,32 @@ char *ebml_read_utf8(stream_t *s, uint64_t *length) /* * Skip the current element. + * end: the end of the parent element or -1 (for robust error handling) */ -int ebml_read_skip(stream_t *s, uint64_t *length) +int ebml_read_skip(struct mp_log *log, int64_t end, stream_t *s) { uint64_t len; int l; + int64_t pos = stream_tell(s); + len = ebml_read_length(s, &l); if (len == EBML_UINT_INVALID) - return 1; - if (length) - *length = len + l; + goto invalid; + + int64_t pos2 = stream_tell(s); + if (len >= INT64_MAX - pos2 || (end > 0 && pos2 + len > end)) + goto invalid; - stream_skip(s, len); + if (!stream_skip(s, len)) + goto invalid; return 0; + +invalid: + mp_err(log, "Invalid EBML length at position %"PRId64"\n", pos); + stream_seek(s, pos); + return 1; } /* @@ -291,38 +322,6 @@ int ebml_resync_cluster(struct mp_log *log, stream_t *s) return -1; } -/* - * Skip the current element, or on error, call ebml_resync_cluster(). - * end gives the maximum possible file pos (due to EBML parent element size). - */ -int ebml_read_skip_or_resync_cluster(struct mp_log *log, int64_t end, - stream_t *s) -{ - uint64_t len; - int l; - - len = ebml_read_length(s, &l); - if (len == EBML_UINT_INVALID) - goto resync; - - int64_t pos = stream_tell(s); - - if (end >= 0 && pos + len > end) - goto resync; - - // When reading corrupted elements, len will often be a random high number, - // and stream_skip() will fail when skipping past EOF. - if (!stream_skip(s, len)) { - stream_seek(s, pos); - goto resync; - } - - return 0; - -resync: - return ebml_resync_cluster(log, s) < 0 ? -1 : 1; -} - /* * Read the next element, but only the header. The contents * are supposed to be sub-elements which can be read separately. -- cgit v1.2.3