11 files changed, 495 insertions, 1404 deletions
diff --git a/TOOLS/file2string.pl b/TOOLS/file2string.pl
deleted file mode 100755
index 341bb06fd6..0000000000
--- a/TOOLS/file2string.pl
+++ /dev/null
@@ -1,24 +0,0 @@
-#! /usr/bin/env perl
-
-use strict;
-use warnings;
-
-# Convert the contents of a file into a C string constant.
-# Note that the compiler will implicitly add an extra 0 byte at the end
-# of every string, so code using the string may need to remove that to get
-# the exact contents of the original file.
-# FIXME: why not a char array?
-
-# treat only alphanumeric and punctuations (excluding " and ?) as safe
-my $unsafe_chars = qr{[^][A-Za-z0-9!#%&'()*+,./:;<=>^_{|}~ -]};
-
-for my $file (@ARGV) {
-    open my $fh, '<:raw', $file or next;
-    print "/* Generated from $file */\n";
-    while (<$fh>) {
-        # replace unsafe chars with their equivalent octal escapes
-        s/($unsafe_chars)/\\@{[sprintf '%03o', ord($1)]}/gos;
-        print "\"$_\"\n"
-    }
-    close $fh;
-}
diff --git a/TOOLS/file2string.py b/TOOLS/file2string.py
new file mode 100755
index 0000000000..6cdd1a72ae
--- /dev/null
+++ b/TOOLS/file2string.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+# Convert the contents of a file into a C string constant.
+# Note that the compiler will implicitly add an extra 0 byte at the end
+# of every string, so code using the string may need to remove that to get
+# the exact contents of the original file.
+
+import sys
+
+# Iterating a byte string yields ints on Python 3.x, and 1-char strs on 2.x
+def pord(c):
+    return ord(c) if isinstance(c, str) else c
+
+def main(infile):
+    # Default to octal escapes; '?' stays escaped so "??" can't form a trigraph.
+    conv = ['\\' + ("%03o" % c) for c in range(256)]
+    for c in ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+              "0123456789!#%&'()*+,-./:;<=>[]^_{|}~ "):
+        conv[ord(c)] = c
+    for c, esc in ("\nn", "\tt", r"\\", '""'):
+        conv[ord(c)] = '\\' + esc
+    for line in infile:
+        sys.stdout.write('"' + ''.join(conv[pord(c)] for c in line) + '"\n')
+
+with open(sys.argv[1], 'rb') as infile:
+    sys.stdout.write("// Generated from %s\n\n" % sys.argv[1])
+    main(infile)
diff --git a/TOOLS/lib/Parse/Matroska.pm b/TOOLS/lib/Parse/Matroska.pm
deleted file mode 100644
index e1c08c9814..0000000000
--- a/TOOLS/lib/Parse/Matroska.pm
+++ /dev/null
@@ -1,30 +0,0 @@
-use 5.008;
-use strict;
-use warnings;
-
-# ABSTRACT: Module collection to parse Matroska files.
-package Parse::Matroska;
-
-=head1 DESCRIPTION
-
-C<use>s L<Parse::Matroska::Reader>. See the documentation
-of the modules mentioned in L</"SEE ALSO"> for more information
-in how to use this module.
-
-It's intended for this module to contain high-level interfaces
-to the other modules in the distribution.
-
-=head1 SOURCE CODE
-
-L<https://github.com/Kovensky/Parse-Matroska>
-
-=head1 SEE ALSO
-
-L<Parse::Matroska::Reader>, L<Parse::Matroska::Element>,
-L<Parse::Matroska::Definitions>.
-
-=cut
-
-use Parse::Matroska::Reader;
-
-1;
diff --git a/TOOLS/lib/Parse/Matroska/Definitions.pm b/TOOLS/lib/Parse/Matroska/Definitions.pm
deleted file mode 100644
index 5a5adcd6de..0000000000
--- a/TOOLS/lib/Parse/Matroska/Definitions.pm
+++ /dev/null
@@ -1,384 +0,0 @@
-use 5.008;
-use strict;
-use warnings;
-
-# ABSTRACT: internal EBML grammar definitions
-package Parse::Matroska::Definitions;
-
-use Parse::Matroska::Utils qw{uniq uncamelize};
-
-use Exporter;
-our @ISA       = qw{Exporter};
-our @EXPORT_OK = qw{elem_by_hexid %EBML_DEFINITION %MATROSKA_DEFINITION};
-
-=head1 SYNOPSIS
-
-    use Parse::Matroska::Definitions qw{elem_by_hexid};
-    my $ebml_id = elem_by_hexid('1a45dfa3');
-    print "EBML ID $ebml_id->{elid}'s name: $ebml_id->{name}";
-
-=head1 DESCRIPTION
-
-Contains the definition of the EBML grammar as expected in
-Matroska files. This module is meant mostly for internal use.
-
-As this was extended from a script in mpv-player, some data
-generated is apparently useless for regular module users
-but is still relevant to the mpv-player script. Such data
-is annotated as being for mpv compatibility.
-
-=head1 NOTE
-
-The API of this module is not yet considered stable.
-
-=head1 GLOBALS
-
-These global variables are considered B<immutable>.
-
-=head2 @Parse::Matroska::Definitions::global_elem_list
-
-A global list of known matroska elements. Useful for
-mpv's matroska script, used for generating C headers
-that parse matroska.
-
-=head2 %Parse::Matroska::Definitions::global_elem_dict
-
-A global hash of known matroska elements. Used internally
-by L</elem_by_hexid($id)>.
-
-=cut
-
-@Parse::Matroska::Definitions::global_elem_list = ();
-%Parse::Matroska::Definitions::global_elem_dict = ();
-
-=head2 %EBML_DEFINITION
-
-Optionally-importable hash of known EBML IDs belonging
-to the EBML generic grammar.
-
-=head2 %MATROSKA_DEFINITION
-
-Optionally-importable hash of known EBML IDs belonging
-to the Matroska-specific grammar.
-
-=cut
-
-our %EBML_DEFINITION = define_ebml();
-our %MATROSKA_DEFINITION = define_matroska();
-
-=method elem_by_hexid($id)
-
-Returns an EBML Element Definition corresponding to the provided
-hexadecimal string. Returns C<undef> if the element is unknown.
-
-=cut
-sub elem_by_hexid {
-    my ($elid) = @_;
-    return $Parse::Matroska::Definitions::global_elem_dict{$elid};
-}
-
-################################################
-### Helper functions for document definition ###
-################################################
-
-# used by elem when setting the 'valname' key
-use constant TYPE_MAP => {
-    uint    => 'uint64_t',
-    str     => 'char *',
-    binary  => 'struct bstr',
-    ebml_id => 'uint32_t',
-    float   => 'double',
-    sint    => 'int64_t',
-};
-
-# this will be localized to "MATROSKA" or "EBML" on the elem declarations
-our $ELEM_DEFINE_TYPE = undef;
-
-=method elem($name,$elid,$valtype)
-
-NOTE: never call this function yourself; it changes data structures
-that are considered immutable outside of this package.
-
-Internal API function that generates the EBML Element Definitions.
-
-This API function returns an array which first element is C<$elid>
-and the second is a generated hash. The generated hash is stored
-in the @global_elem_list and %global_elem_dict.
-
-The generated hash contains:
-
-=for :list
-= name
-The EBML Element's name, given through C<$name>.
-= elid
-The EBML Element's hex id, given through C<$elid>. Used for lookups by L</elem_by_hexid($id)>.
-= valtype
-The EBML Element's type, given through C<$valtype>, except when C<$valtype> is an arrayref.
-= multiple
-If C<$name> ends with a C<*>, this is set as true and strips the C<*> from L</name>. Used to
-mark elements that may be repeated.
-= subelements
-An arrayref of elements that may be children of this element, given through C<$valtype> if it
-is an arrayref. Sets L</valtype> to C<sub> if there are subelements.
-= subids
-An arrayref listing all the L</elid>s of subelements, C<uniq>ified.
-
-The following elements are for mpv compatibility:
-
-=for :list
-= definename
-Name used for generating C #defines.
-= fieldname
-Name used for generating C struct fields.
-= structname
-Name used for generating C struct names.
-= ebmltype
-A pre-#defined constant to describe the element's type.
-= valname
-Typename used when declaring a struct field referring to this element.
-
-=cut
-sub elem {
-    my %e = (name => shift, elid => shift, valtype => shift);
-
-    # strip * from name, set 'multiple' if there was one
-    $e{multiple} = scalar $e{name} =~ s/\*$//;
-
-    # ELEM_DEFINE_TYPE is either MATROSKA or EBML
-    $e{definename} = "${ELEM_DEFINE_TYPE}_ID_".uc($e{name});
-    $e{fieldname} = uncamelize $e{name};
-    $e{structname} = "ebml_$e{fieldname}";
-
-    if (ref $e{valtype} eq 'HASH') {
-        $e{subelements} = $e{valtype};
-        $e{subids} = uniq map { $_->{elid} } values %{$e{subelements}};
-        $e{valtype} = 'sub';
-        $e{ebmltype} = 'EBML_TYPE_SUBELEMENTS';
-        $e{valname} = "struct $e{structname}";
-    } else {
-        $e{ebmltype} = "EBML_TYPE_\U$e{valtype}";
-        die "Unrecognized value type $e{valtype}" unless
-            defined ($e{valname} = TYPE_MAP->{$e{valtype}});
-    }
-    my $e = \%e;
-    push @Parse::Matroska::Definitions::global_elem_list, $e;
-    $Parse::Matroska::Definitions::global_elem_dict{$e{elid}} = $e;
-    return ($e{elid}, $e);
-}
-
-#############################################
-### EBML and Matroska document definitons ###
-#############################################
-
-=method define_ebml
-
-Internal function that defines the EBML generic grammar.
-
-Must not be called from outside the package.
-
-=cut
-sub define_ebml {
-    local $ELEM_DEFINE_TYPE = 'EBML';
-    return (
-        elem('EBML', '1a45dfa3', {
-            elem('EBMLVersion',        '4286', 'uint'),
-            elem('EBMLReadVersion',    '42f7', 'uint'),
-            elem('EBMLMaxIDLength',    '42f2', 'uint'),
-            elem('EBMLMaxSizeLength',  '42f3', 'uint'),
-            elem('DocType',            '4282', 'str'),
-            elem('DocTypeVersion',     '4287', 'uint'),
-            elem('DocTypeReadVersion', '4285', 'uint'),
-        }),
-
-        elem('CRC32',      'bf', 'binary'),
-        elem('Void',       'ec', 'binary'),
-    );
-}
-
-
-=method define_matroska
-
-Internal function that defines the Matroska-specific EBML grammar.
-
-Must not be called from outside the package.
-
-=cut
-sub define_matroska {
-    local $ELEM_DEFINE_TYPE = 'MATROSKA';
-    return (
-        elem('Segment', '18538067', {
-            elem('SeekHead*', '114d9b74', {
-                elem('Seek*', '4dbb', {
-                    elem('SeekID',       '53ab', 'ebml_id'),
-                    elem('SeekPosition', '53ac', 'uint'),
-                }),
-            }),
-
-            elem('Info*', '1549a966', {
-                elem('SegmentUID',      '73a4', 'binary'),
-                elem('PrevUID',       '3cb923', 'binary'),
-                elem('NextUID',       '3eb923', 'binary'),
-                elem('TimecodeScale', '2ad7b1', 'uint'),
-                elem('DateUTC',         '4461', 'sint'),
-                elem('Title',           '7ba9', 'str'),
-                elem('MuxingApp',       '4d80', 'str'),
-                elem('WritingApp',      '5741', 'str'),
-                elem('Duration',        '4489', 'float'),
-            }),
-
-            elem('Cluster*', '1f43b675', {
-                elem('Timecode', 'e7', 'uint'),
-                elem('BlockGroup*', 'a0', {
-                    elem('Block',           'a1', 'binary'),
-                    elem('BlockDuration',   '9b', 'uint'),
-                    elem('ReferenceBlock*', 'fb', 'sint'),
-                    elem('DiscardPadding',  '75A2', 'sint'),
-                }),
-                elem('SimpleBlock*', 'a3', 'binary'),
-            }),
-
-            elem('Tracks*', '1654ae6b', {
-                elem('TrackEntry*', 'ae', {
-                    elem('TrackNumber',            'd7', 'uint'),
-                    elem('TrackUID',             '73c5', 'uint'),
-                    elem('TrackType',              '83', 'uint'),
-                    elem('FlagEnabled',            'b9', 'uint'),
-                    elem('FlagDefault',            '88', 'uint'),
-                    elem('FlagForced',           '55aa', 'uint'),
-                    elem('FlagLacing',             '9c', 'uint'),
-                    elem('MinCache',             '6de7', 'uint'),
-                    elem('MaxCache',             '6df8', 'uint'),
-                    elem('DefaultDuration',    '23e383', 'uint'),
-                    elem('TrackTimecodeScale', '23314f', 'float'),
-                    elem('MaxBlockAdditionID',   '55ee', 'uint'),
-                    elem('Name',                 '536e', 'str'),
-                    elem('Language',           '22b59c', 'str'),
-                    elem('CodecID',                '86', 'str'),
-                    elem('CodecPrivate',         '63a2', 'binary'),
-                    elem('CodecName',          '258688', 'str'),
-                    elem('CodecDecodeAll',         'aa', 'uint'),
-                    elem('CodecDelay',           '56AA', 'uint'),
-                    elem('SeekPreRoll',          '56BB', 'uint'),
-                    elem('Video', 'e0', {
-                        elem('FlagInterlaced',  '9a', 'uint'),
-                        elem('PixelWidth',      'b0', 'uint'),
-                        elem('PixelHeight',     'ba', 'uint'),
-                        elem('DisplayWidth',  '54b0', 'uint'),
-                        elem('DisplayHeight', '54ba', 'uint'),
-                        elem('DisplayUnit',   '54b2', 'uint'),
-                        elem('FrameRate',   '2383e3', 'float'),
-                        elem('ColourSpace', '2eb524', 'binary'),
-                        elem('StereoMode',    '53b8', 'uint'),
-                        elem('Colour',        '55B0', {
-                            elem('MatrixCoefficients',      '55B1', 'uint'),
-                            elem('BitsPerChannel',          '55B2', 'uint'),
-                            elem('ChromaSubsamplingHorz',   '55B3', 'uint'),
-                            elem('ChromaSubsamplingVert',   '55B4', 'uint'),
-                            elem('CbSubsamplingHorz',       '55B5', 'uint'),
-                            elem('CbSubsamplingVert',       '55B6', 'uint'),
-                            elem('ChromaSitingHorz',        '55B7', 'uint'),
-                            elem('ChromaSitingVert',        '55B8', 'uint'),
-                            elem('Range',                   '55B9', 'uint'),
-                            elem('TransferCharacteristics', '55BA', 'uint'),
-                            elem('Primaries',               '55BB', 'uint'),
-                            elem('MaxCLL',                  '55BC', 'uint'),
-                            elem('MaxFALL',                 '55BD', 'uint'),
-                            elem('MasteringMetadata',       '55D0', {
-                                elem('PrimaryRChromaticityX',   '55D1', 'float'),
-                                elem('PrimaryRChromaticityY',   '55D2', 'float'),
-                                elem('PrimaryGChromaticityX',   '55D3', 'float'),
-                                elem('PrimaryGChromaticityY',   '55D4', 'float'),
-                                elem('PrimaryBChromaticityX',   '55D5', 'float'),
-                                elem('PrimaryBChromaticityY',   '55D6', 'float'),
-                                elem('WhitePointChromaticityX', '55D7', 'float'),
-                                elem('WhitePointChromaticityY', '55D8', 'float'),
-                                elem('LuminanceMax',            '55D9', 'float'),
-                                elem('LuminanceMin',            '55DA', 'float'),
-                            }),
-                        }),
-                    }),
-                    elem('Audio', 'e1', {
-                        elem('SamplingFrequency',         'b5', 'float'),
-                        elem('OutputSamplingFrequency', '78b5', 'float'),
-                        elem('Channels',                  '9f', 'uint'),
-                        elem('BitDepth',                '6264', 'uint'),
-                    }),
-                    elem('ContentEncodings', '6d80', {
-                        elem('ContentEncoding*', '6240', {
-                            elem('ContentEncodingOrder', '5031', 'uint'),
-                            elem('ContentEncodingScope', '5032', 'uint'),
-                            elem('ContentEncodingType',  '5033', 'uint'),
-                            elem('ContentCompression', '5034', {
-                                elem('ContentCompAlgo',     '4254', 'uint'),
-                                elem('ContentCompSettings', '4255', 'binary'),
-                            }),
-                        }),
-                    }),
-                }),
-            }),
-
-            elem('Cues', '1c53bb6b', {
-                elem('CuePoint*', 'bb', {
-                    elem('CueTime', 'b3', 'uint'),
-                    elem('CueTrackPositions*', 'b7', {
-                        elem('CueTrack',           'f7', 'uint'),
-                        elem('CueClusterPosition', 'f1', 'uint'),
-                        elem('CueRelativePosition','f0', 'uint'),
-                        elem('CueDuration',        'b2', 'uint'),
-                    }),
-                }),
-            }),
-
-            elem('Attachments', '1941a469', {
-                elem('AttachedFile*', '61a7', {
-                    elem('FileDescription', '467e', 'str'),
-                    elem('FileName',        '466e', 'str'),
-                    elem('FileMimeType',    '4660', 'str'),
-                    elem('FileData',        '465c', 'binary'),
-                    elem('FileUID',         '46ae', 'uint'),
-                }),
-            }),
-
-            elem('Chapters', '1043a770', {
-                elem('EditionEntry*', '45b9', {
-                    elem('EditionUID',         '45bc', 'uint'),
-                    elem('EditionFlagHidden',  '45bd', 'uint'),
-                    elem('EditionFlagDefault', '45db', 'uint'),
-                    elem('EditionFlagOrdered', '45dd', 'uint'),
-                    elem('ChapterAtom*', 'b6', {
-                        elem('ChapterUID',               '73c4', 'uint'),
-                        elem('ChapterTimeStart',           '91', 'uint'),
-                        elem('ChapterTimeEnd',             '92', 'uint'),
-                        elem('ChapterFlagHidden',          '98', 'uint'),
-                        elem('ChapterFlagEnabled',       '4598', 'uint'),
-                        elem('ChapterSegmentUID',        '6e67', 'binary'),
-                        elem('ChapterSegmentEditionUID', '6ebc', 'uint'),
-                        elem('ChapterDisplay*', '80', {
-                            elem('ChapString',      '85', 'str'),
-                            elem('ChapLanguage*', '437c', 'str'),
-                            elem('ChapCountry*',  '437e', 'str'),
-                        }),
-                    }),
-                }),
-            }),
-            elem('Tags*', '1254c367', {
-                elem('Tag*', '7373', {
-                    elem('Targets', '63c0', {
-                        elem('TargetTypeValue',     '68ca', 'uint'),
-                        elem('TargetTrackUID',      '63c5', 'uint'),
-                        elem('TargetEditionUID',    '63c9', 'uint'),
-                        elem('TargetChapterUID',    '63c4', 'uint'),
-                        elem('TargetAttachmentUID', '63c6', 'uint'),
-                     }),
-                    elem('SimpleTag*', '67c8', {
-                        elem('TagName',     '45a3', 'str'),
-                        elem('TagLanguage', '447a', 'str'),
-                        elem('TagString',   '4487', 'str'),
-                    }),
-                }),
-            }),
-        }),
-    );
-}
-
-1;
diff --git a/TOOLS/lib/Parse/Matroska/Element.pm b/TOOLS/lib/Parse/Matroska/Element.pm
deleted file mode 100644
index fa0830c11e..0000000000
--- a/TOOLS/lib/Parse/Matroska/Element.pm
+++ /dev/null
@@ -1,331 +0,0 @@
-use 5.008;
-use strict;
-use warnings;
-
-# ABSTRACT: a mid-level representation of an EBML element
-package Parse::Matroska::Element;
-
-use Carp;
-use List::Util qw{first};
-
-=head1 SYNOPSIS
-
-    use Parse::Matroska::Reader;
-    my $reader = Parse::Matroska::Reader->new($path);
-    my $elem = $reader->read_element;
-
-    print "ID: $elem->{elid}\n";
-    print "Name: $elem->{name}\n";
-    print "Length: $elem->{content_len}\n";
-    print "Type: $elem->{type}\n";
-    print "Child count: ", scalar(@{$elem->all_children}), "\n";
-    if ($elem->{type} eq 'sub') {
-        while (my $chld = $elem->next_child) {
-            print "Child Name: $chld->{name}\n";
-        }
-    } else {
-        print "Value: ", $elem->get_value, "\n";
-    }
-
-=head1 DESCRIPTION
-
-Represents a single Matroska element as decoded by
-L<Parse::Matroska::Reader>. This is essentially a hash
-augmented with functions for delay-loading of binary
-values and children elements.
-
-=head1 NOTE
-
-The API of this module is not yet considered stable.
-
-=attr elid
-
-The EBML Element ID, suitable for passing to
-L<Parse::Matroska::Definitions/elem_by_hexid>.
-
-=attr name
-
-The EBML Element's name.
-
-=attr type
-
-The EBML Element's type. Can be C<uint>, C<sint>,
-C<float>, C<ebml_id>, C<str> or C<binary>. See L</value>
-for details.
-
-Equivalent to
-C<elem_by_hexid($elem-E<gt>{value})-E<gt>{valtype}>.
-
-=attr value
-
-The EBML Element's value. Should be obtained through
-L</get_value>.
-
-Is an unicode string if the L</type> is C<str>, that is,
-the string has already been decoded by L<Encode/decode>.
-
-Is C<undef> if the L</type> is C<binary> and the contents
-were delay-loaded and not yet read. L</get_value> will
-do the delayed load if needed.
-
-Is an arrayref if the L</type> is C<sub>, containing
-the children nodes that were already loaded.
-
-Is a hashref if the L</type> is C<ebml_id>, containing
-the referred element's information as defined in
-L<Parse::Matroska::Definitions>. Calling
-C<elem_by_hexid($elem-E<gt>{value}-E<gt>{elid})> will
-return the same object as $elem->{value}.
-
-=attr full_len
-
-The entire length of this EBML Element, including
-the header's.
-
-=attr size_len
-
-The length of the size marker. Used when calculating
-L</full_len> from L</content_len>
-
-=attr content_len
-
-The length of the contents of this EBML Element,
-which excludes the header.
-
-=attr reader
-
-A weakened reference to the associated
-L<Parse::Matroska::Reader>.
-
-=method new(%hash)
-
-Creates a new Element initialized with the hash
-given as argument.
-
-=cut
-sub new {
-    my $class = shift;
-    my $self = {};
-    bless $self, $class;
-
-    $self->initialize(@_);
-    return $self;
-}
-
-=method initialize(%hash)
-
-Called by L</new> on initialization.
-
-=cut
-sub initialize {
-    my ($self, %args) = @_;
-    for (keys %args) {
-        $self->{$_} = $args{$_};
-    }
-    $self->{depth} = 0 unless $self->{depth};
-}
-
-=method skip
-
-Called by the user to ignore the contents of this EBML node.
-Needed when ignoring the children of a node.
-
-=cut
-sub skip {
-    my ($self) = @_;
-    my $reader = $self->{reader};
-    return unless $reader; # we don't have to skip if there's no reader
-    my $pos = $reader->getpos;
-    croak "Too late to skip, reads were already done"
-        if $pos ne $self->{data_pos};
-    $reader->skip($self->{content_len});
-}
-
-=method get_value($keep_bin)
-
-Returns the value contained by this EBML element.
-
-If the element has children, returns an arrayref to
-the children elements that were already encountered.
-
-If the element's type is C<binary> and the value was
-delay-loaded, does the reading now.
-
-If $keep_bin is true, the delay-loaded data is kept
-as the L</value>, otherwise, further calls to
-C<get_value> will reread the data from the L</reader>.
-
-=cut
-sub get_value {
-    my ($self, $keep_bin) = @_;
-
-    return undef if $self->{type} eq 'skip';
-    return $self->{value} if $self->{value};
-
-    my $reader = $self->{reader} or
-        croak "The associated Reader has been deleted";
-
-    # delay-loaded 'binary'
-    if ($self->{type} eq 'binary') {
-        croak "Cannot seek in the current Reader" unless $self->{data_pos};
-        # seek to the data position...
-        $reader->setpos($self->{data_pos});
-        # read the data, keeping it in value if requested
-        if ($keep_bin) {
-            $self->{value} = $reader->readlen($self->{content_len});
-            return $self->{value};
-        } else {
-            return $reader->readlen($self->{content_len});
-        }
-    }
-}
-
-=method next_child($read_bin)
-
-Builtin iterator; reads and returns the next child element.
-Always returns undef if the type isn't C<sub>.
-
-Returns undef at the end of the iterator and resets itself to
-point to the first element; so calling L</next_child($read_bin)>
-after the iterator returned C<undef> will return the first child.
-
-The optional C<$read_bin> parameter has the children elements
-not delay-load their value if their type is C<binary>.
-
-If all children elements have already been read, return
-each element in-order as would be given by
-L</all_children($recurse,$read_bin)>.
-
-=cut
-sub next_child {
-    my ($self, $read_bin) = @_;
-    return unless $self->{type} eq 'sub';
-
-    if ($self->{_all_children_read}) {
-        my $idx = $self->{_last_child} ||= 0;
-        if ($idx == @{$self->{value}}) {
-            # reset the iterator, returning undef once
-            $self->{_last_child} = 0;
-            return;
-        }
-        my $ret = $self->{value}->[$idx];
-
-        ++$idx;
-        $self->{_last_child} = $idx;
-        return $ret;
-    }
-
-    my $len = defined $self->{remaining_len}
-        ? $self->{remaining_len}
-        : $self->{content_len};
-
-    if ($len == 0) {
-        # we've read all children; switch into $self->{value} iteration mode
-        $self->{_all_children_read} = 1;
-        # return undef since the iterator will reset
-        return;
-    }
-
-    $self->{pos_offset} ||= 0;
-    my $pos = $self->{data_pos};
-    my $reader = $self->{reader} or croak "The associated reader has been deleted";
-    $reader->setpos($pos);
-    $reader->{fh}->seek($self->{pos_offset}, 1) if $pos;
-
-    my $chld = $reader->read_element($read_bin);
-    return undef unless defined $chld;
-    $self->{pos_offset} += $chld->{full_len};
-
-    $self->{remaining_len} = $len - $chld->{full_len};
-
-    if ($self->{remaining_len} < 0) {
-        croak "Child elements consumed $self->{remaining_len} more bytes than parent $self->{name} contained";
-    }
-
-    $chld->{depth} = $self->{depth} + 1;
-    $self->{value} ||= [];
-
-    push @{$self->{value}}, $chld;
-
-    return $chld;
-}
-
-=method all_children($recurse,$read_bin)
-
-Calls L</populate_children($recurse,$read_bin)> on self
-and returns an arrayref with the children nodes.
-
-Both C<$recurse> and C<$read_bin> are optional and default
-to false.
-
-=cut
-sub all_children {
-    my ($self, $recurse, $read_bin) = @_;
-    $self->populate_children($recurse, $read_bin);
-    return $self->{value};
-}
-
-=method children_by_name($name)
-
-Searches in the already read children elements for all
-elements with the EBML name C<$name>. Returns an array
-containing all found elements. On scalar context,
-returns only the first element found.
-
-Croaks if the element's C<type> isn't C<sub>.
-
-=cut
-sub children_by_name {
-    my ($self, $name) = @_;
-    return unless defined wantarray; # don't do work if work isn't wanted
-    croak "Element can't have children" unless $self->{type} eq 'sub';
-
-    my @found = grep { $_->{name} eq $name } @{$self->{value}};
-    return @found       if wantarray;         # list
-    return shift @found if defined wantarray; # scalar
-}
-
-=method populate_children($recurse,$read_bin)
-
-Populates the internal array of children elements, that is,
-requests that the associated L<Matroska::Parser::Reader> reads
-all children elements. Returns itself.
-
-Returns false if the element's C<type> isn't C<sub>.
-
-If C<$recurse> is provided and is true, the method will call
-itself in the children elements with the same parameters it
-received; this will build a full EBML tree.
-
-If C<$read_bin> is provided and is true, disables delay-loading
-of the contents of C<binary>-type nodes, reading the contents
-to memory.
-
-If both C<$recurse> and C<$read_bin> are true, entire EBML trees
-can be loaded without requiring seeks, thus behaving correctly
-on unseekable streams. If C<$read_bin> is false, the entire EBML
-tree is still loaded, but calling L</get_value> on C<binary>-type
-nodes will produce an error on unseekable streams.
-
-=cut
-sub populate_children {
-    my ($self, $recurse, $read_bin) = @_;
-
-    return unless $self->{type} eq 'sub';
-
-    if (@{$self->{value}} && $recurse) {
-        # only recurse
-        foreach (@{$self->{value}}) {
-            $_->populate_children($recurse, $read_bin);
-        }
-        return $self;
-    }
-
-    while (my $chld = $self->next_child($read_bin)) {
-        $chld->populate_children($recurse, $read_bin) if $recurse;
-    }
-
-    return $self;
-}
-
-1;
diff --git a/TOOLS/lib/Parse/Matroska/Reader.pm b/TOOLS/lib/Parse/Matroska/Reader.pm
deleted file mode 100644
index 614b7b12c0..0000000000
--- a/TOOLS/lib/Parse/Matroska/Reader.pm
+++ /dev/null
@@ -1,426 +0,0 @@
-use 5.008;
-use strict;
-use warnings;
-
-# ABSTRACT: a low-level reader for EBML files
-package Parse::Matroska::Reader;
-
-use Parse::Matroska::Definitions qw{elem_by_hexid};
-use Parse::Matroska::Element;
-
-use Carp;
-use Scalar::Util qw{openhandle weaken};
-use IO::Handle;
-use IO::File;
-use List::Util qw{first};
-use Encode;
-
-use constant BIGINT_TRY => 'Pari,GMP,FastCalc';
-use Math::BigInt try => BIGINT_TRY;
-use Math::BigRat try => BIGINT_TRY;
-
-=head1 SYNOPSIS
-
-    use Parse::Matroska::Reader;
-    my $reader = Parse::Matroska::Reader->new($path);
-    $reader->close;
-    $reader->open(\$string_with_matroska_data);
-
-    my $elem = $reader->read_element;
-    print "Element ID: $elem->{elid}\n";
-    print "Element name: $elem->{name}\n";
-    if ($elem->{type} ne 'sub') {
-        print "Element value: $elem->get_value\n";
-    } else {
-        while (my $child = $elem->next_child) {
-            print "Child element: $child->{name}\n";
-        }
-    }
-    $reader->close;
-
-=head1 DESCRIPTION
-
-Reads EBML data, which is used in Matroska files.
-This is a low-level reader which is meant to be used as a backend
-for higher level readers. TODO: write the high level readers :)
-
-=head1 NOTE
-
-The API of this module is not yet considered stable.
-
-=method new
-
-Creates a new reader.
-Calls L</open($arg)> with its arguments if provided.
-
-=cut
-sub new {
-    my $class = shift;
-    my $self = {};
-    bless $self, $class;
-
-    $self->open(@_) if @_;
-    return $self;
-}
-
-=method open($arg)
-
-Creates the internal filehandle. The argument can be:
-
-=for :list
-* An open filehandle or L<IO::Handle> object.
-The filehandle is not C<dup()>ed, so calling L</close> in this
-object will close the given filehandle as well.
-* A scalar containing a path to a file.
-* On perl v5.14 or newer, a scalarref pointing to EBML data.
-For similar functionality in older perls, give an L<IO::String> object
-or the handle to an already C<open>ed scalarref.
-
-=cut
-sub open {
-    my ($self, $arg) = @_;
-    $self->{fh} = openhandle($arg) || IO::File->new($arg, "<:raw")
-        or croak "Can't open $arg: $!";
-}
-
-=method close
-
-Closes the internal filehandle.
-
-=cut
-sub close {
-    my ($self) = @_;
-    $self->{fh}->close;
-    delete $self->{fh};
-}
-
-# equivalent to $self->readlen(1), possibly faster
-sub _getc {
-    my ($self) = @_;
-    my $c = $self->{fh}->getc;
-    croak "Can't do read of length 1: $!" if !defined $c && $!;
-    return $c;
-}
-
-=method readlen($length)
-
-Reads C<$length> bytes from the internal filehandle.
-
-=cut
-sub readlen {
-    my ($self, $len) = @_;
-    my $data;
-    my $readlen = $self->{fh}->read($data, $len);
-    croak "Can't do read of length $len: $!"
-                 unless defined $readlen;
-    return $data;
-}
-
-# converts a byte string into an integer
-# we do so by converting the integer into a hex string (big-endian)
-# and then reading the hex-string into an integer
-sub _bin2int($) {
-    my ($bin) = @_;
-    # if the length is larger than 3
-    # the resulting integer might be larger than INT_MAX
-    if (length($bin) > 3) {
-        return Math::BigInt->from_hex(unpack("H*", $bin));
-    }
-    return hex(unpack("H*", $bin));
-}
-
-# creates a floating-point number with the given mantissa and exponent
-sub _ldexp {
-    my ($mantissa, $exponent) = @_;
-    my $r = new Math::BigRat($mantissa);
-    return $r * Math::BigRat->new(2)**$exponent;
-}
-
-# NOTE: the read_* functions are hard to read because they're ports
-# of even harder to read python functions.
-# TODO: make them readable
-
-=method read_id
-
-Reads an EBML ID atom in hexadecimal string format, suitable
-for passing to L<Parse::Matroska::Definitions/elem_by_hexid($id)>.
-
-=cut
-sub read_id {
-    my ($self) = @_;
-    my $t = $self->_getc;
-    return undef unless defined $t;
-    my $i = 0;
-    my $mask = 1<<7;
-
-    if (ord($t) == 0) {
-        croak "Matroska Syntax error: first byte of ID was \\0"
-    }
-    until (ord($t) & $mask) {
-        ++$i;
-        $mask >>= 1;
-    }
-    # return hex string of the bytes we just read
-    return unpack "H*", ($t . $self->readlen($i));
-}
-
-=method read_size
-
-Reads an EBML Data Size atom, which immediately follows
-an EBML ID atom.
-
-This returns an array consisting of:
-
-=for :list
-0. The length of the Data Size atom.
-1. The value encoded in the Data Size atom, which is the length of all the data following it.
-
-=cut
-sub read_size {
-    my ($self) = @_;
-    my $t = $self->_getc;
-    my $i = 0;
-    my $mask = 1<<7;
-
-    if (ord($t) == 0) {
-        croak "Matroska Syntax error: first byte of data size was \\0"
-    }
-    until (ord($t) & $mask) {
-        ++$i;
-        $mask >>= 1;
-    }
-    $t = $t & chr($mask-1); # strip length bits (keep only significant bits)
-    return ($i+1, _bin2int $t . $self->readlen($i));
-}
-
-=method read_str($length)
-
-Reads a string of length C<$length> bytes from the internal filehandle.
-The string is already L<Encode/decode>d from C<UTF-8>, which is the
-standard Matroska string encoding.
-
-=cut
-{
-    my $utf8 = find_encoding("UTF-8");
-    sub read_str {
-        my ($self, $length) = @_;
-        return $utf8->decode($self->readlen($length));
-    }
-}
-
-=method read_uint($length)
-
-Reads an unsigned integer of length C<$length> bytes
-from the internal filehandle.
-
-Returns a L<Math::BigInt> object if C<$length> is greater
-than 4.
-
-=cut
-sub read_uint {
-    my ($self, $length) = @_;
-    return _bin2int $self->readlen($length);
-}
-
-=method read_sint($length)
-
-Reads a signed integer of length C<$length> bytes
-from the internal filehandle.
-
-Returns a L<Math::BigInt> object if C<$length> is greater
-than 4.
-
-=cut
-sub read_sint {
-    my ($self, $length) = @_;
-    my $i = $self->read_uint($length);
-
-    # Apply 2's complement to the unsigned int
-    my $mask = int(2 ** ($length * 8 - 1));
-    # if the most significant bit is set...
-    if ($i & $mask) {
-        # subtract the MSB twice
-        $i -= 2 * $mask;
-    }
-    return $i;
-}
-
-=method read_float($length)
-
-Reads an IEEE floating point number of length C<$length>
-bytes from the internal filehandle.
-
-Only lengths C<4> and C<8> are supported (C C<float> and C<double>).
-
-=cut
-{
-    my $b1 = new Math::BigInt 1;
-
-    sub read_float {
-        my ($self, $length) = @_;
-        my $i = new Math::BigInt $self->read_uint($length)->bstr;
-        my $f;
-
-        # These evil expressions reinterpret an unsigned int as IEEE binary floats
-        if ($length == 4) {
-            $f = _ldexp(($i & ((1<<23) - 1)) + (1<<23), ($i>>23 & ((1<<8) - 1)) - 150);
-            $f = -$f if $i & ($b1<<31);
-        } elsif ($length == 8) {
-            $f = _ldexp(($i & (($b1<<52) - 1)) + ($b1<<52), ($i>>52 & ((1<<12) - 1)) - 1075);
-            $f = -$f if $i & ($b1<<63);
-        } else {
-            croak "Matroska Syntax error: unsupported IEEE float byte size $length";
-        }
-
-        return $f;
-    }
-}
-
-=method read_ebml_id($length)
-
-Reads an EBML ID when it's encoded as the data inside another
-EBML element, that is, when the enclosing element's C<type> is
-C<ebml_id>.
-
-This returns a hashref with the EBML element description as
-defined in L<Parse::Matroska::Definitions>.
-
-=cut
-sub read_ebml_id {
-    my ($self, $length) = @_;
-    return elem_by_hexid(unpack("H*", $self->readlen($length)));
-}
-
-=method skip($length)
-
-Skips C<$length> bytes in the internal filehandle.
-
-=cut
-sub skip {
-    my ($self, $len) = @_;
-    return if $self->{fh}->can('seek') && $self->{fh}->seek($len, 1);
-    $self->readlen($len);
-    return;
-}
-
-=method getpos
-
-Wrapper for L<IO::Seekable/$io-E<gt>getpos> in the internal filehandle.
-
-Returns undef if the internal filehandle can't C<getpos>.
-
-=cut
-sub getpos {
-    my ($self) = @_;
-    return undef unless $self->{fh}->can('getpos');
-    return $self->{fh}->getpos;
-}
-
-=method setpos($pos)
-
-Wrapper for L<IO::Seekable/$io-E<gt>setpos> in the internal filehandle.
-
-Returns C<undef> if the internal filehandle can't C<setpos>.
-
-Croaks if C<setpos> does not seek to the requested position,
-that is, if calling C<getpos> does not yield the same object
-as the C<$pos> argument.
-
-=cut
-sub setpos {
-    my ($self, $pos) = @_;
-    return undef unless $pos && $self->{fh}->can('setpos');
-
-    my $ret = $self->{fh}->setpos($pos);
-    croak "Cannot seek to correct position"
-        unless $self->getpos eq $pos;
-    return $ret;
-}
-
-=method read_element($read_bin)
-
-Reads a full EBML element from the internal filehandle.
-
-Returns a L<Parse::Matroska::Element> object initialized with
-the read data. If C<read_bin> is not present or is false, will
-delay-load the contents of C<binary> type elements, that is,
-they will only be loaded when calling C<get_value> on the
-returned L<Parse::Matroska::Element> object.
-
-Does not read the children of the element if its type is
-C<sub>. Look into the L<Parse::Matroska::Element> interface
-for details in how to read children elements.
-
-Pass a true C<$read_bin> if the stream being read is not
-seekable (C<getpos> is undef) and the contents of C<binary>
-elements is desired, otherwise seeking errors or internal
-filehandle corruption might occur.
-
-=cut
-sub read_element {
-    my ($self, $read_bin) = @_;
-    return undef if $self->{fh}->eof;
-
-    my $elem_pos = $self->getpos;
-
-    my $elid = $self->read_id;
-    my $elem_def = elem_by_hexid($elid);
-    my ($size_len, $content_len) = $self->read_size;
-    my $full_len = length($elid)/2 + $size_len + $content_len;
-
-    my $elem = Parse::Matroska::Element->new(
-        elid => $elid,
-        name => $elem_def && $elem_def->{name},
-        type => $elem_def && $elem_def->{valtype},
-        size_len => $size_len,
-        content_len => $content_len,
-        full_len => $full_len,
-        reader => $self,
-        elem_pos => $elem_pos,
-        data_pos => $self->getpos,
-        );
-    weaken($elem->{reader});
-
-    if (defined $elem_def) {
-        if ($elem->{type} eq 'sub') {
-            $elem->{value} = [];
-        } elsif ($elem->{type} eq 'str') {
-            $elem->{value} = $self->read_str($content_len);
-        } elsif ($elem->{type} eq 'ebml_id') {
-            $elem->{value} = $self->read_ebml_id($content_len);
-        } elsif ($elem->{type} eq 'uint') {
-            $elem->{value} = $self->read_uint($content_len);
-        } elsif ($elem->{type} eq 'sint') {
-            $elem->{value} = $self->read_sint($content_len);
-        } elsif ($elem->{type} eq 'float') {
-            $elem->{value} = $self->read_float($content_len);
-        } elsif ($elem->{type} eq 'skip') {
-            $self->skip($content_len);
-        } elsif ($elem->{type} eq 'binary') {
-            if ($read_bin) {
-                $elem->{value} = $self->readlen($content_len);
-            } else {
-                $self->skip($content_len);
-            }
-        } else {
-            die "Matroska Definition error: type $elem->{valtype} unknown"
-        }
-    } else {
-        $self->skip($content_len);
-    }
-    return $elem;
-}
-
-1;
-
-=head1 CAVEATS
-
-Children elements have to be processed as soon as an element
-with children is found, or their children ignored with
-L<Parse::Matroska::Element/skip>. Not doing so doesn't cause
-errors but results in an invalid structure, with constant C<0>
-depth.
-
-To work correctly in unseekable streams, either the contents
-of C<binary>-type elements has to be ignored or the C<read_bin>
-flag to C<read_element> has to be true.
diff --git a/TOOLS/lib/Parse/Matroska/Utils.pm b/TOOLS/lib/Parse/Matroska/Utils.pm
deleted file mode 100644
index 127d626cb1..0000000000
--- a/TOOLS/lib/Parse/Matroska/Utils.pm
+++ /dev/null
@@ -1,37 +0,0 @@
-use strict;
-use warnings;
-
-# ABSTRACT: internally-used helper functions
-package Parse::Matroska::Utils;
-
-use Exporter;
-our @ISA       = qw{Exporter};
-our @EXPORT_OK = qw{uniq uncamelize};
-
-=method uniq(@array)
-
-The same as L<List::MoreUtils/"uniq LIST">.
-Included to avoid depending on it since it's
-not a core module.
-
-=cut
-sub uniq(@) {
-  my %seen;
-  return grep { !$seen{$_}++ } @_;
-}
-
-=method uncamelize($string)
-
-Converts a "StringLikeTHIS" into a
-"string_like_this".
-
-=cut
-sub uncamelize($) {
-    local $_ = shift;
-    # lc followed by UC: lc_UC
-    s/(?<=[a-z])([A-Z])/_\L$1/g;
-    # UC followed by two lc: _UClclc
-    s/([A-Z])(?=[a-z]{2})/_\L$1/g;
-    # strip leading _ that the second regexp might add; lowercase all
-    s/^_//; lc
-}
diff --git a/TOOLS/matroska.pl b/TOOLS/matroska.pl
deleted file mode 100755
index 41e4f6aa81..0000000000
--- a/TOOLS/matroska.pl
+++ /dev/null
@@ -1,169 +0,0 @@
-#! /usr/bin/env perl
-
-# Generate C definitions for parsing Matroska files.
-
-use strict;
-use warnings;
-
-use FindBin;
-use lib "$FindBin::Bin/lib";
-use Parse::Matroska::Definitions;
-use Parse::Matroska::Reader;
-
-use Getopt::Long;
-use List::Util qw{max};
-
-my @global_elem_list = @Parse::Matroska::Definitions::global_elem_list;
-
-Getopt::Long::Configure(qw{auto_version auto_help});
-my %opt;
-GetOptions(\%opt,
-    "generate-header",
-    "generate-definitions",
-    "full",
-    );
-
-if ($opt{"generate-header"}) {
-    generate_c_header();
-} elsif ($opt{"generate-definitions"}) {
-    generate_c_definitions();
-} else {
-    for (@ARGV) {
-        my $reader = Parse::Matroska::Reader->new($_ eq '-' ? \*STDIN : $_) or die $!;
-        while (my $elem = $reader->read_element($_ eq '-')) {
-            process_elem($elem, $_ eq '-');
-        }
-    }
-}
-
-# Generate declarations for libmpdemux/ebml_types.h
-sub generate_c_header {
-    print "/* Generated by TOOLS/matroska.pl, do not edit manually */\n\n";
-
-    # Write a #define for the ElementID of each known element
-    for my $el (@global_elem_list) {
-        printf "#define %-40s 0x%s\n", $el->{definename}, $el->{elid};
-    }
-    print "\n";
-
-    # Define a struct for each ElementID that has child elements
-    for my $el (@global_elem_list) {
-        next unless $el->{subelements};
-        print "\nstruct $el->{structname} {\n";
-
-        # Figure out the length of the longest variable name
-        # Used for pretty-printing in the next step
-        my $l = max(map { length $_->{valname} } values %{$el->{subelements}});
-
-        # Output each variable, with pointers for array (multiple) elements
-        for my $subel (sort { $a->{definename} cmp $b->{definename} } values %{$el->{subelements}}) {
-            printf "    %-${l}s %s%s;\n",
-                $subel->{valname}, $subel->{multiple}?'*':' ', $subel->{fieldname};
-        }
-        print "\n";
-
-        # Output a counter variable for each element
-        # (presence/absence for scalars, item count for arrays)
-        for my $subel (sort values %{$el->{subelements}}) {
-            print "    int n_$subel->{fieldname};\n"
-        }
-        print "};\n";
-    }
-    print "\n";
-
-    # Output extern references for ebml_elem_desc structs for each of the elements
-    # These are defined by generate_c_definitions
-    for my $el (@global_elem_list) {
-        next unless $el->{subelements};
-        print "extern const struct ebml_elem_desc $el->{structname}_desc;\n";
-    }
-    print "\n";
-
-    # Output the max number of sub-elements a known element might have
-    printf "#define MAX_EBML_SUBELEMENTS %d\n",
-        max(map { scalar keys %{$_->{subelements}} }
-            grep { $_->{subelements} } @global_elem_list);
-}
-
-# Generate definitions for libmpdemux/ebml_defs.c
-sub generate_c_definitions {
-    print "/* Generated by TOOLS/matroska.pl, do not edit manually */\n\n";
-    # ebml_defs.c uses macros declared in ebml.c
-    for my $el (@global_elem_list) {
-        print "\n";
-        if ($el->{subelements}) {
-            # set N for the next macros
-            print "#define N $el->{fieldname}\n";
-
-            # define a struct ebml_$N_desc and gets ready to define fields
-            # this secretly opens two scopes; hence the }}; at the end
-            print "E_S(\"$el->{name}\", ".scalar(keys %{$el->{subelements}}).")\n";
-
-            # define a field for each subelement
-            # also does lots of macro magic, but doesn't open a scope
-            for my $subel (sort { $a->{definename} cmp $b->{definename} } values %{$el->{subelements}}) {
-                print "F($subel->{definename}, $subel->{fieldname}, ".
-                    ($subel->{multiple}?'1':'0').")\n";
-            }
-            # close the struct
-            print "}};\n";
-
-            # unset N since we've used it
-            print "#undef N\n";
-        } else {
-            print "E(\"$el->{name}\", $el->{fieldname}, $el->{ebmltype})\n";
-        }
-    }
-}
-
-sub repr {
-    my @ret;
-    foreach (@_) {
-        if (/'/) {
-            s/"/\\"/g;
-            push @ret, "\"$_\"";
-        } else {
-            push @ret, "'$_'";
-        }
-    }
-    return @ret if wantarray;
-    return pop @ret if defined wantarray;
-    return;
-}
-
-sub process_elem {
-    my ($elem, $read_bin) = @_;
-    unless ($opt{full}) {
-        if ($elem->{name} eq 'Cluster' || $elem->{name} eq 'Cues') {
-            $elem->skip;
-            return;
-        }
-    }
-    die unless $elem;
-
-    if ($elem->{type} ne 'skip') {
-        print "$elem->{depth} $elem->{elid} $elem->{name} size: $elem->{content_len} value: ";
-    }
-
-    if ($elem->{type} eq 'sub') {
-        print "subelements:\n";
-        while (my $chld = $elem->next_child($read_bin)) {
-            process_elem($chld);
-        }
-    } elsif ($elem->{type} eq 'binary') {
-        my $t = "<skipped $elem->{content_len} bytes>";
-        if ($elem->{content_len} < 20) {
-            $t = unpack "H*", $elem->get_value;
-        }
-        print "binary $t\n";
-        delete $elem->{value};
-    } elsif ($elem->{type} eq 'ebml_id') {
-        print "binary $elem->{value}->{elid} (".($elem->{value}->{name}||"UNKNOWN").")\n";
-    } elsif ($elem->{type} eq 'skip') {
-        # skip
-    } elsif ($elem->{type} eq 'str') {
-        print "string ". repr($elem->get_value) . "\n";
-    } else {
-        print "$elem->{type} ". $elem->get_value ."\n";
-    }
-}
diff --git a/TOOLS/matroska.py b/TOOLS/matroska.py
new file mode 100755
index 0000000000..91e65a26b3
--- /dev/null
+++ b/TOOLS/matroska.py
@@ -0,0 +1,463 @@
+#!/usr/bin/env python
+"""
+Generate C definitions for parsing Matroska files.
+Can also be used to directly parse Matroska files and display their contents.
+"""
+
+#
+# This file is part of MPlayer.
+#
+# MPlayer is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# MPlayer is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with MPlayer; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+# for compatibility with Python 2.x
+from __future__ import print_function
+
+elements_ebml = (
+    'EBML, 1a45dfa3, sub', (
+        'EBMLVersion, 4286, uint',
+        'EBMLReadVersion, 42f7, uint',
+        'EBMLMaxIDLength, 42f2, uint',
+        'EBMLMaxSizeLength, 42f3, uint',
+        'DocType, 4282, str',
+        'DocTypeVersion, 4287, uint',
+        'DocTypeReadVersion, 4285, uint',
+    ),
+
+    'CRC32, bf, binary',
+    'Void, ec, binary',
+)
+
+elements_matroska = (
+    'Segment, 18538067, sub', (
+
+        'SeekHead*, 114d9b74, sub', (
+            'Seek*, 4dbb, sub', (
+                'SeekID, 53ab, ebml_id',
+                'SeekPosition, 53ac, uint',
+            ),
+        ),
+
+        'Info*, 1549a966, sub', (
+            'SegmentUID, 73a4, binary',
+            'PrevUID, 3cb923, binary',
+            'NextUID, 3eb923, binary',
+            'TimecodeScale, 2ad7b1, uint',
+            'DateUTC, 4461, sint',
+            'Title, 7ba9, str',
+            'MuxingApp, 4d80, str',
+            'WritingApp, 5741, str',
+            'Duration, 4489, float',
+        ),
+
+        'Cluster*, 1f43b675, sub', (
+            'Timecode, e7, uint',
+            'BlockGroup*, a0, sub', (
+                'Block, a1, binary',
+                'BlockDuration, 9b, uint',
+                'ReferenceBlock*, fb, sint',
+                'DiscardPadding,  75A2, sint',
+            ),
+            'SimpleBlock*, a3, binary',
+        ),
+
+        'Tracks*, 1654ae6b, sub', (
+            'TrackEntry*, ae, sub', (
+                'TrackNumber, d7, uint',
+                'TrackUID, 73c5, uint',
+                'TrackType, 83, uint',
+                'FlagEnabled, b9, uint',
+                'FlagDefault, 88, uint',
+                'FlagForced, 55aa, uint',
+                'FlagLacing, 9c, uint',
+                'MinCache, 6de7, uint',
+                'MaxCache, 6df8, uint',
+                'DefaultDuration, 23e383, uint',
+                'TrackTimecodeScale, 23314f, float',
+                'MaxBlockAdditionID, 55ee, uint',
+                'Name, 536e, str',
+                'Language, 22b59c, str',
+                'CodecID, 86, str',
+                'CodecPrivate, 63a2, binary',
+                'CodecName, 258688, str',
+                'CodecDecodeAll, aa, uint',
+                'CodecDelay, 56aa, uint',
+                'SeekPreRoll, 56bb, uint',
+                'Video, e0, sub', (
+                    'FlagInterlaced, 9a, uint',
+                    'PixelWidth, b0, uint',
+                    'PixelHeight, ba, uint',
+                    'DisplayWidth, 54b0, uint',
+                    'DisplayHeight, 54ba, uint',
+                    'DisplayUnit, 54b2, uint',
+                    'FrameRate, 2383e3, float',
+                    'ColourSpace, 2eb524, binary',
+                    'StereoMode, 53b8, uint',
+                    'Colour, 55b0, sub', (
+                        'MatrixCoefficients,      55B1, uint',
+                        'BitsPerChannel,          55B2, uint',
+                        'ChromaSubsamplingHorz,   55B3, uint',
+                        'ChromaSubsamplingVert,   55B4, uint',
+                        'CbSubsamplingHorz,       55B5, uint',
+                        'CbSubsamplingVert,       55B6, uint',
+                        'ChromaSitingHorz,        55B7, uint',
+                        'ChromaSitingVert,        55B8, uint',
+                        'Range,                   55B9, uint',
+                        'TransferCharacteristics, 55BA, uint',
+                        'Primaries,               55BB, uint',
+                        'MaxCLL,                  55BC, uint',
+                        'MaxFALL,                 55BD, uint',
+                        'MasteringMetadata,       55D0, sub', (
+                            'PrimaryRChromaticityX,   55D1, float',
+                            'PrimaryRChromaticityY,   55D2, float',
+                            'PrimaryGChromaticityX,   55D3, float',
+                            'PrimaryGChromaticityY,   55D4, float',
+                            'PrimaryBChromaticityX,   55D5, float',
+                            'PrimaryBChromaticityY,   55D6, float',
+                            'WhitePointChromaticityX, 55D7, float',
+                            'WhitePointChromaticityY, 55D8, float',
+                            'LuminanceMax,            55D9, float',
+                            'LuminanceMin,            55DA, float',
+                        ),
+                    ),
+                ),
+                'Audio, e1, sub', (
+                    'SamplingFrequency, b5, float',
+                    'OutputSamplingFrequency, 78b5, float',
+                    'Channels, 9f, uint',
+                    'BitDepth, 6264, uint',
+                ),
+                'ContentEncodings, 6d80, sub', (
+                    'ContentEncoding*, 6240, sub', (
+                        'ContentEncodingOrder, 5031, uint',
+                        'ContentEncodingScope, 5032, uint',
+                        'ContentEncodingType, 5033, uint',
+                        'ContentCompression, 5034, sub', (
+                            'ContentCompAlgo, 4254, uint',
+                            'ContentCompSettings, 4255, binary',
+                        ),
+                    ),
+                ),
+            ),
+        ),
+
+        'Cues, 1c53bb6b, sub', (
+            'CuePoint*, bb, sub', (
+                'CueTime, b3, uint',
+                'CueTrackPositions*, b7, sub', (
+                    'CueTrack, f7, uint',
+                    'CueClusterPosition, f1, uint',
+                    'CueRelativePosition, f0, uint',
+                    'CueDuration, b2, uint',
+                ),
+            ),
+        ),
+
+        'Attachments, 1941a469, sub', (
+            'AttachedFile*, 61a7, sub', (
+                'FileDescription, 467e, str',
+                'FileName, 466e, str',
+                'FileMimeType, 4660, str',
+                'FileData, 465c, binary',
+                'FileUID, 46ae, uint',
+            ),
+        ),
+
+        'Chapters, 1043a770, sub', (
+            'EditionEntry*, 45b9, sub', (
+                'EditionUID, 45bc, uint',
+                'EditionFlagHidden, 45bd, uint',
+                'EditionFlagDefault, 45db, uint',
+                'EditionFlagOrdered, 45dd, uint',
+                'ChapterAtom*, b6, sub', (
+                    'ChapterUID, 73c4, uint',
+                    'ChapterTimeStart, 91, uint',
+                    'ChapterTimeEnd, 92, uint',
+                    'ChapterFlagHidden, 98, uint',
+                    'ChapterFlagEnabled, 4598, uint',
+                    'ChapterSegmentUID, 6e67, binary',
+                    'ChapterSegmentEditionUID, 6ebc, uint',
+                    'ChapterDisplay*, 80, sub', (
+                        'ChapString, 85, str',
+                        'ChapLanguage*, 437c, str',
+                        'ChapCountry*, 437e, str',
+                    ),
+                ),
+            ),
+        ),
+        'Tags*, 1254c367, sub', (
+            'Tag*, 7373, sub', (
+                'Targets, 63c0, sub', (
+                    'TargetTypeValue, 68ca, uint',
+                    'TargetTrackUID, 63c5, uint',
+                    'TargetEditionUID, 63c9, uint',
+                    'TargetChapterUID, 63c4, uint',
+                    'TargetAttachmentUID, 63c6, uint',
+                 ),
+                'SimpleTag*, 67c8, sub', (
+                    'TagName, 45a3, str',
+                    'TagLanguage, 447a, str',
+                    'TagString, 4487, str'
+                ),
+            ),
+        ),
+    ),
+)
+
+
+import sys
+from math import ldexp
+from binascii import hexlify
+
+def byte2num(s):
+    return int(hexlify(s), 16) if s else 0  # zero-length EBML integers encode 0
+
+class EOF(Exception): pass
+
+def camelcase_to_words(name):
+    parts = []
+    start = 0
+    for i in range(1, len(name)):
+        if name[i].isupper() and (name[i-1].islower() or
+                                  name[i+1:i+2].islower()):
+            parts.append(name[start:i])
+            start = i
+    parts.append(name[start:])
+    return '_'.join(parts).lower()
+
+class MatroskaElement(object):
+
+    def __init__(self, name, elid, valtype, namespace):
+        self.name = name
+        self.definename = '{0}_ID_{1}'.format(namespace, name.upper())
+        self.fieldname = camelcase_to_words(name)
+        self.structname = 'ebml_' + self.fieldname
+        self.elid = elid
+        self.valtype = valtype
+        if valtype == 'sub':
+            self.ebmltype = 'EBML_TYPE_SUBELEMENTS'
+            self.valname = 'struct ' + self.structname
+        else:
+            self.ebmltype = 'EBML_TYPE_' + valtype.upper()
+            try:
+                self.valname = {'uint': 'uint64_t', 'str': 'char *',
+                                'binary': 'bstr', 'ebml_id': 'uint32_t',
+                                'float': 'double', 'sint': 'int64_t',
+                                }[valtype]
+            except KeyError:
+                raise SyntaxError('Unrecognized value type ' + valtype)
+        self.subelements = ()
+
+    def add_subelements(self, subelements):
+        self.subelements = subelements
+        self.subids = set(x[0].elid for x in subelements)
+
+elementd = {}
+elementlist = []
+def parse_elems(l, namespace):
+    subelements = []
+    for el in l:
+        if isinstance(el, str):
+            name, hexid, eltype = [x.strip() for x in el.split(',')]
+            multiple = name.endswith('*')
+            # Lower-case the ID: parse_one() looks IDs up via hexlify(), which is lowercase.
+            new = MatroskaElement(name.strip('*'), hexid.lower(), eltype, namespace)
+            elementd[new.elid] = new
+            elementlist.append(new)
+            subelements.append((new, multiple))
+        else:
+            new.add_subelements(parse_elems(el, namespace))
+    return subelements
+
+parse_elems(elements_ebml, 'EBML')
+parse_elems(elements_matroska, 'MATROSKA')
+
+def generate_C_header():
+    print('// Generated by TOOLS/matroska.py, do not edit manually')
+    print()
+
+    for el in elementlist:
+        print('#define {0.definename:40} 0x{0.elid}'.format(el))
+
+    print()
+
+    for el in reversed(elementlist):
+        if not el.subelements:
+            continue
+        print()
+        print('struct {0.structname} {{'.format(el))
+        l = max(len(subel.valname) for subel, multiple in el.subelements)+1
+        for subel, multiple in el.subelements:
+            print('    {e.valname:{l}} {star}{e.fieldname};'.format(
+                    e=subel, l=l, star=' *'[multiple]))
+        print()
+        for subel, multiple in el.subelements:
+            print('    int  n_{0.fieldname};'.format(subel))
+        print('};')
+
+    for el in elementlist:
+        if not el.subelements:
+            continue
+        print('extern const struct ebml_elem_desc {0.structname}_desc;'.format(
+                el))
+
+    print()
+    print('#define MAX_EBML_SUBELEMENTS', max(len(el.subelements)
+                                              for el in elementlist))
+
+
+
+def generate_C_definitions():
+    print('// Generated by TOOLS/matroska.py, do not edit manually')
+    print()
+    for el in reversed(elementlist):
+        print()
+        if el.subelements:
+            print('#define N', el.fieldname)
+            print('E_S("{0}", {1})'.format(el.name, len(el.subelements)))
+            for subel, multiple in el.subelements:
+                print('F({0.definename}, {0.fieldname}, {1})'.format(
+                        subel, int(multiple)))
+            print('}};')
+            print('#undef N')
+        else:
+            print('E("{0.name}", {0.fieldname}, {0.ebmltype})'.format(el))
+
+def read(s, length):
+    t = s.read(length)
+    if len(t) != length:
+        raise EOF
+    return t
+
+def read_id(s):
+    t = read(s, 1)
+    i = 0
+    mask = 128
+    if ord(t) == 0:
+        raise SyntaxError
+    while not ord(t) & mask:
+        i += 1
+        mask >>= 1
+    t += read(s, i)
+    return t
+
+def read_vint(s):
+    # Read an EBML variable-length integer; return (encoded length, value).
+    t = read(s, 1)
+    i = 0
+    mask = 128
+    if ord(t) == 0:
+        raise SyntaxError
+    while not ord(t) & mask:
+        i += 1
+        mask >>= 1
+    # bytes(bytearray(...)) builds a one-byte string on both Python 2 and 3.
+    return i+1, byte2num(bytes(bytearray((ord(t) & (mask - 1),))) + read(s, i))
+
+def read_str(s, length):
+    return read(s, length)
+
+def read_uint(s, length):
+    t = read(s, length)
+    return byte2num(t)
+
+def read_sint(s, length):
+    # Read a big-endian two's-complement integer; a zero-length element is 0.
+    i = read_uint(s, length)
+    if length and i >> (length * 8 - 1):
+        i -= 1 << (length * 8)
+    return i
+
+def read_float(s, length):
+    # Decode a big-endian IEEE 754 float/double (zero length means 0.0). struct,
+    # unlike manual ldexp() math, also handles zero, denormals, inf and NaN.
+    import struct
+    t = read(s, length)
+    if length == 4:
+        f = struct.unpack('>f', t)[0]
+    elif length == 8:
+        f = struct.unpack('>d', t)[0]
+    elif length == 0:
+        f = 0.0
+    else:
+        raise SyntaxError
+    return f
+
+def parse_one(s, depth, parent, maxlen):
+    # Parse and print one element (recursing into children); return its full size.
+    elid = hexlify(read_id(s)).decode('ascii')
+    elem = elementd.get(elid)
+    if parent is not None and elid not in parent.subids and elid not in ('ec', 'bf'):
+        print('Unexpected:', elid)
+        raise NotImplementedError
+    size, length = read_vint(s)
+    this_length = len(elid) // 2 + size + length  # '//' keeps this an int on Python 3
+    if elem is not None:
+        if elem.valtype != 'skip':
+            print(depth, elid, elem.name, 'size:', length, 'value:', end=' ')
+        if elem.valtype == 'sub':
+            print('subelements:')
+            while length > 0:
+                length -= parse_one(s, depth + 1, elem, length)
+            if length < 0:
+                raise SyntaxError
+        elif elem.valtype == 'str':
+            print('string', repr(read_str(s, length).decode('utf8', 'replace')))
+        elif elem.valtype in ('binary', 'ebml_id'):
+            t = read_str(s, length)
+            dec = ''
+            if elem.valtype == 'ebml_id':
+                idelem = elementd.get(hexlify(t).decode('ascii'))
+                if idelem is None:
+                    dec = '(UNKNOWN)'
+                else:
+                    dec = '({0.name})'.format(idelem)
+            if len(t) < 20:
+                t = hexlify(t).decode('ascii')
+            else:
+                t = '<skipped {0} bytes>'.format(len(t))
+            print('binary', t, dec)
+        elif elem.valtype == 'uint':
+            print('uint', read_uint(s, length))
+        elif elem.valtype == 'sint':
+            print('sint', read_sint(s, length))
+        elif elem.valtype == 'float':
+            print('float', read_float(s, length))
+        elif elem.valtype == 'skip':
+            read(s, length)
+        else:
+            raise NotImplementedError
+    else:
+        print(depth, 'Unknown element:', elid, 'size:', length)
+        read(s, length)
+    return this_length
+
+def parse_toplevel(s):
+    parse_one(s, 0, None, 1 << 63)
+
+if sys.argv[1] == '--generate-header':
+    generate_C_header()
+elif sys.argv[1] == '--generate-definitions':
+    generate_C_definitions()
+else:
+    with open(sys.argv[1], "rb") as s:  # close the input even if parsing fails
+        while 1:
+            start = s.tell()
+            try:
+                parse_toplevel(s)
+            except EOF:
+                if s.tell() != start:
+                    raise Exception("Unexpected end of file")
+                break
diff --git a/waftools/generators/sources.py b/waftools/generators/sources.py
index 6f1521036a..b6af693e65 100644
--- a/waftools/generators/sources.py
+++ b/waftools/generators/sources.py
@@ -2,11 +2,11 @@ from waflib.Build import BuildContext
 import os
 
 def __file2string_cmd__(ctx):
-    return '"${{BIN_PERL}}" "{0}/TOOLS/file2string.pl" "${{SRC}}" > "${{TGT}}"' \
+    return '"${{BIN_PYTHON}}" "{0}/TOOLS/file2string.py" "${{SRC}}" > "${{TGT}}"' \
                 .format(ctx.srcnode.abspath())
 
 def __matroska_cmd__(ctx, argument):
-    return '"${{BIN_PERL}}" "{0}/TOOLS/matroska.pl" "{1}" "${{SRC}}" > "${{TGT}}"' \
+    return '"${{BIN_PYTHON}}" "{0}/TOOLS/matroska.py" "{1}" "${{SRC}}" > "${{TGT}}"' \
                 .format(ctx.srcnode.abspath(), argument)
 
 def __zshcomp_cmd__(ctx, argument):
diff --git a/wscript b/wscript
index f47f965265..94db7e296f 100644
--- a/wscript
+++ b/wscript
@@ -81,6 +81,6 @@ build_options = [
     }, {
         'name': '--zsh-comp',
         'desc': 'zsh completion',
-        'func': check_true,
+        'func': check_ctx_vars('BIN_PERL'),
         'default': 'disable',
     }, {
@@ -995,11 +995,12 @@ def configure(ctx):
     ctx.find_program(cc,          var='CC')
     ctx.find_program(pkg_config,  var='PKG_CONFIG')
     ctx.find_program(ar,          var='AR')
-    ctx.find_program('perl',      var='BIN_PERL')
+    ctx.find_program('python',    var='BIN_PYTHON')
     ctx.find_program('rst2html',  var='RST2HTML',  mandatory=False)
     ctx.find_program('rst2man',   var='RST2MAN',   mandatory=False)
     ctx.find_program('rst2pdf',   var='RST2PDF',   mandatory=False)
     ctx.find_program(windres,     var='WINDRES',   mandatory=False)
+    ctx.find_program('perl',      var='BIN_PERL',  mandatory=False)
 
     ctx.load('compiler_c')
     ctx.load('waf_customizations')