aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Kovensky <diogomfranco@gmail.com>2012-11-07 11:49:44 -0300
committerGravatar wm4 <wm4@nowhere>2012-11-08 00:28:59 +0100
commitfae73079310eef9dce9737f2e37ff4b80c8830ee (patch)
tree4a9c7d9fbc398b237808283df39562e55077a225
parent58f821e096392e27994102f6de6f8f76c63e38e1 (diff)
Port several python scripts to Perl
file2string.pl and vdpau_functions.pl are direct ports. matroska.py was reimplemented as the Parse::Matroska module in CPAN, and matroska.pl was made a client of Parse::Matroska. A copy of Parse::Matroska is included in TOOLS/lib, and matroska.pl looks there first when trying to load the module. osxbundle.py was not ported since I have no means to verify it. Python is always available on OSX though, so there is no harm in removing the check for it on configure.
-rw-r--r--Makefile14
-rwxr-xr-xTOOLS/file2string.pl24
-rwxr-xr-xTOOLS/file2string.py27
-rw-r--r--TOOLS/lib/Parse/Matroska.pm30
-rw-r--r--TOOLS/lib/Parse/Matroska/Definitions.pm350
-rw-r--r--TOOLS/lib/Parse/Matroska/Element.pm331
-rw-r--r--TOOLS/lib/Parse/Matroska/Reader.pm423
-rw-r--r--TOOLS/lib/Parse/Matroska/Utils.pm37
-rwxr-xr-xTOOLS/matroska.pl169
-rwxr-xr-xTOOLS/matroska.py429
-rwxr-xr-xTOOLS/vdpau_functions.pl (renamed from TOOLS/vdpau_functions.py)58
-rwxr-xr-xconfigure4
12 files changed, 1407 insertions, 489 deletions
diff --git a/Makefile b/Makefile
index 3f32e5639f..c8b82d8f95 100644
--- a/Makefile
+++ b/Makefile
@@ -349,31 +349,31 @@ mpv$(EXESUF):
$(CC) -o $@ $^ $(EXTRALIBS)
codec-cfg.c: codecs.conf.h
-codecs.conf.h: TOOLS/file2string.py etc/codecs.conf
+codecs.conf.h: TOOLS/file2string.pl etc/codecs.conf
./$^ >$@
input/input.c: input/input_conf.h
-input/input_conf.h: TOOLS/file2string.py etc/input.conf
+input/input_conf.h: TOOLS/file2string.pl etc/input.conf
./$^ >$@
libvo/vo_vdpau.c: libvo/vdpau_template.c
-libvo/vdpau_template.c: TOOLS/vdpau_functions.py
+libvo/vdpau_template.c: TOOLS/vdpau_functions.pl
./$< > $@
libmpdemux/ebml.c libmpdemux/demux_mkv.c: libmpdemux/ebml_types.h
-libmpdemux/ebml_types.h: TOOLS/matroska.py
+libmpdemux/ebml_types.h: TOOLS/matroska.pl
./$< --generate-header > $@
libmpdemux/ebml.c: libmpdemux/ebml_defs.c
-libmpdemux/ebml_defs.c: TOOLS/matroska.py
+libmpdemux/ebml_defs.c: TOOLS/matroska.pl
./$< --generate-definitions > $@
libvo/vo_opengl.c: libvo/vo_opengl_shaders.h
-libvo/vo_opengl_shaders.h: TOOLS/file2string.py libvo/vo_opengl_shaders.glsl
+libvo/vo_opengl_shaders.h: TOOLS/file2string.pl libvo/vo_opengl_shaders.glsl
./$^ >$@
sub/osd_libass.c: sub/osd_font.h
-sub/osd_font.h: TOOLS/file2string.py sub/osd_font.pfb
+sub/osd_font.h: TOOLS/file2string.pl sub/osd_font.pfb
./$^ >$@
# ./configure must be rerun if it changed
diff --git a/TOOLS/file2string.pl b/TOOLS/file2string.pl
new file mode 100755
index 0000000000..d9ad215d6d
--- /dev/null
+++ b/TOOLS/file2string.pl
@@ -0,0 +1,24 @@
+#! /usr/bin/env perl
+
+use strict;
+use warnings;
+
+# Convert the contents of a file into a C string constant.
+# Note that the compiler will implicitly add an extra 0 byte at the end
+# of every string, so code using the string may need to remove that to get
+# the exact contents of the original file.
+# FIXME: why not a char array?
+
+# treat only alphanumeric and not-" punctuation as safe
+my $unsafe_chars = qr{[^][A-Za-z0-9!#%&'()*+,./:;<=>?^_{|}~ -]};
+
+# Emit every file named on the command line as a sequence of adjacent C
+# string literals, one literal per input line.
+for my $file (@ARGV) {
+    # The output is redirected straight into a generated header by the
+    # Makefile, so an unreadable input must abort with a non-zero status
+    # instead of being skipped and leaving a silently truncated header.
+    open my $fh, '<:raw', $file
+        or die "$0: can't open $file: $!\n";
+    print "/* Generated from $file */\n";
+    while (my $line = <$fh>) {
+        # replace unsafe chars (including the line's own newline) with
+        # their equivalent three-digit octal escapes
+        $line =~ s/($unsafe_chars)/sprintf('\\%03o', ord($1))/ge;
+        print "\"$line\"\n";
+    }
+    close $fh;
+}
diff --git a/TOOLS/file2string.py b/TOOLS/file2string.py
deleted file mode 100755
index 6cdd1a72ae..0000000000
--- a/TOOLS/file2string.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env python
-
-# Convert the contents of a file into a C string constant.
-# Note that the compiler will implicitly add an extra 0 byte at the end
-# of every string, so code using the string may need to remove that to get
-# the exact contents of the original file.
-
-import sys
-
-# Indexing a byte string yields int on Python 3.x, and a str on Python 2.x
-def pord(c):
- return ord(c) if type(c) == str else c
-
-def main(infile):
- conv = ['\\' + ("%03o" % c) for c in range(256)]
- safe_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" \
- "0123456789!#%&'()*+,-./:;<=>?[]^_{|}~ "
- for c in safe_chars:
- conv[ord(c)] = c
- for c, esc in ("\nn", "\tt", r"\\", '""'):
- conv[ord(c)] = '\\' + esc
- for line in infile:
- sys.stdout.write('"' + ''.join(conv[pord(c)] for c in line) + '"\n')
-
-with open(sys.argv[1], 'rb') as infile:
- sys.stdout.write("// Generated from %s\n\n" % sys.argv[1])
- main(infile)
diff --git a/TOOLS/lib/Parse/Matroska.pm b/TOOLS/lib/Parse/Matroska.pm
new file mode 100644
index 0000000000..e1c08c9814
--- /dev/null
+++ b/TOOLS/lib/Parse/Matroska.pm
@@ -0,0 +1,30 @@
+use 5.008;
+use strict;
+use warnings;
+
+# ABSTRACT: Module collection to parse Matroska files.
+package Parse::Matroska;
+
+=head1 DESCRIPTION
+
+C<use>s L<Parse::Matroska::Reader>. See the documentation
+of the modules mentioned in L</"SEE ALSO"> for more information
+in how to use this module.
+
+It's intended for this module to contain high-level interfaces
+to the other modules in the distribution.
+
+=head1 SOURCE CODE
+
+L<https://github.com/Kovensky/Parse-Matroska>
+
+=head1 SEE ALSO
+
+L<Parse::Matroska::Reader>, L<Parse::Matroska::Element>,
+L<Parse::Matroska::Definitions>.
+
+=cut
+
+use Parse::Matroska::Reader;
+
+1;
diff --git a/TOOLS/lib/Parse/Matroska/Definitions.pm b/TOOLS/lib/Parse/Matroska/Definitions.pm
new file mode 100644
index 0000000000..9b700a7d20
--- /dev/null
+++ b/TOOLS/lib/Parse/Matroska/Definitions.pm
@@ -0,0 +1,350 @@
+use 5.008;
+use strict;
+use warnings;
+
+# ABSTRACT: internal EBML grammar definitions
+package Parse::Matroska::Definitions;
+
+use Parse::Matroska::Utils qw{uniq uncamelize};
+
+use Exporter;
+our @ISA = qw{Exporter};
+our @EXPORT_OK = qw{elem_by_hexid %EBML_DEFINITION %MATROSKA_DEFINITION};
+
+=head1 SYNOPSIS
+
+ use Parse::Matroska::Definitions qw{elem_by_hexid};
+ my $ebml_id = elem_by_hexid('1a45dfa3');
+ print "EBML ID $ebml_id->{elid}'s name: $ebml_id->{name}";
+
+=head1 DESCRIPTION
+
+Contains the definition of the EBML grammar as expected in
+Matroska files. This module is meant mostly for internal use.
+
+As this was extended from a script in mpv-player, some data
+generated is apparently useless for regular module users
+but is still relevant to the mpv-player script. Such data
+is annotated as being for mpv compatibility.
+
+=head1 NOTE
+
+The API of this module is not yet considered stable.
+
+=head1 GLOBALS
+
+These global variables are considered B<immutable>.
+
+=head2 @Parse::Matroska::Definitions::global_elem_list
+
+A global list of known matroska elements. Useful for
+mpv's matroska script, used for generating C headers
+that parse matroska.
+
+=head2 %Parse::Matroska::Definitions::global_elem_dict
+
+A global hash of known matroska elements. Used internally
+by L</elem_by_hexid($id)>.
+
+=cut
+
+@Parse::Matroska::Definitions::global_elem_list = ();
+%Parse::Matroska::Definitions::global_elem_dict = ();
+
+=head2 %EBML_DEFINITION
+
+Optionally-importable hash of known EBML IDs belonging
+to the EBML generic grammar.
+
+=head2 %MATROSKA_DEFINITION
+
+Optionally-importable hash of known EBML IDs belonging
+to the Matroska-specific grammar.
+
+=cut
+
+our %EBML_DEFINITION = define_ebml();
+our %MATROSKA_DEFINITION = define_matroska();
+
+=method elem_by_hexid($id)
+
+Returns an EBML Element Definition corresponding to the provided
+hexadecimal string. Returns C<undef> if the element is unknown.
+
+=cut
+sub elem_by_hexid {
+    # Plain lookup in the package-wide element table; an ID that was never
+    # registered is simply absent from the hash, so the result is undef.
+    my $hex_id = shift;
+    my $known  = \%Parse::Matroska::Definitions::global_elem_dict;
+    return $known->{$hex_id};
+}
+
+################################################
+### Helper functions for document definition ###
+################################################
+
+# used by elem when setting the 'valname' key
+#
+# Maps the value type given to elem() onto the C type name stored under
+# 'valname'. elem() dies with "Unrecognized value type" for any scalar type
+# that has no entry here; container elements (hashref value type) do not
+# consult this table.
+use constant TYPE_MAP => {
+ uint => 'uint64_t',
+ str => 'struct bstr',
+ binary => 'struct bstr',
+ ebml_id => 'uint32_t',
+ float => 'double',
+ sint => 'int64_t',
+};
+
+# this will be localized to "MATROSKA" or "EBML" on the elem declarations
+our $ELEM_DEFINE_TYPE = undef;
+
+=method elem($name,$elid,$valtype)
+
+NOTE: never call this function yourself; it changes data structures
+that are considered immutable outside of this package.
+
+Internal API function that generates the EBML Element Definitions.
+
+This API function returns an array whose first element is C<$elid>
+and the second is a generated hash. The generated hash is stored
+in the @global_elem_list and %global_elem_dict.
+
+The generated hash contains:
+
+=for :list
+= name
+The EBML Element's name, given through C<$name>.
+= elid
+The EBML Element's hex id, given through C<$elid>. Used for lookups by L</elem_by_hexid($id)>.
+= valtype
+The EBML Element's type, given through C<$valtype>, except when C<$valtype> is an arrayref.
+= multiple
+If C<$name> ends with a C<*>, this is set as true and strips the C<*> from L</name>. Used to
+mark elements that may be repeated.
+= subelements
+An arrayref of elements that may be children of this element, given through C<$valtype> if it
+is an arrayref. Sets L</valtype> to C<sub> if there are subelements.
+= subids
+An arrayref listing all the L</elid>s of subelements, C<uniq>ified.
+
+The following elements are for mpv compatibility:
+
+=for :list
+= definename
+Name used for generating C #defines.
+= fieldname
+Name used for generating C struct fields.
+= structname
+Name used for generating C struct names.
+= ebmltype
+A pre-#defined constant to describe the element's type.
+= valname
+Typename used when declaring a struct field referring to this element.
+
+=cut
+sub elem {
+    my ($name, $elid, $valtype) = @_;
+
+    # a trailing '*' marks a repeatable element; it is not part of the name
+    my $multiple  = scalar($name =~ s/\*$//);
+    my $fieldname = uncamelize($name);
+
+    my %e = (
+        name       => $name,
+        elid       => $elid,
+        valtype    => $valtype,
+        multiple   => $multiple,
+        # $ELEM_DEFINE_TYPE is localized to MATROSKA or EBML by the callers
+        definename => "${ELEM_DEFINE_TYPE}_ID_" . uc($name),
+        fieldname  => $fieldname,
+        structname => "ebml_$fieldname",
+    );
+
+    if (ref $valtype eq 'HASH') {
+        # container element: the "value type" is really the hash of children
+        $e{subelements} = $valtype;
+        $e{subids}      = uniq map { $_->{elid} } values %{$valtype};
+        $e{valtype}     = 'sub';
+        $e{ebmltype}    = 'EBML_TYPE_SUBELEMENTS';
+        $e{valname}     = "struct $e{structname}";
+    }
+    else {
+        # scalar element: the C type comes from the TYPE_MAP table
+        my $c_type = TYPE_MAP->{$valtype};
+        die "Unrecognized value type $valtype" unless defined $c_type;
+        $e{ebmltype} = "EBML_TYPE_\U$valtype";
+        $e{valname}  = $c_type;
+    }
+
+    # register the definition in both package-wide tables
+    my $def = \%e;
+    push @Parse::Matroska::Definitions::global_elem_list, $def;
+    $Parse::Matroska::Definitions::global_elem_dict{$elid} = $def;
+
+    # an (id, definition) pair, ready to be used inside a hash constructor
+    return ($elid, $def);
+}
+
+#############################################
+### EBML and Matroska document definitions ###
+#############################################
+
+=method define_ebml
+
+Internal function that defines the EBML generic grammar.
+
+Must not be called from outside the package.
+
+=cut
+sub define_ebml {
+    # every elem() call below tags its definition as EBML_ID_*
+    local $ELEM_DEFINE_TYPE = 'EBML';
+
+    # Children of the EBML header element. They are created before the
+    # header itself -- the same order in which nested calls are evaluated --
+    # so the global element list is filled in the same sequence.
+    my %header_fields = (
+        elem('EBMLVersion', '4286', 'uint'),
+        elem('EBMLReadVersion', '42f7', 'uint'),
+        elem('EBMLMaxIDLength', '42f2', 'uint'),
+        elem('EBMLMaxSizeLength', '42f3', 'uint'),
+        elem('DocType', '4282', 'str'),
+        elem('DocTypeVersion', '4287', 'uint'),
+        elem('DocTypeReadVersion', '4285', 'uint'),
+    );
+
+    # flat (id => definition) list, suitable for assignment to a hash
+    return (
+        elem('EBML', '1a45dfa3', \%header_fields),
+        elem('CRC32', 'bf', 'binary'),
+        elem('Void', 'ec', 'binary'),
+    );
+}
+
+
+=method define_matroska
+
+Internal function that defines the Matroska-specific EBML grammar.
+
+Must not be called from outside the package.
+
+=cut
+sub define_matroska {
+ # Builds the Matroska grammar as one nested expression of elem() calls.
+ #
+ # * elem() returns an (elid, definition) pair, so each `{ ... }` below is
+ # a hashref of child definitions keyed by element ID, and the list
+ # returned from this sub is a flat key/value list for a hash.
+ # * A trailing `*` in a name marks an element that may be repeated;
+ # elem() strips it and records it as the 'multiple' flag.
+ # * Arguments are evaluated before the call they belong to, so children
+ # are registered in the global element list before their parent.
+ # * While this sub runs, elem() prefixes every 'definename' with
+ # MATROSKA_ID_ because of the localized $ELEM_DEFINE_TYPE.
+ local $ELEM_DEFINE_TYPE = 'MATROSKA';
+ return (
+ elem('Segment', '18538067', {
+ elem('SeekHead*', '114d9b74', {
+ elem('Seek*', '4dbb', {
+ elem('SeekID', '53ab', 'ebml_id'),
+ elem('SeekPosition', '53ac', 'uint'),
+ }),
+ }),
+
+ elem('Info*', '1549a966', {
+ elem('SegmentUID', '73a4', 'binary'),
+ elem('PrevUID', '3cb923', 'binary'),
+ elem('NextUID', '3eb923', 'binary'),
+ elem('TimecodeScale', '2ad7b1', 'uint'),
+ elem('DateUTC', '4461', 'sint'),
+ elem('Title', '7ba9', 'str'),
+ elem('MuxingApp', '4d80', 'str'),
+ elem('WritingApp', '5741', 'str'),
+ elem('Duration', '4489', 'float'),
+ }),
+
+ elem('Cluster*', '1f43b675', {
+ elem('Timecode', 'e7', 'uint'),
+ elem('BlockGroup*', 'a0', {
+ elem('Block', 'a1', 'binary'),
+ elem('BlockDuration', '9b', 'uint'),
+ elem('ReferenceBlock*', 'fb', 'sint'),
+ }),
+ elem('SimpleBlock*', 'a3', 'binary'),
+ }),
+
+ elem('Tracks*', '1654ae6b', {
+ elem('TrackEntry*', 'ae', {
+ elem('TrackNumber', 'd7', 'uint'),
+ elem('TrackUID', '73c5', 'uint'),
+ elem('TrackType', '83', 'uint'),
+ elem('FlagEnabled', 'b9', 'uint'),
+ elem('FlagDefault', '88', 'uint'),
+ elem('FlagForced', '55aa', 'uint'),
+ elem('FlagLacing', '9c', 'uint'),
+ elem('MinCache', '6de7', 'uint'),
+ elem('MaxCache', '6df8', 'uint'),
+ elem('DefaultDuration', '23e383', 'uint'),
+ elem('TrackTimecodeScale', '23314f', 'float'),
+ elem('MaxBlockAdditionID', '55ee', 'uint'),
+ elem('Name', '536e', 'str'),
+ elem('Language', '22b59c', 'str'),
+ elem('CodecID', '86', 'str'),
+ elem('CodecPrivate', '63a2', 'binary'),
+ elem('CodecName', '258688', 'str'),
+ elem('CodecDecodeAll', 'aa', 'uint'),
+ elem('Video', 'e0', {
+ elem('FlagInterlaced', '9a', 'uint'),
+ elem('PixelWidth', 'b0', 'uint'),
+ elem('PixelHeight', 'ba', 'uint'),
+ elem('DisplayWidth', '54b0', 'uint'),
+ elem('DisplayHeight', '54ba', 'uint'),
+ elem('DisplayUnit', '54b2', 'uint'),
+ elem('FrameRate', '2383e3', 'float'),
+ }),
+ elem('Audio', 'e1', {
+ elem('SamplingFrequency', 'b5', 'float'),
+ elem('OutputSamplingFrequency', '78b5', 'float'),
+ elem('Channels', '9f', 'uint'),
+ elem('BitDepth', '6264', 'uint'),
+ }),
+ elem('ContentEncodings', '6d80', {
+ elem('ContentEncoding*', '6240', {
+ elem('ContentEncodingOrder', '5031', 'uint'),
+ elem('ContentEncodingScope', '5032', 'uint'),
+ elem('ContentEncodingType', '5033', 'uint'),
+ elem('ContentCompression', '5034', {
+ elem('ContentCompAlgo', '4254', 'uint'),
+ elem('ContentCompSettings', '4255', 'binary'),
+ }),
+ }),
+ }),
+ }),
+ }),
+
+ elem('Cues', '1c53bb6b', {
+ elem('CuePoint*', 'bb', {
+ elem('CueTime', 'b3', 'uint'),
+ elem('CueTrackPositions*', 'b7', {
+ elem('CueTrack', 'f7', 'uint'),
+ elem('CueClusterPosition', 'f1', 'uint'),
+ }),
+ }),
+ }),
+
+ elem('Attachments', '1941a469', {
+ elem('AttachedFile*', '61a7', {
+ elem('FileDescription', '467e', 'str'),
+ elem('FileName', '466e', 'str'),
+ elem('FileMimeType', '4660', 'str'),
+ elem('FileData', '465c', 'binary'),
+ elem('FileUID', '46ae', 'uint'),
+ }),
+ }),
+
+ elem('Chapters', '1043a770', {
+ elem('EditionEntry*', '45b9', {
+ elem('EditionUID', '45bc', 'uint'),
+ elem('EditionFlagHidden', '45bd', 'uint'),
+ elem('EditionFlagDefault', '45db', 'uint'),
+ elem('EditionFlagOrdered', '45dd', 'uint'),
+ elem('ChapterAtom*', 'b6', {
+ elem('ChapterUID', '73c4', 'uint'),
+ elem('ChapterTimeStart', '91', 'uint'),
+ elem('ChapterTimeEnd', '92', 'uint'),
+ elem('ChapterFlagHidden', '98', 'uint'),
+ elem('ChapterFlagEnabled', '4598', 'uint'),
+ elem('ChapterSegmentUID', '6e67', 'binary'),
+ elem('ChapterSegmentEditionUID', '6ebc', 'uint'),
+ elem('ChapterDisplay*', '80', {
+ elem('ChapString', '85', 'str'),
+ elem('ChapLanguage*', '437c', 'str'),
+ elem('ChapCountry*', '437e', 'str'),
+ }),
+ }),
+ }),
+ }),
+ elem('Tags*', '1254c367', {
+ elem('Tag*', '7373', {
+ elem('Targets', '63c0', {
+ elem('TargetTypeValue', '68ca', 'uint'),
+ elem('TargetTrackUID', '63c5', 'uint'),
+ elem('TargetEditionUID', '63c9', 'uint'),
+ elem('TargetChapterUID', '63c4', 'uint'),
+ elem('TargetAttachmentUID', '63c6', 'uint'),
+ }),
+ elem('SimpleTag*', '67c8', {
+ elem('TagName', '45a3', 'str'),
+ elem('TagLanguage', '447a', 'str'),
+ elem('TagString', '4487', 'str'),
+ }),
+ }),
+ }),
+ }),
+ );
+}
+
+1;
diff --git a/TOOLS/lib/Parse/Matroska/Element.pm b/TOOLS/lib/Parse/Matroska/Element.pm
new file mode 100644
index 0000000000..fa0830c11e
--- /dev/null
+++ b/TOOLS/lib/Parse/Matroska/Element.pm
@@ -0,0 +1,331 @@
+use 5.008;
+use strict;
+use warnings;
+
+# ABSTRACT: a mid-level representation of an EBML element
+package Parse::Matroska::Element;
+
+use Carp;
+use List::Util qw{first};
+
+=head1 SYNOPSIS
+
+ use Parse::Matroska::Reader;
+ my $reader = Parse::Matroska::Reader->new($path);
+ my $elem = $reader->read_element;
+
+ print "ID: $elem->{elid}\n";
+ print "Name: $elem->{name}\n";
+ print "Length: $elem->{content_len}\n";
+ print "Type: $elem->{type}\n";
+ print "Child count: ", scalar(@{$elem->all_children}), "\n";
+ if ($elem->{type} eq 'sub') {
+ while (my $chld = $elem->next_child) {
+ print "Child Name: $chld->{name}\n";
+ }
+ } else {
+ print "Value: ", $elem->get_value, "\n";
+ }
+
+=head1 DESCRIPTION
+
+Represents a single Matroska element as decoded by
+L<Parse::Matroska::Reader>. This is essentially a hash
+augmented with functions for delay-loading of binary
+values and children elements.
+
+=head1 NOTE
+
+The API of this module is not yet considered stable.
+
+=attr elid
+
+The EBML Element ID, suitable for passing to
+L<Parse::Matroska::Definitions/elem_by_hexid>.
+
+=attr name
+
+The EBML Element's name.
+
+=attr type
+
+The EBML Element's type. Can be C<uint>, C<sint>,
+C<float>, C<ebml_id>, C<str> or C<binary>. See L</value>
+for details.
+
+Equivalent to
+C<elem_by_hexid($elem-E<gt>{elid})-E<gt>{valtype}>.
+
+=attr value
+
+The EBML Element's value. Should be obtained through
+L</get_value>.
+
+Is an unicode string if the L</type> is C<str>, that is,
+the string has already been decoded by L<Encode/decode>.
+
+Is C<undef> if the L</type> is C<binary> and the contents
+were delay-loaded and not yet read. L</get_value> will
+do the delayed load if needed.
+
+Is an arrayref if the L</type> is C<sub>, containing
+the children nodes that were already loaded.
+
+Is a hashref if the L</type> is C<ebml_id>, containing
+the referred element's information as defined in
+L<Parse::Matroska::Definitions>. Calling
+C<elem_by_hexid($elem-E<gt>{value}-E<gt>{elid})> will
+return the same object as $elem->{value}.
+
+=attr full_len
+
+The entire length of this EBML Element, including
+the header's.
+
+=attr size_len
+
+The length of the size marker. Used when calculating
+L</full_len> from L</content_len>
+
+=attr content_len
+
+The length of the contents of this EBML Element,
+which excludes the header.
+
+=attr reader
+
+A weakened reference to the associated
+L<Parse::Matroska::Reader>.
+
+=method new(%hash)
+
+Creates a new Element initialized with the hash
+given as argument.
+
+=cut
+sub new {
+    my ($class, @init_args) = @_;
+
+    # start from an empty object and let initialize() fill it in
+    my $self = bless {}, $class;
+    $self->initialize(@init_args);
+
+    return $self;
+}
+
+=method initialize(%hash)
+
+Called by L</new> on initialization.
+
+=cut
+sub initialize {
+    my ($self, %args) = @_;
+
+    # copy every given key/value pair into the object in one slice assignment
+    @{$self}{keys %args} = values %args;
+
+    # elements start at depth 0 unless a (true) depth was supplied
+    $self->{depth} ||= 0;
+}
+
+=method skip
+
+Called by the user to ignore the contents of this EBML node.
+Needed when ignoring the children of a node.
+
+=cut
+sub skip {
+    my $self = shift;
+
+    # we don't have to skip if there's no reader
+    my $reader = $self->{reader} or return;
+
+    # skipping is only possible while the reader still sits at the start
+    # of this element's data
+    if ($reader->getpos ne $self->{data_pos}) {
+        croak "Too late to skip, reads were already done";
+    }
+
+    $reader->skip($self->{content_len});
+}
+
+=method get_value($keep_bin)
+
+Returns the value contained by this EBML element.
+
+If the element has children, returns an arrayref to
+the children elements that were already encountered.
+
+If the element's type is C<binary> and the value was
+delay-loaded, does the reading now.
+
+If $keep_bin is true, the delay-loaded data is kept
+as the L</value>, otherwise, further calls to
+C<get_value> will reread the data from the L</reader>.
+
+=cut
+sub get_value {
+    my ($self, $keep_bin) = @_;
+
+    return undef if $self->{type} eq 'skip';
+
+    # Test for definedness, not truth: 0 and the empty string are perfectly
+    # valid decoded values and must be returned as they are.
+    return $self->{value} if defined $self->{value};
+
+    my $reader = $self->{reader} or
+        croak "The associated Reader has been deleted";
+
+    # delay-loaded 'binary'
+    if ($self->{type} eq 'binary') {
+        croak "Cannot seek in the current Reader" unless $self->{data_pos};
+        # seek to the data position...
+        $reader->setpos($self->{data_pos});
+        # ...and read the data, keeping it in value only if requested
+        my $data = $reader->readlen($self->{content_len});
+        $self->{value} = $data if $keep_bin;
+        return $data;
+    }
+
+    # no stored value and nothing that could be delay-loaded
+    return undef;
+}
+
+=method next_child($read_bin)
+
+Builtin iterator; reads and returns the next child element.
+Always returns undef if the type isn't C<sub>.
+
+Returns undef at the end of the iterator and resets itself to
+point to the first element; so calling L</next_child($read_bin)>
+after the iterator returned C<undef> will return the first child.
+
+The optional C<$read_bin> parameter has the children elements
+not delay-load their value if their type is C<binary>.
+
+If all children elements have already been read, return
+each element in-order as would be given by
+L</all_children($recurse,$read_bin)>.
+
+=cut
+sub next_child {
+    my ($self, $read_bin) = @_;
+    return unless $self->{type} eq 'sub';
+
+    if ($self->{_all_children_read}) {
+        # An element without any children may never have had its child
+        # list created; make sure there is an array to iterate over.
+        my $children = $self->{value} ||= [];
+        my $idx = $self->{_last_child} ||= 0;
+        if ($idx >= @{$children}) {
+            # reset the iterator, returning undef once
+            $self->{_last_child} = 0;
+            return;
+        }
+        $self->{_last_child} = $idx + 1;
+        return $children->[$idx];
+    }
+
+    # bytes of this element's content not yet consumed by children
+    my $len = defined $self->{remaining_len}
+        ? $self->{remaining_len}
+        : $self->{content_len};
+
+    if ($len == 0) {
+        # we've read all children; switch into $self->{value} iteration mode
+        $self->{_all_children_read} = 1;
+        $self->{value} ||= [];
+        # return undef since the iterator will reset
+        return;
+    }
+
+    # position the reader right after the children read so far
+    $self->{pos_offset} ||= 0;
+    my $pos = $self->{data_pos};
+    my $reader = $self->{reader} or croak "The associated reader has been deleted";
+    $reader->setpos($pos);
+    $reader->{fh}->seek($self->{pos_offset}, 1) if $pos;
+
+    my $chld = $reader->read_element($read_bin);
+    return undef unless defined $chld;
+    $self->{pos_offset} += $chld->{full_len};
+
+    $self->{remaining_len} = $len - $chld->{full_len};
+
+    if ($self->{remaining_len} < 0) {
+        # report the overrun as a positive byte count
+        my $excess = -$self->{remaining_len};
+        croak "Child elements consumed $excess more bytes than parent $self->{name} contained";
+    }
+
+    $chld->{depth} = $self->{depth} + 1;
+    $self->{value} ||= [];
+
+    push @{$self->{value}}, $chld;
+
+    return $chld;
+}
+
+=method all_children($recurse,$read_bin)
+
+Calls L</populate_children($recurse,$read_bin)> on self
+and returns an arrayref with the children nodes.
+
+Both C<$recurse> and C<$read_bin> are optional and default
+to false.
+
+=cut
+sub all_children {
+    my $self = shift;
+    my ($recurse, $read_bin) = @_;
+
+    # make sure the children have been read, then hand out the stored list
+    $self->populate_children($recurse, $read_bin);
+
+    return $self->{value};
+}
+
+=method children_by_name($name)
+
+Searches in the already read children elements for all
+elements with the EBML name C<$name>. Returns an array
+containing all found elements. On scalar context,
+returns only the first element found.
+
+Croaks if the element's C<type> isn't C<sub>.
+
+=cut
+sub children_by_name {
+    my ($self, $name) = @_;
+
+    # void context: nobody wants the result, so skip the search entirely
+    my $want_list = wantarray;
+    return unless defined $want_list;
+
+    croak "Element can't have children" unless $self->{type} eq 'sub';
+
+    my @matches = grep { $_->{name} eq $name } @{$self->{value}};
+
+    # every match in list context, only the first one in scalar context
+    return $want_list ? @matches : $matches[0];
+}
+
+=method populate_children($recurse,$read_bin)
+
+Populates the internal array of children elements, that is,
+requests that the associated L<Parse::Matroska::Reader> reads
+all children elements. Returns itself.
+
+Returns false if the element's C<type> isn't C<sub>.
+
+If C<$recurse> is provided and is true, the method will call
+itself in the children elements with the same parameters it
+received; this will build a full EBML tree.
+
+If C<$read_bin> is provided and is true, disables delay-loading
+of the contents of C<binary>-type nodes, reading the contents
+to memory.
+
+If both C<$recurse> and C<$read_bin> are true, entire EBML trees
+can be loaded without requiring seeks, thus behaving correctly
+on unseekable streams. If C<$read_bin> is false, the entire EBML
+tree is still loaded, but calling L</get_value> on C<binary>-type
+nodes will produce an error on unseekable streams.
+
+=cut
+sub populate_children {
+    my ($self, $recurse, $read_bin) = @_;
+
+    return unless $self->{type} eq 'sub';
+
+    # the child list may not exist yet if nothing was read so far
+    $self->{value} ||= [];
+
+    # Children that were already read only need to be descended into.
+    if ($recurse) {
+        foreach my $chld (@{$self->{value}}) {
+            $chld->populate_children($recurse, $read_bin);
+        }
+    }
+
+    # Read whatever has not been read yet. Having *some* children loaded
+    # does not mean all of them are, so the decision is based on the flag
+    # next_child() sets once the element's content is exhausted.
+    unless ($self->{_all_children_read}) {
+        while (my $chld = $self->next_child($read_bin)) {
+            $chld->populate_children($recurse, $read_bin) if $recurse;
+        }
+    }
+
+    return $self;
+}
+
+1;
diff --git a/TOOLS/lib/Parse/Matroska/Reader.pm b/TOOLS/lib/Parse/Matroska/Reader.pm
new file mode 100644
index 0000000000..47e67ce5f7
--- /dev/null
+++ b/TOOLS/lib/Parse/Matroska/Reader.pm
@@ -0,0 +1,423 @@
+use 5.008;
+use strict;
+use warnings;
+
+# ABSTRACT: a low-level reader for EBML files
+package Parse::Matroska::Reader;
+
+use Parse::Matroska::Definitions qw{elem_by_hexid};
+use Parse::Matroska::Element;
+
+use Carp;
+use Scalar::Util qw{openhandle weaken};
+use IO::Handle;
+use IO::File;
+use List::Util qw{first};
+use Encode;
+
+use constant BIGINT_TRY => 'Pari,GMP,FastCalc';
+use Math::BigInt try => BIGINT_TRY;
+use Math::BigRat try => BIGINT_TRY;
+
+=head1 SYNOPSIS
+
+ use Parse::Matroska::Reader;
+ my $reader = Parse::Matroska::Reader->new($path);
+ $reader->close;
+ $reader->open(\$string_with_matroska_data);
+
+ my $elem = $reader->read_element;
+ print "Element ID: $elem->{elid}\n";
+ print "Element name: $elem->{name}\n";
+ if ($elem->{type} ne 'sub') {
+ print "Element value: $elem->get_value\n";
+ } else {
+ while (my $child = $elem->next_child) {
+ print "Child element: $child->{name}\n";
+ }
+ }
+ $reader->close;
+
+=head1 DESCRIPTION
+
+Reads EBML data, which is used in Matroska files.
+This is a low-level reader which is meant to be used as a backend
+for higher level readers. TODO: write the high level readers :)
+
+=head1 NOTE
+
+The API of this module is not yet considered stable.
+
+=method new
+
+Creates a new reader.
+Calls L</open($arg)> with its arguments if provided.
+
+=cut
+sub new {
+    my ($class, @open_args) = @_;
+    my $self = bless {}, $class;
+
+    # only open a source right away when one was actually given
+    $self->open(@open_args) if @open_args;
+
+    return $self;
+}
+
+=method open($arg)
+
+Creates the internal filehandle. The argument can be:
+
+=for :list
+* An open filehandle or L<IO::Handle> object.
+The filehandle is not C<dup()>ed, so calling L</close> in this
+object will close the given filehandle as well.
+* A scalar containing a path to a file.
+* On perl v5.14 or newer, a scalarref pointing to EBML data.
+For similar functionality in older perls, give an L<IO::String> object
+or the handle to an already C<open>ed scalarref.
+
+=cut
+sub open {
+    my ($self, $arg) = @_;
+
+    # use $arg itself if it already is an open handle, otherwise treat it
+    # as something IO::File can open for raw reading
+    my $fh = openhandle($arg) || IO::File->new($arg, "<:raw");
+
+    $self->{fh} = $fh;
+    croak "Can't open $arg: $!" unless $fh;
+
+    return $fh;
+}
+
+=method close
+
+Closes the internal filehandle.
+
+=cut
+sub close {
+    my $self = shift;
+
+    $self->{fh}->close;
+
+    # forget the handle; delete hands back the value it removed
+    return delete $self->{fh};
+}
+
+# equivalent to $self->readlen(1), possibly faster
+#
+# Returns the next byte of the internal filehandle, or undef at end of
+# file. Croaks if the read itself failed.
+sub _getc {
+    my ($self) = @_;
+    # $! keeps whatever value an earlier, unrelated failure left behind.
+    # Clear it first so that an ordinary end-of-file (getc returns undef
+    # without touching $!) is not mistaken for a read error.
+    $! = 0;
+    my $c = $self->{fh}->getc;
+    croak "Can't do read of length 1: $!" if !defined $c && $!;
+    return $c;
+}
+
+=method readlen($length)
+
+Reads C<$length> bytes from the internal filehandle.
+
+=cut
+sub readlen {
+    my ($self, $len) = @_;
+
+    # read() yields undef only on a real error; 0 just means end of file
+    my $buffer;
+    defined $self->{fh}->read($buffer, $len)
+        or croak "Can't do read of length $len: $!";
+
+    return $buffer;
+}
+
+# converts a big-endian byte string into an integer
+# the bytes are first rendered as a hex string, which is then parsed
+# back as a number
+sub _bin2int($) {
+    my ($bin) = @_;
+    my $hex = unpack("H*", $bin);
+
+    # with more than 3 bytes the result might be larger than INT_MAX,
+    # so it is returned as an arbitrary-precision integer
+    return length($bin) > 3
+        ? Math::BigInt->from_hex($hex)
+        : hex($hex);
+}
+
+# computes mantissa * 2**exponent as an exact rational number
+sub _ldexp {
+    my ($mantissa, $exponent) = @_;
+    my $scale = Math::BigRat->new(2) ** $exponent;
+    return $mantissa * $scale;
+}
+
+# NOTE: the read_* functions are hard to read because they're ports
+# of even harder to read python functions.
+# TODO: make them readable
+
+=method read_id
+
+Reads an EBML ID atom in hexadecimal string format, suitable
+for passing to L<Parse::Matroska::Definitions/elem_by_hexid($id)>.
+
+=cut
+sub read_id {
+    my ($self) = @_;
+
+    my $first = $self->_getc;
+    return undef unless defined $first;    # nothing left to read
+
+    my $lead = ord($first);
+    if ($lead == 0) {
+        croak "Matroska Syntax error: first byte of ID was \\0"
+    }
+
+    # the number of zero bits in front of the first set bit tells how many
+    # more bytes belong to this ID
+    my $extra = 0;
+    for (my $bit = 1<<7; !($lead & $bit); $bit >>= 1) {
+        ++$extra;
+    }
+
+    # return hex string of the bytes we just read
+    return unpack "H*", ($first . $self->readlen($extra));
+}
+
+=method read_size
+
+Reads an EBML Data Size atom, which immediately follows
+an EBML ID atom.
+
+This returns an array consisting of:
+
+=for :list
+0. The length of the Data Size atom.
+1. The value encoded in the Data Size atom, which is the length of all the data following it.
+
+=cut
+sub read_size {
+    my ($self) = @_;
+    my $t = $self->_getc;
+
+    # A size atom must follow every ID; running out of data here means the
+    # file is truncated. Report that instead of letting ord(undef) pose as
+    # a \0 byte.
+    croak "Matroska Syntax error: unexpected end of file while reading data size"
+        unless defined $t;
+
+    my $i = 0;
+    my $mask = 1<<7;
+
+    if (ord($t) == 0) {
+        croak "Matroska Syntax error: first byte of data size was \\0"
+    }
+    # count the leading zero bits: that many bytes follow the first one
+    until (ord($t) & $mask) {
+        ++$i;
+        $mask >>= 1;
+    }
+    $t = $t & chr($mask-1); # strip length bits (keep only significant bits)
+    return ($i+1, _bin2int($t . $self->readlen($i)));
+}
+
+=method read_str($length)
+
+Reads a string of length C<$length> bytes from the internal filehandle.
+The string is already L<Encode/decode>d from C<UTF-8>, which is the
+standard Matroska string encoding.
+
+=cut
+{
+    # look the decoder object up once and share it between all calls
+    my $decoder = find_encoding("UTF-8");
+
+    sub read_str {
+        my ($self, $length) = @_;
+        my $octets = $self->readlen($length);
+        return $decoder->decode($octets);
+    }
+}
+
+=method read_uint($length)
+
+Reads an unsigned integer of length C<$length> bytes
+from the internal filehandle.
+
+Returns a L<Math::BigInt> object if C<$length> is greater
+than 3.
+
+=cut
+sub read_uint {
+    my ($self, $length) = @_;
+    # fetch the raw big-endian bytes, then turn them into a number
+    my $bytes = $self->readlen($length);
+    return _bin2int($bytes);
+}
+
+=method read_sint($length)
+
+Reads a signed integer of length C<$length> bytes
+from the internal filehandle.
+
+Returns a L<Math::BigInt> object if C<$length> is greater
+than 3.
+
+=cut
+sub read_sint {
+    my ($self, $length) = @_;
+    my $i = $self->read_uint($length);
+
+    # a zero-length integer has no sign bit to look at
+    return $i unless $length > 0;
+
+    # Apply 2's complement to the unsigned int.
+    # The sign-bit mask is built with exact integer arithmetic: a
+    # floating-point 2**N is not guaranteed to convert back exactly once it
+    # no longer fits a native integer, so wide values get a Math::BigInt
+    # mask, matching the Math::BigInt that read_uint returns for them.
+    my $sign_bit = $length * 8 - 1;
+    my $mask = $length > 3
+        ? Math::BigInt->new(2)->bpow($sign_bit)
+        : 1 << $sign_bit;
+
+    # if the most significant bit is set...
+    if ($i & $mask) {
+        # subtract the MSB twice
+        $i -= 2 * $mask;
+    }
+    return $i;
+}
+
+=method read_float($length)
+
+Reads an IEEE floating point number of length C<$length>
+bytes from the internal filehandle.
+
+Only lengths C<4> and C<8> are supported (C C<float> and C<double>).
+
+=cut
+sub read_float {
+    my ($self, $length) = @_;
+    my $i = $self->read_uint($length);
+
+    use bigrat try => BIGINT_TRY;
+
+    # field widths of the IEEE 754 single and double precision layouts
+    my ($mant_bits, $exp_bits);
+    if ($length == 4) {
+        ($mant_bits, $exp_bits) = (23, 8);
+    } elsif ($length == 8) {
+        ($mant_bits, $exp_bits) = (52, 11);
+    } else {
+        croak "Matroska Syntax error: unsupported IEEE float byte size $length";
+    }
+
+    # NOTE: '-' binds tighter than '<<', so "1<<23 - 1" means 1<<22.
+    # Every mask below is therefore parenthesized explicitly.
+    my $implicit_one = 1 << $mant_bits;
+    my $exp_max      = (1 << $exp_bits) - 1;
+    my $bias         = (1 << ($exp_bits - 1)) - 1;
+
+    # split the raw bit pattern into its three fields
+    my $mantissa = $i & ($implicit_one - 1);
+    my $exponent = ($i >> $mant_bits) & $exp_max;
+    my $negative = $i & (1 << ($mant_bits + $exp_bits));
+
+    # an all-ones exponent encodes infinity (zero mantissa) or NaN
+    if ($exponent == $exp_max) {
+        return Math::BigRat->bnan if $mantissa != 0;
+        return Math::BigRat->binf($negative ? '-' : '+');
+    }
+
+    my $f;
+    if ($exponent == 0) {
+        # zero or subnormal: there is no implicit leading 1 bit
+        $f = _ldexp($mantissa, 1 - $bias - $mant_bits);
+    } else {
+        # normal number: restore the implicit leading 1 bit
+        $f = _ldexp($mantissa + $implicit_one, $exponent - $bias - $mant_bits);
+    }
+    $f = -$f if $negative;
+
+    return $f;
+}
+
+=method read_ebml_id($length)
+
+Reads an EBML ID when it's encoded as the data inside another
+EBML element, that is, when the enclosing element's C<type> is
+C<ebml_id>.
+
+This returns a hashref with the EBML element description as
+defined in L<Parse::Matroska::Definitions>.
+
+=cut
+sub read_ebml_id {
+    my ($self, $length) = @_;
+    # the payload is itself an element ID: render it as hex and look it up
+    my $hex_id = unpack("H*", $self->readlen($length));
+    return elem_by_hexid($hex_id);
+}
+
+=method skip($length)
+
+Skips C<$length> bytes in the internal filehandle.
+
+=cut
+sub skip {
+    my ($self, $len) = @_;
+    my $fh = $self->{fh};
+
+    # Prefer a relative seek (whence 1); if the handle has no seek method
+    # or the seek reports failure, consume the bytes by reading them.
+    unless ($fh->can('seek') && $fh->seek($len, 1)) {
+        $self->readlen($len);
+    }
+    return;
+}
+
+=method getpos
+
+Wrapper for L<IO::Seekable/$io-E<gt>getpos> in the internal filehandle.
+
+Returns undef if the internal filehandle can't C<getpos>.
+
+=cut
+sub getpos {
+    my ($self) = @_;
+    my $fh = $self->{fh};
+
+    # An explicit undef (rather than an empty list) is returned when the
+    # handle has no getpos method, so a caller evaluating this in list
+    # context still receives exactly one value.
+    return $fh->can('getpos') ? $fh->getpos : undef;
+}
+
+=method setpos($pos)
+
+Wrapper for L<IO::Seekable/$io-E<gt>setpos> in the internal filehandle.
+
+Returns C<undef> if the internal filehandle can't C<setpos>.
+
+Croaks if C<setpos> does not seek to the requested position,
+that is, if calling C<getpos> does not yield the same object
+as the C<$pos> argument.
+
+=cut
+sub setpos {
+    my ($self, $pos) = @_;
+    my $fh = $self->{fh};
+
+    # Nothing to do without a position or a handle that can setpos
+    if (!$pos || !$fh->can('setpos')) {
+        return undef;
+    }
+
+    my $ret = $fh->setpos($pos);
+
+    # Verify that we actually ended up where we were asked to go
+    if ($self->getpos ne $pos) {
+        croak "Cannot seek to correct position";
+    }
+    return $ret;
+}
+
+=method read_element($read_bin)
+
+Reads a full EBML element from the internal filehandle.
+
+Returns a L<Parse::Matroska::Element> object initialized with
+the read data. If C<read_bin> is not present or is false, will
+delay-load the contents of C<binary> type elements, that is,
+they will only be loaded when calling C<get_value> on the
+returned L<Parse::Matroska::Element> object.
+
+Does not read the children of the element if its type is
+C<sub>. Look into the L<Parse::Matroska::Element> interface
+for details on how to read child elements.
+
+Pass a true C<$read_bin> if the stream being read is not
+seekable (C<getpos> is undef) and the contents of C<binary>
+elements are desired; otherwise seeking errors or internal
+filehandle corruption might occur.
+
+=cut
+sub read_element {
+    my ($self, $read_bin) = @_;
+
+    # Nothing left in the stream: tell the caller there is no element.
+    return undef if $self->{fh}->eof;
+
+    # Where the element header starts (undef if the handle can't getpos)
+    my $elem_pos = $self->getpos;
+
+    # Header: element ID, then the size field
+    my $elid = $self->read_id;
+    my $elem_def = elem_by_hexid($elid);
+    my ($size_len, $content_len) = $self->read_size;
+    # The ID is handled as hex digits here, i.e. two characters per byte
+    my $full_len = length($elid)/2 + $size_len + $content_len;
+
+    # name/type stay undef when the ID has no known definition
+    my $elem = Parse::Matroska::Element->new(
+        elid => $elid,
+        name => $elem_def && $elem_def->{name},
+        type => $elem_def && $elem_def->{valtype},
+        size_len => $size_len,
+        content_len => $content_len,
+        full_len => $full_len,
+        reader => $self,
+        elem_pos => $elem_pos,
+        data_pos => $self->getpos,
+        );
+    # The element only holds a weak reference back to this reader
+    weaken($elem->{reader});
+
+    if (defined $elem_def) {
+        if ($elem->{type} eq 'sub') {
+            # children are not read here; start with an empty list
+            $elem->{value} = [];
+        } elsif ($elem->{type} eq 'str') {
+            $elem->{value} = $self->read_str($content_len);
+        } elsif ($elem->{type} eq 'ebml_id') {
+            $elem->{value} = $self->read_ebml_id($content_len);
+        } elsif ($elem->{type} eq 'uint') {
+            $elem->{value} = $self->read_uint($content_len);
+        } elsif ($elem->{type} eq 'sint') {
+            $elem->{value} = $self->read_sint($content_len);
+        } elsif ($elem->{type} eq 'float') {
+            $elem->{value} = $self->read_float($content_len);
+        } elsif ($elem->{type} eq 'skip') {
+            $self->skip($content_len);
+        } elsif ($elem->{type} eq 'binary') {
+            # binary payloads are only loaded eagerly on request
+            if ($read_bin) {
+                $elem->{value} = $self->readlen($content_len);
+            } else {
+                $self->skip($content_len);
+            }
+        } else {
+            # The element stores its value type under the 'type' key
+            # (see the constructor call above), so report that one.
+            die "Matroska Definition error: type $elem->{type} unknown"
+        }
+    } else {
+        # Unknown element: step over its contents
+        $self->skip($content_len);
+    }
+    return $elem;
+}
+
+1;
+
+=head1 CAVEATS
+
+Children elements have to be processed as soon as an element
+with children is found, or their children ignored with
+L<Parse::Matroska::Element/skip>. Not doing so doesn't cause
+errors but results in an invalid structure, with constant C<0>
+depth.
+
+To work correctly in unseekable streams, either the contents
+of C<binary>-type elements have to be ignored or the C<read_bin>
+flag to C<read_element> has to be true.
diff --git a/TOOLS/lib/Parse/Matroska/Utils.pm b/TOOLS/lib/Parse/Matroska/Utils.pm
new file mode 100644
index 0000000000..127d626cb1
--- /dev/null
+++ b/TOOLS/lib/Parse/Matroska/Utils.pm
@@ -0,0 +1,37 @@
+use strict;
+use warnings;
+
+# ABSTRACT: internally-used helper functions
+package Parse::Matroska::Utils;
+
+use Exporter;
+our @ISA = qw{Exporter};
+our @EXPORT_OK = qw{uniq uncamelize};
+
+=method uniq(@array)
+
+The same as L<List::MoreUtils/"uniq LIST">.
+Included to avoid depending on it since it's
+not a core module.
+
+=cut
+sub uniq(@) {
+    my (%seen, @unique);
+
+    # Keep only the first occurrence of each value, in input order
+    for my $item (@_) {
+        next if $seen{$item}++;
+        push @unique, $item;
+    }
+    return @unique;
+}
+
+=method uncamelize($string)
+
+Converts a "StringLikeTHIS" into a
+"string_like_this".
+
+=cut
+sub uncamelize($) {
+    my ($name) = @_;
+
+    # a lowercase letter followed by an uppercase one: aB -> a_b
+    $name =~ s/(?<=[a-z])([A-Z])/_\L$1/g;
+    # an uppercase letter followed by two lowercase ones: Abc -> _abc
+    $name =~ s/([A-Z])(?=[a-z]{2})/_\L$1/g;
+    # the previous step may have put an underscore at the very start
+    $name =~ s/^_//;
+
+    # whatever is still uppercase gets lowercased in one go
+    return lc $name;
+}
diff --git a/TOOLS/matroska.pl b/TOOLS/matroska.pl
new file mode 100755
index 0000000000..3ab06df6f9
--- /dev/null
+++ b/TOOLS/matroska.pl
@@ -0,0 +1,169 @@
+#! /usr/bin/env perl
+
+# Generate C definitions for parsing Matroska files.
+
+use strict;
+use warnings;
+
+use FindBin;
+use lib "$FindBin::Bin/lib";
+use Parse::Matroska::Definitions;
+use Parse::Matroska::Reader;
+
+use Getopt::Long;
+use List::Util qw{max};
+
+# Local copy of the element list exported by Parse::Matroska::Definitions;
+# the generate_* subs below iterate over it.
+my @global_elem_list = @Parse::Matroska::Definitions::global_elem_list;
+
+# Let Getopt::Long handle --version and --help by itself
+Getopt::Long::Configure(qw{auto_version auto_help});
+# Parsed command line switches, keyed by option name
+my %opt;
+GetOptions(\%opt,
+ "generate-header",
+ "generate-definitions",
+ "full",
+ );
+
+# Mode selection: emit the C header, emit the C definitions, or dump the
+# structure of every file named on the command line.
+if ($opt{"generate-header"}) {
+ generate_c_header();
+} elsif ($opt{"generate-definitions"}) {
+ generate_c_definitions();
+} else {
+ for (@ARGV) {
+ # "-" means standard input; anything else is handed to the reader as is
+ my $reader = Parse::Matroska::Reader->new($_ eq '-' ? \*STDIN : $_) or die $!;
+ # For standard input, ask for binary element contents to be read
+ # immediately (read_bin true) instead of being skipped over.
+ while (my $elem = $reader->read_element($_ eq '-')) {
+ process_elem($elem, $_ eq '-');
+ }
+ }
+}
+
+# Generate declarations for libmpdemux/ebml_types.h
+sub generate_c_header {
+ # Prints the whole header to standard output; takes no arguments and
+ # reads the file-level @global_elem_list.
+ print "/* Generated by TOOLS/matroska.pl, do not edit manually */\n\n";
+
+ # Write a #define for the ElementID of each known element
+ for my $el (@global_elem_list) {
+ printf "#define %-40s 0x%s\n", $el->{definename}, $el->{elid};
+ }
+ print "\n";
+
+ # Define a struct for each ElementID that has child elements
+ for my $el (@global_elem_list) {
+ # elements without subelements get no struct
+ next unless $el->{subelements};
+ print "\nstruct $el->{structname} {\n";
+
+ # Figure out the length of the longest variable name
+ # Used for pretty-printing in the next step
+ my $l = max(map { length $_->{valname} } values %{$el->{subelements}});
+
+ # Output each variable, with pointers for array (multiple) elements
+ # NOTE(review): subelements is walked with values() here and again
+ # below; presumably a plain hash, so the member order is whatever
+ # order the hash yields - confirm if a stable order is needed.
+ for my $subel (values %{$el->{subelements}}) {
+ printf " %-${l}s %s%s;\n",
+ $subel->{valname}, $subel->{multiple}?'*':' ', $subel->{fieldname};
+ }
+ print "\n";
+
+ # Output a counter variable for each element
+ # (presence/absence for scalars, item count for arrays)
+ for my $subel (values %{$el->{subelements}}) {
+ print " int n_$subel->{fieldname};\n"
+ }
+ print "};\n";
+ }
+ print "\n";
+
+ # Output extern references for ebml_elem_desc structs for each of the elements
+ # These are defined by generate_c_definitions
+ for my $el (@global_elem_list) {
+ next unless $el->{subelements};
+ print "extern const struct ebml_elem_desc $el->{structname}_desc;\n";
+ }
+ print "\n";
+
+ # Output the max number of sub-elements a known element might have
+ # (only elements that have subelements take part in the maximum)
+ printf "#define MAX_EBML_SUBELEMENTS %d\n",
+ max(map { scalar keys %{$_->{subelements}} }
+ grep { $_->{subelements} } @global_elem_list);
+}
+
+# Generate definitions for libmpdemux/ebml_defs.c
+sub generate_c_definitions {
+    print "/* Generated by TOOLS/matroska.pl, do not edit manually */\n\n";
+
+    # The E, E_S and F macros used in the output are declared in ebml.c
+    foreach my $elem (@global_elem_list) {
+        print "\n";
+
+        my $children = $elem->{subelements};
+        unless ($children) {
+            # Element without children: a single E() line is enough
+            print "E(\"$elem->{name}\", $elem->{fieldname}, $elem->{ebmltype})\n";
+            next;
+        }
+
+        # N names the element for the macros that follow
+        print "#define N $elem->{fieldname}\n";
+
+        # E_S starts the description struct and the field list; it
+        # secretly opens two scopes, which the "}};" below closes
+        my $child_count = keys %$children;
+        print "E_S(\"$elem->{name}\", $child_count)\n";
+
+        # One F() entry per child; these do not open any scope
+        foreach my $child (values %$children) {
+            my $is_multiple = $child->{multiple} ? '1' : '0';
+            print "F($child->{definename}, $child->{fieldname}, $is_multiple)\n";
+        }
+
+        # Close the struct and release N for the next element
+        print "}};\n";
+        print "#undef N\n";
+    }
+}
+
+# Quote each argument for display: single quotes by default, double
+# quotes (with embedded double quotes backslash-escaped) when the string
+# itself contains a single quote.
+# Returns all quoted strings in list context, the last one in scalar
+# context and nothing in void context.
+sub repr {
+    my @ret;
+    foreach my $arg (@_) {
+        # Work on a copy: the elements of @_ alias the caller's values,
+        # so substituting in place would change the caller's data and
+        # die outright when a read-only literal was passed in.
+        my $str = $arg;
+        if ($str =~ /'/) {
+            $str =~ s/"/\\"/g;
+            push @ret, "\"$str\"";
+        } else {
+            push @ret, "'$str'";
+        }
+    }
+    return @ret if wantarray;
+    return pop @ret if defined wantarray;
+    return;
+}
+
+# Print a one-line description of $elem and, for elements with children,
+# recurse into every child.
+#   $elem     - element object as returned by the reader
+#   $read_bin - true if binary element contents should be read eagerly;
+#               passed on to next_child and to the recursive calls
+sub process_elem {
+    my ($elem, $read_bin) = @_;
+
+    # Validate the argument before it is dereferenced below
+    die unless $elem;
+
+    # Elements without a known definition have neither a name nor a
+    # type; report them instead of comparing undefined values.
+    unless (defined $elem->{type}) {
+        print "$elem->{depth} Unknown element: $elem->{elid} size: $elem->{content_len}\n";
+        return;
+    }
+
+    # Unless --full was given, step over Cluster and Cues elements
+    unless ($opt{full}) {
+        if ($elem->{name} eq 'Cluster' || $elem->{name} eq 'Cues') {
+            $elem->skip;
+            return;
+        }
+    }
+
+    if ($elem->{type} ne 'skip') {
+        print "$elem->{depth} $elem->{elid} $elem->{name} size: $elem->{content_len} value: ";
+    }
+
+    if ($elem->{type} eq 'sub') {
+        print "subelements:\n";
+        while (my $chld = $elem->next_child($read_bin)) {
+            # keep the read_bin setting for every nesting level
+            process_elem($chld, $read_bin);
+        }
+    } elsif ($elem->{type} eq 'binary') {
+        # only short payloads are shown, as hex digits
+        my $t = "<skipped $elem->{content_len} bytes>";
+        if ($elem->{content_len} < 20) {
+            $t = unpack "H*", $elem->get_value;
+        }
+        print "binary $t\n";
+        # drop the stored value again once it has been printed
+        delete $elem->{value};
+    } elsif ($elem->{type} eq 'ebml_id') {
+        print "binary $elem->{value}->{elid} (".($elem->{value}->{name}||"UNKNOWN").")\n";
+    } elsif ($elem->{type} eq 'skip') {
+        # skip
+    } elsif ($elem->{type} eq 'str') {
+        print "string ". repr($elem->get_value) . "\n";
+    } else {
+        print "$elem->{type} ". $elem->get_value ."\n";
+    }
+} \ No newline at end of file
diff --git a/TOOLS/matroska.py b/TOOLS/matroska.py
deleted file mode 100755
index 848b033cbd..0000000000
--- a/TOOLS/matroska.py
+++ /dev/null
@@ -1,429 +0,0 @@
-#!/usr/bin/env python
-"""
-Generate C definitions for parsing Matroska files.
-Can also be used to directly parse Matroska files and display their contents.
-"""
-
-#
-# This file is part of MPlayer.
-#
-# MPlayer is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# MPlayer is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with MPlayer; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-
-# for compatibility with Python 2.x
-from __future__ import print_function
-
-elements_ebml = (
- 'EBML, 1a45dfa3, sub', (
- 'EBMLVersion, 4286, uint',
- 'EBMLReadVersion, 42f7, uint',
- 'EBMLMaxIDLength, 42f2, uint',
- 'EBMLMaxSizeLength, 42f3, uint',
- 'DocType, 4282, str',
- 'DocTypeVersion, 4287, uint',
- 'DocTypeReadVersion, 4285, uint',
- ),
-
- 'CRC32, bf, binary',
- 'Void, ec, binary',
-)
-
-elements_matroska = (
- 'Segment, 18538067, sub', (
-
- 'SeekHead*, 114d9b74, sub', (
- 'Seek*, 4dbb, sub', (
- 'SeekID, 53ab, ebml_id',
- 'SeekPosition, 53ac, uint',
- ),
- ),
-
- 'Info*, 1549a966, sub', (
- 'SegmentUID, 73a4, binary',
- 'PrevUID, 3cb923, binary',
- 'NextUID, 3eb923, binary',
- 'TimecodeScale, 2ad7b1, uint',
- 'DateUTC, 4461, sint',
- 'Title, 7ba9, str',
- 'MuxingApp, 4d80, str',
- 'WritingApp, 5741, str',
- 'Duration, 4489, float',
- ),
-
- 'Cluster*, 1f43b675, sub', (
- 'Timecode, e7, uint',
- 'BlockGroup*, a0, sub', (
- 'Block, a1, binary',
- 'BlockDuration, 9b, uint',
- 'ReferenceBlock*, fb, sint',
- ),
- 'SimpleBlock*, a3, binary',
- ),
-
- 'Tracks*, 1654ae6b, sub', (
- 'TrackEntry*, ae, sub', (
- 'TrackNumber, d7, uint',
- 'TrackUID, 73c5, uint',
- 'TrackType, 83, uint',
- 'FlagEnabled, b9, uint',
- 'FlagDefault, 88, uint',
- 'FlagForced, 55aa, uint',
- 'FlagLacing, 9c, uint',
- 'MinCache, 6de7, uint',
- 'MaxCache, 6df8, uint',
- 'DefaultDuration, 23e383, uint',
- 'TrackTimecodeScale, 23314f, float',
- 'MaxBlockAdditionID, 55ee, uint',
- 'Name, 536e, str',
- 'Language, 22b59c, str',
- 'CodecID, 86, str',
- 'CodecPrivate, 63a2, binary',
- 'CodecName, 258688, str',
- 'CodecDecodeAll, aa, uint',
- 'Video, e0, sub', (
- 'FlagInterlaced, 9a, uint',
- 'PixelWidth, b0, uint',
- 'PixelHeight, ba, uint',
- 'DisplayWidth, 54b0, uint',
- 'DisplayHeight, 54ba, uint',
- 'DisplayUnit, 54b2, uint',
- 'FrameRate, 2383e3, float',
- ),
- 'Audio, e1, sub', (
- 'SamplingFrequency, b5, float',
- 'OutputSamplingFrequency, 78b5, float',
- 'Channels, 9f, uint',
- 'BitDepth, 6264, uint',
- ),
- 'ContentEncodings, 6d80, sub', (
- 'ContentEncoding*, 6240, sub', (
- 'ContentEncodingOrder, 5031, uint',
- 'ContentEncodingScope, 5032, uint',
- 'ContentEncodingType, 5033, uint',
- 'ContentCompression, 5034, sub', (
- 'ContentCompAlgo, 4254, uint',
- 'ContentCompSettings, 4255, binary',
- ),
- ),
- ),
- ),
- ),
-
- 'Cues, 1c53bb6b, sub', (
- 'CuePoint*, bb, sub', (
- 'CueTime, b3, uint',
- 'CueTrackPositions*, b7, sub', (
- 'CueTrack, f7, uint',
- 'CueClusterPosition, f1, uint',
- ),
- ),
- ),
-
- 'Attachments, 1941a469, sub', (
- 'AttachedFile*, 61a7, sub', (
- 'FileDescription, 467e, str',
- 'FileName, 466e, str',
- 'FileMimeType, 4660, str',
- 'FileData, 465c, binary',
- 'FileUID, 46ae, uint',
- ),
- ),
-
- 'Chapters, 1043a770, sub', (
- 'EditionEntry*, 45b9, sub', (
- 'EditionUID, 45bc, uint',
- 'EditionFlagHidden, 45bd, uint',
- 'EditionFlagDefault, 45db, uint',
- 'EditionFlagOrdered, 45dd, uint',
- 'ChapterAtom*, b6, sub', (
- 'ChapterUID, 73c4, uint',
- 'ChapterTimeStart, 91, uint',
- 'ChapterTimeEnd, 92, uint',
- 'ChapterFlagHidden, 98, uint',
- 'ChapterFlagEnabled, 4598, uint',
- 'ChapterSegmentUID, 6e67, binary',
- 'ChapterSegmentEditionUID, 6ebc, uint',
- 'ChapterDisplay*, 80, sub', (
- 'ChapString, 85, str',
- 'ChapLanguage*, 437c, str',
- 'ChapCountry*, 437e, str',
- ),
- ),
- ),
- ),
- 'Tags*, 1254c367, sub', (
- 'Tag*, 7373, sub', (
- 'Targets, 63c0, sub', (
- 'TargetTypeValue, 68ca, uint',
- 'TargetTrackUID, 63c5, uint',
- 'TargetEditionUID, 63c9, uint',
- 'TargetChapterUID, 63c4, uint',
- 'TargetAttachmentUID, 63c6, uint',
- ),
- 'SimpleTag*, 67c8, sub', (
- 'TagName, 45a3, str',
- 'TagLanguage, 447a, str',
- 'TagString, 4487, str'
- ),
- ),
- ),
- ),
-)
-
-
-import sys
-from math import ldexp
-from binascii import hexlify
-
-def byte2num(s):
- return int(hexlify(s), 16)
-
-class EOF(Exception): pass
-
-def camelcase_to_words(name):
- parts = []
- start = 0
- for i in range(1, len(name)):
- if name[i].isupper() and (name[i-1].islower() or
- name[i+1:i+2].islower()):
- parts.append(name[start:i])
- start = i
- parts.append(name[start:])
- return '_'.join(parts).lower()
-
-class MatroskaElement(object):
-
- def __init__(self, name, elid, valtype, namespace):
- self.name = name
- self.definename = '{0}_ID_{1}'.format(namespace, name.upper())
- self.fieldname = camelcase_to_words(name)
- self.structname = 'ebml_' + self.fieldname
- self.elid = elid
- self.valtype = valtype
- if valtype == 'sub':
- self.ebmltype = 'EBML_TYPE_SUBELEMENTS'
- self.valname = 'struct ' + self.structname
- else:
- self.ebmltype = 'EBML_TYPE_' + valtype.upper()
- try:
- self.valname = {'uint': 'uint64_t', 'str': 'struct bstr',
- 'binary': 'struct bstr', 'ebml_id': 'uint32_t',
- 'float': 'double', 'sint': 'int64_t',
- }[valtype]
- except KeyError:
- raise SyntaxError('Unrecognized value type ' + valtype)
- self.subelements = ()
-
- def add_subelements(self, subelements):
- self.subelements = subelements
- self.subids = set(x[0].elid for x in subelements)
-
-elementd = {}
-elementlist = []
-def parse_elems(l, namespace):
- subelements = []
- for el in l:
- if isinstance(el, str):
- name, hexid, eltype = [x.strip() for x in el.split(',')]
- multiple = name.endswith('*')
- name = name.strip('*')
- new = MatroskaElement(name, hexid, eltype, namespace)
- elementd[hexid] = new
- elementlist.append(new)
- subelements.append((new, multiple))
- else:
- new.add_subelements(parse_elems(el, namespace))
- return subelements
-
-parse_elems(elements_ebml, 'EBML')
-parse_elems(elements_matroska, 'MATROSKA')
-
-def generate_C_header():
- print('// Generated by TOOLS/matroska.py, do not edit manually')
- print()
-
- for el in elementlist:
- print('#define {0.definename:40} 0x{0.elid}'.format(el))
-
- print()
-
- for el in reversed(elementlist):
- if not el.subelements:
- continue
- print()
- print('struct {0.structname} {{'.format(el))
- l = max(len(subel.valname) for subel, multiple in el.subelements)+1
- for subel, multiple in el.subelements:
- print(' {e.valname:{l}} {star}{e.fieldname};'.format(
- e=subel, l=l, star=' *'[multiple]))
- print()
- for subel, multiple in el.subelements:
- print(' int n_{0.fieldname};'.format(subel))
- print('};')
-
- for el in elementlist:
- if not el.subelements:
- continue
- print('extern const struct ebml_elem_desc {0.structname}_desc;'.format(
- el))
-
- print()
- print('#define MAX_EBML_SUBELEMENTS', max(len(el.subelements)
- for el in elementlist))
-
-
-
-def generate_C_definitions():
- print('// Generated by TOOLS/matroska.py, do not edit manually')
- print()
- for el in reversed(elementlist):
- print()
- if el.subelements:
- print('#define N', el.fieldname)
- print('E_S("{0}", {1})'.format(el.name, len(el.subelements)))
- for subel, multiple in el.subelements:
- print('F({0.definename}, {0.fieldname}, {1})'.format(
- subel, int(multiple)))
- print('}};')
- print('#undef N')
- else:
- print('E("{0.name}", {0.fieldname}, {0.ebmltype})'.format(el))
-
-def read(s, length):
- t = s.read(length)
- if len(t) != length:
- raise EOF
- return t
-
-def read_id(s):
- t = read(s, 1)
- i = 0
- mask = 128
- if ord(t) == 0:
- raise SyntaxError
- while not ord(t) & mask:
- i += 1
- mask >>= 1
- t += read(s, i)
- return t
-
-def read_vint(s):
- t = read(s, 1)
- i = 0
- mask = 128
- if ord(t) == 0:
- raise SyntaxError
- while not ord(t) & mask:
- i += 1
- mask >>= 1
- t = bytes((ord(t) & (mask - 1),))
- t += read(s, i)
- return i+1, byte2num(t)
-
-def read_str(s, length):
- return read(s, length)
-
-def read_uint(s, length):
- t = read(s, length)
- return byte2num(t)
-
-def read_sint(s, length):
- i = read_uint(s, length)
- mask = 1 << (length * 8 - 1)
- if i & mask:
- i -= 2 * mask
- return i
-
-def read_float(s, length):
- t = read(s, length)
- i = byte2num(t)
- if length == 4:
- f = ldexp((i & 0x7fffff) + (1 << 23), (i >> 23 & 0xff) - 150)
- if i & (1 << 31):
- f = -f
- elif length == 8:
- f = ldexp((i & ((1 << 52) - 1)) + (1 << 52), (i >> 52 & 0x7ff) - 1075)
- if i & (1 << 63):
- f = -f
- else:
- raise SyntaxError
- return f
-
-def parse_one(s, depth, parent, maxlen):
- elid = hexlify(read_id(s)).decode('ascii')
- elem = elementd.get(elid)
- if parent is not None and elid not in parent.subids and elid not in ('ec', 'bf'):
- print('Unexpected:', elid)
- if 1:
- raise NotImplementedError
- size, length = read_vint(s)
- this_length = len(elid) / 2 + size + length
- if elem is not None:
- if elem.valtype != 'skip':
- print(depth, elid, elem.name, 'size:', length, 'value:', end=' ')
- if elem.valtype == 'sub':
- print('subelements:')
- while length > 0:
- length -= parse_one(s, depth + 1, elem, length)
- if length < 0:
- raise SyntaxError
- elif elem.valtype == 'str':
- print('string', repr(read_str(s, length).decode('utf8', 'replace')))
- elif elem.valtype in ('binary', 'ebml_id'):
- t = read_str(s, length)
- dec = ''
- if elem.valtype == 'ebml_id':
- idelem = elementd.get(hexlify(t).decode('ascii'))
- if idelem is None:
- dec = '(UNKNOWN)'
- else:
- dec = '({0.name})'.format(idelem)
- if len(t) < 20:
- t = hexlify(t).decode('ascii')
- else:
- t = '<skipped {0} bytes>'.format(len(t))
- print('binary', t, dec)
- elif elem.valtype == 'uint':
- print('uint', read_uint(s, length))
- elif elem.valtype == 'sint':
- print('sint', read_sint(s, length))
- elif elem.valtype == 'float':
- print('float', read_float(s, length))
- elif elem.valtype == 'skip':
- read(s, length)
- else:
- raise NotImplementedError
- else:
- print(depth, 'Unknown element:', elid, 'size:', length)
- read(s, length)
- return this_length
-
-def parse_toplevel(s):
- parse_one(s, 0, None, 1 << 63)
-
-if sys.argv[1] == '--generate-header':
- generate_C_header()
-elif sys.argv[1] == '--generate-definitions':
- generate_C_definitions()
-else:
- s = open(sys.argv[1], "rb")
- while 1:
- start = s.tell()
- try:
- parse_toplevel(s)
- except EOF:
- if s.tell() != start:
- raise Exception("Unexpected end of file")
- break
diff --git a/TOOLS/vdpau_functions.py b/TOOLS/vdpau_functions.pl
index 85e6f1d942..8bab4e533b 100755
--- a/TOOLS/vdpau_functions.py
+++ b/TOOLS/vdpau_functions.pl
@@ -1,8 +1,39 @@
-#!/usr/bin/env python
+#! /usr/bin/env perl
-# Generate vdpau_template.c
+# Generates vdpau_template.c
-functions = """
+use strict;
+use warnings;
+
+sub camelize($) {
+    my ($name) = @_;
+
+    # Uppercase every lowercase letter that starts the string or follows
+    # an underscore, dropping that underscore: foo_bar -> FooBar
+    $name =~ s/(?:^|_)([a-z])/\u$1/g;
+    return $name;
+}
+
+print <<EOF;
+/* Lists the VDPAU functions used by MPV.
+ * Generated by vdpau_functions.pl.
+ * First argument on each line is the VDPAU function type name,
+ * second is the macro name needed to get the function address,
+ * third is the name MPV uses for the function.
+ */
+
+EOF
+
+# One function per line of the DATA section: "mp_name [vdpau_name]"
+while (defined(my $line = <DATA>)) {
+    # trim surrounding whitespace and drop an unescaped '#' comment
+    my ($entry) = $line =~ /^\s*(.*?)\s*(?:(?<!\\)#.*)?$/;
+    next if !$entry;    # empty / comment line
+
+    my ($mp_name, $vdpau_name) = split /\s+/, $entry;
+    # without an explicit VDPAU name, derive it from the MPV name
+    $vdpau_name ||= camelize($mp_name);
+
+    my $func_id = "VDP_FUNC_ID_" . uc($mp_name);
+    print "VDP_FUNCTION(Vdp$vdpau_name, $func_id, $mp_name)\n";
+}
+
+__DATA__
# get_error_string should be first, because the function lookup loop should
# have it available to print errors for other functions
get_error_string
@@ -41,24 +72,3 @@ video_mixer_set_feature_enables
video_surface_create
video_surface_destroy
video_surface_put_bits_y_cb_cr
-"""
-
-print("""
-/* List the VDPAU functions used by MPlayer.
- * Generated by vdpau_functions.py.
- * First argument on each line is the VDPAU function type name,
- * second macro name needed to get function address,
- * third name MPlayer uses for the function.
- */
-""")
-for line in functions.splitlines():
- parts = line.split('#')[0].strip().split()
- if not parts:
- continue # empty/comment line
- if len(parts) > 1:
- mp_name, vdpau_name = parts
- else:
- mp_name = parts[0]
- vdpau_name = ''.join(part.capitalize() for part in mp_name.split('_'))
- macro_name = mp_name.upper()
- print('VDP_FUNCTION(Vdp%s, VDP_FUNC_ID_%s, %s)' % (vdpau_name, macro_name, mp_name))
diff --git a/configure b/configure
index b0c3a53932..f36fff9fc5 100755
--- a/configure
+++ b/configure
@@ -1029,8 +1029,8 @@ echocheck "working compiler"
cflag_check "" || die "Compiler is not functioning correctly. Check your installation and custom CFLAGS $CFLAGS ."
echo "yes"
-echocheck "python"
-command_check python -c '' || die "Python is not functioning correctly. Check your installation and PATH."
+echocheck "perl"
+command_check perl -Mv5.8 -e';' || die "Perl is not functioning correctly or is ancient. Install the latest perl available."
echo yes
if test -z "$_target" && x86 ; then