diff options
Diffstat (limited to 'TOOLS')
-rwxr-xr-x | TOOLS/file2string.pl | 24 | ||||
-rwxr-xr-x | TOOLS/file2string.py | 27 | ||||
-rw-r--r-- | TOOLS/lib/Parse/Matroska.pm | 30 | ||||
-rw-r--r-- | TOOLS/lib/Parse/Matroska/Definitions.pm | 384 | ||||
-rw-r--r-- | TOOLS/lib/Parse/Matroska/Element.pm | 331 | ||||
-rw-r--r-- | TOOLS/lib/Parse/Matroska/Reader.pm | 426 | ||||
-rw-r--r-- | TOOLS/lib/Parse/Matroska/Utils.pm | 37 | ||||
-rwxr-xr-x | TOOLS/matroska.pl | 169 | ||||
-rwxr-xr-x | TOOLS/matroska.py | 463 |
9 files changed, 490 insertions, 1401 deletions
diff --git a/TOOLS/file2string.pl b/TOOLS/file2string.pl deleted file mode 100755 index 341bb06fd6..0000000000 --- a/TOOLS/file2string.pl +++ /dev/null @@ -1,24 +0,0 @@ -#! /usr/bin/env perl - -use strict; -use warnings; - -# Convert the contents of a file into a C string constant. -# Note that the compiler will implicitly add an extra 0 byte at the end -# of every string, so code using the string may need to remove that to get -# the exact contents of the original file. -# FIXME: why not a char array? - -# treat only alphanumeric and punctuations (excluding " and ?) as safe -my $unsafe_chars = qr{[^][A-Za-z0-9!#%&'()*+,./:;<=>^_{|}~ -]}; - -for my $file (@ARGV) { - open my $fh, '<:raw', $file or next; - print "/* Generated from $file */\n"; - while (<$fh>) { - # replace unsafe chars with their equivalent octal escapes - s/($unsafe_chars)/\\@{[sprintf '%03o', ord($1)]}/gos; - print "\"$_\"\n" - } - close $fh; -} diff --git a/TOOLS/file2string.py b/TOOLS/file2string.py new file mode 100755 index 0000000000..6cdd1a72ae --- /dev/null +++ b/TOOLS/file2string.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +# Convert the contents of a file into a C string constant. +# Note that the compiler will implicitly add an extra 0 byte at the end +# of every string, so code using the string may need to remove that to get +# the exact contents of the original file. 
+ +import sys + +# Indexing a byte string yields int on Python 3.x, and a str on Python 2.x +def pord(c): + return ord(c) if type(c) == str else c + +def main(infile): + conv = ['\\' + ("%03o" % c) for c in range(256)] + safe_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" \ + "0123456789!#%&'()*+,-./:;<=>?[]^_{|}~ " + for c in safe_chars: + conv[ord(c)] = c + for c, esc in ("\nn", "\tt", r"\\", '""'): + conv[ord(c)] = '\\' + esc + for line in infile: + sys.stdout.write('"' + ''.join(conv[pord(c)] for c in line) + '"\n') + +with open(sys.argv[1], 'rb') as infile: + sys.stdout.write("// Generated from %s\n\n" % sys.argv[1]) + main(infile) diff --git a/TOOLS/lib/Parse/Matroska.pm b/TOOLS/lib/Parse/Matroska.pm deleted file mode 100644 index e1c08c9814..0000000000 --- a/TOOLS/lib/Parse/Matroska.pm +++ /dev/null @@ -1,30 +0,0 @@ -use 5.008; -use strict; -use warnings; - -# ABSTRACT: Module collection to parse Matroska files. -package Parse::Matroska; - -=head1 DESCRIPTION - -C<use>s L<Parse::Matroska::Reader>. See the documentation -of the modules mentioned in L</"SEE ALSO"> for more information -in how to use this module. - -It's intended for this module to contain high-level interfaces -to the other modules in the distribution. - -=head1 SOURCE CODE - -L<https://github.com/Kovensky/Parse-Matroska> - -=head1 SEE ALSO - -L<Parse::Matroska::Reader>, L<Parse::Matroska::Element>, -L<Parse::Matroska::Definitions>. 
- -=cut - -use Parse::Matroska::Reader; - -1; diff --git a/TOOLS/lib/Parse/Matroska/Definitions.pm b/TOOLS/lib/Parse/Matroska/Definitions.pm deleted file mode 100644 index 5a5adcd6de..0000000000 --- a/TOOLS/lib/Parse/Matroska/Definitions.pm +++ /dev/null @@ -1,384 +0,0 @@ -use 5.008; -use strict; -use warnings; - -# ABSTRACT: internal EBML grammar definitions -package Parse::Matroska::Definitions; - -use Parse::Matroska::Utils qw{uniq uncamelize}; - -use Exporter; -our @ISA = qw{Exporter}; -our @EXPORT_OK = qw{elem_by_hexid %EBML_DEFINITION %MATROSKA_DEFINITION}; - -=head1 SYNOPSIS - - use Parse::Matroska::Definitions qw{elem_by_hexid}; - my $ebml_id = elem_by_hexid('1a45dfa3'); - print "EBML ID $ebml_id->{elid}'s name: $ebml_id->{name}"; - -=head1 DESCRIPTION - -Contains the definition of the EBML grammar as expected in -Matroska files. This module is meant mostly for internal use. - -As this was extended from a script in mpv-player, some data -generated is apparently useless for regular module users -but is still relevant to the mpv-player script. Such data -is annotated as being for mpv compatibility. - -=head1 NOTE - -The API of this module is not yet considered stable. - -=head1 GLOBALS - -These global variables are considered B<immutable>. - -=head2 @Parse::Matroska::Definitions::global_elem_list - -A global list of known matroska elements. Useful for -mpv's matroska script, used for generating C headers -that parse matroska. - -=head2 %Parse::Matroska::Definitions::global_elem_dict - -A global hash of known matroska elements. Used internally -by L</elem_by_hexid($id)>. - -=cut - -@Parse::Matroska::Definitions::global_elem_list = (); -%Parse::Matroska::Definitions::global_elem_dict = (); - -=head2 %EBML_DEFINITION - -Optionally-importable hash of known EBML IDs belonging -to the EBML generic grammar. - -=head2 %MATROSKA_DEFINITION - -Optionally-importable hash of known EBML IDs belonging -to the Matroska-specific grammar. 
- -=cut - -our %EBML_DEFINITION = define_ebml(); -our %MATROSKA_DEFINITION = define_matroska(); - -=method elem_by_hexid($id) - -Returns an EBML Element Definition corresponding to the provided -hexadecimal string. Returns C<undef> if the element is unknown. - -=cut -sub elem_by_hexid { - my ($elid) = @_; - return $Parse::Matroska::Definitions::global_elem_dict{$elid}; -} - -################################################ -### Helper functions for document definition ### -################################################ - -# used by elem when setting the 'valname' key -use constant TYPE_MAP => { - uint => 'uint64_t', - str => 'char *', - binary => 'struct bstr', - ebml_id => 'uint32_t', - float => 'double', - sint => 'int64_t', -}; - -# this will be localized to "MATROSKA" or "EBML" on the elem declarations -our $ELEM_DEFINE_TYPE = undef; - -=method elem($name,$elid,$valtype) - -NOTE: never call this function yourself; it changes data structures -that are considered immutable outside of this package. - -Internal API function that generates the EBML Element Definitions. - -This API function returns an array which first element is C<$elid> -and the second is a generated hash. The generated hash is stored -in the @global_elem_list and %global_elem_dict. - -The generated hash contains: - -=for :list -= name -The EBML Element's name, given through C<$name>. -= elid -The EBML Element's hex id, given through C<$elid>. Used for lookups by L</elem_by_hexid($id)>. -= valtype -The EBML Element's type, given through C<$valtype>, except when C<$valtype> is an arrayref. -= multiple -If C<$name> ends with a C<*>, this is set as true and strips the C<*> from L</name>. Used to -mark elements that may be repeated. -= subelements -An arrayref of elements that may be children of this element, given through C<$valtype> if it -is an arrayref. Sets L</valtype> to C<sub> if there are subelements. -= subids -An arrayref listing all the L</elid>s of subelements, C<uniq>ified. 
- -The following elements are for mpv compatibility: - -=for :list -= definename -Name used for generating C #defines. -= fieldname -Name used for generating C struct fields. -= structname -Name used for generating C struct names. -= ebmltype -A pre-#defined constant to describe the element's type. -= valname -Typename used when declaring a struct field referring to this element. - -=cut -sub elem { - my %e = (name => shift, elid => shift, valtype => shift); - - # strip * from name, set 'multiple' if there was one - $e{multiple} = scalar $e{name} =~ s/\*$//; - - # ELEM_DEFINE_TYPE is either MATROSKA or EBML - $e{definename} = "${ELEM_DEFINE_TYPE}_ID_".uc($e{name}); - $e{fieldname} = uncamelize $e{name}; - $e{structname} = "ebml_$e{fieldname}"; - - if (ref $e{valtype} eq 'HASH') { - $e{subelements} = $e{valtype}; - $e{subids} = uniq map { $_->{elid} } values %{$e{subelements}}; - $e{valtype} = 'sub'; - $e{ebmltype} = 'EBML_TYPE_SUBELEMENTS'; - $e{valname} = "struct $e{structname}"; - } else { - $e{ebmltype} = "EBML_TYPE_\U$e{valtype}"; - die "Unrecognized value type $e{valtype}" unless - defined ($e{valname} = TYPE_MAP->{$e{valtype}}); - } - my $e = \%e; - push @Parse::Matroska::Definitions::global_elem_list, $e; - $Parse::Matroska::Definitions::global_elem_dict{$e{elid}} = $e; - return ($e{elid}, $e); -} - -############################################# -### EBML and Matroska document definitons ### -############################################# - -=method define_ebml - -Internal function that defines the EBML generic grammar. - -Must not be called from outside the package. 
- -=cut -sub define_ebml { - local $ELEM_DEFINE_TYPE = 'EBML'; - return ( - elem('EBML', '1a45dfa3', { - elem('EBMLVersion', '4286', 'uint'), - elem('EBMLReadVersion', '42f7', 'uint'), - elem('EBMLMaxIDLength', '42f2', 'uint'), - elem('EBMLMaxSizeLength', '42f3', 'uint'), - elem('DocType', '4282', 'str'), - elem('DocTypeVersion', '4287', 'uint'), - elem('DocTypeReadVersion', '4285', 'uint'), - }), - - elem('CRC32', 'bf', 'binary'), - elem('Void', 'ec', 'binary'), - ); -} - - -=method define_matroska - -Internal function that defines the Matroska-specific EBML grammar. - -Must not be called from outside the package. - -=cut -sub define_matroska { - local $ELEM_DEFINE_TYPE = 'MATROSKA'; - return ( - elem('Segment', '18538067', { - elem('SeekHead*', '114d9b74', { - elem('Seek*', '4dbb', { - elem('SeekID', '53ab', 'ebml_id'), - elem('SeekPosition', '53ac', 'uint'), - }), - }), - - elem('Info*', '1549a966', { - elem('SegmentUID', '73a4', 'binary'), - elem('PrevUID', '3cb923', 'binary'), - elem('NextUID', '3eb923', 'binary'), - elem('TimecodeScale', '2ad7b1', 'uint'), - elem('DateUTC', '4461', 'sint'), - elem('Title', '7ba9', 'str'), - elem('MuxingApp', '4d80', 'str'), - elem('WritingApp', '5741', 'str'), - elem('Duration', '4489', 'float'), - }), - - elem('Cluster*', '1f43b675', { - elem('Timecode', 'e7', 'uint'), - elem('BlockGroup*', 'a0', { - elem('Block', 'a1', 'binary'), - elem('BlockDuration', '9b', 'uint'), - elem('ReferenceBlock*', 'fb', 'sint'), - elem('DiscardPadding', '75A2', 'sint'), - }), - elem('SimpleBlock*', 'a3', 'binary'), - }), - - elem('Tracks*', '1654ae6b', { - elem('TrackEntry*', 'ae', { - elem('TrackNumber', 'd7', 'uint'), - elem('TrackUID', '73c5', 'uint'), - elem('TrackType', '83', 'uint'), - elem('FlagEnabled', 'b9', 'uint'), - elem('FlagDefault', '88', 'uint'), - elem('FlagForced', '55aa', 'uint'), - elem('FlagLacing', '9c', 'uint'), - elem('MinCache', '6de7', 'uint'), - elem('MaxCache', '6df8', 'uint'), - elem('DefaultDuration', '23e383', 
'uint'), - elem('TrackTimecodeScale', '23314f', 'float'), - elem('MaxBlockAdditionID', '55ee', 'uint'), - elem('Name', '536e', 'str'), - elem('Language', '22b59c', 'str'), - elem('CodecID', '86', 'str'), - elem('CodecPrivate', '63a2', 'binary'), - elem('CodecName', '258688', 'str'), - elem('CodecDecodeAll', 'aa', 'uint'), - elem('CodecDelay', '56AA', 'uint'), - elem('SeekPreRoll', '56BB', 'uint'), - elem('Video', 'e0', { - elem('FlagInterlaced', '9a', 'uint'), - elem('PixelWidth', 'b0', 'uint'), - elem('PixelHeight', 'ba', 'uint'), - elem('DisplayWidth', '54b0', 'uint'), - elem('DisplayHeight', '54ba', 'uint'), - elem('DisplayUnit', '54b2', 'uint'), - elem('FrameRate', '2383e3', 'float'), - elem('ColourSpace', '2eb524', 'binary'), - elem('StereoMode', '53b8', 'uint'), - elem('Colour', '55B0', { - elem('MatrixCoefficients', '55B1', 'uint'), - elem('BitsPerChannel', '55B2', 'uint'), - elem('ChromaSubsamplingHorz', '55B3', 'uint'), - elem('ChromaSubsamplingVert', '55B4', 'uint'), - elem('CbSubsamplingHorz', '55B5', 'uint'), - elem('CbSubsamplingVert', '55B6', 'uint'), - elem('ChromaSitingHorz', '55B7', 'uint'), - elem('ChromaSitingVert', '55B8', 'uint'), - elem('Range', '55B9', 'uint'), - elem('TransferCharacteristics', '55BA', 'uint'), - elem('Primaries', '55BB', 'uint'), - elem('MaxCLL', '55BC', 'uint'), - elem('MaxFALL', '55BD', 'uint'), - elem('MasteringMetadata', '55D0', { - elem('PrimaryRChromaticityX', '55D1', 'float'), - elem('PrimaryRChromaticityY', '55D2', 'float'), - elem('PrimaryGChromaticityX', '55D3', 'float'), - elem('PrimaryGChromaticityY', '55D4', 'float'), - elem('PrimaryBChromaticityX', '55D5', 'float'), - elem('PrimaryBChromaticityY', '55D6', 'float'), - elem('WhitePointChromaticityX', '55D7', 'float'), - elem('WhitePointChromaticityY', '55D8', 'float'), - elem('LuminanceMax', '55D9', 'float'), - elem('LuminanceMin', '55DA', 'float'), - }), - }), - }), - elem('Audio', 'e1', { - elem('SamplingFrequency', 'b5', 'float'), - 
elem('OutputSamplingFrequency', '78b5', 'float'), - elem('Channels', '9f', 'uint'), - elem('BitDepth', '6264', 'uint'), - }), - elem('ContentEncodings', '6d80', { - elem('ContentEncoding*', '6240', { - elem('ContentEncodingOrder', '5031', 'uint'), - elem('ContentEncodingScope', '5032', 'uint'), - elem('ContentEncodingType', '5033', 'uint'), - elem('ContentCompression', '5034', { - elem('ContentCompAlgo', '4254', 'uint'), - elem('ContentCompSettings', '4255', 'binary'), - }), - }), - }), - }), - }), - - elem('Cues', '1c53bb6b', { - elem('CuePoint*', 'bb', { - elem('CueTime', 'b3', 'uint'), - elem('CueTrackPositions*', 'b7', { - elem('CueTrack', 'f7', 'uint'), - elem('CueClusterPosition', 'f1', 'uint'), - elem('CueRelativePosition','f0', 'uint'), - elem('CueDuration', 'b2', 'uint'), - }), - }), - }), - - elem('Attachments', '1941a469', { - elem('AttachedFile*', '61a7', { - elem('FileDescription', '467e', 'str'), - elem('FileName', '466e', 'str'), - elem('FileMimeType', '4660', 'str'), - elem('FileData', '465c', 'binary'), - elem('FileUID', '46ae', 'uint'), - }), - }), - - elem('Chapters', '1043a770', { - elem('EditionEntry*', '45b9', { - elem('EditionUID', '45bc', 'uint'), - elem('EditionFlagHidden', '45bd', 'uint'), - elem('EditionFlagDefault', '45db', 'uint'), - elem('EditionFlagOrdered', '45dd', 'uint'), - elem('ChapterAtom*', 'b6', { - elem('ChapterUID', '73c4', 'uint'), - elem('ChapterTimeStart', '91', 'uint'), - elem('ChapterTimeEnd', '92', 'uint'), - elem('ChapterFlagHidden', '98', 'uint'), - elem('ChapterFlagEnabled', '4598', 'uint'), - elem('ChapterSegmentUID', '6e67', 'binary'), - elem('ChapterSegmentEditionUID', '6ebc', 'uint'), - elem('ChapterDisplay*', '80', { - elem('ChapString', '85', 'str'), - elem('ChapLanguage*', '437c', 'str'), - elem('ChapCountry*', '437e', 'str'), - }), - }), - }), - }), - elem('Tags*', '1254c367', { - elem('Tag*', '7373', { - elem('Targets', '63c0', { - elem('TargetTypeValue', '68ca', 'uint'), - elem('TargetTrackUID', '63c5', 
'uint'), - elem('TargetEditionUID', '63c9', 'uint'), - elem('TargetChapterUID', '63c4', 'uint'), - elem('TargetAttachmentUID', '63c6', 'uint'), - }), - elem('SimpleTag*', '67c8', { - elem('TagName', '45a3', 'str'), - elem('TagLanguage', '447a', 'str'), - elem('TagString', '4487', 'str'), - }), - }), - }), - }), - ); -} - -1; diff --git a/TOOLS/lib/Parse/Matroska/Element.pm b/TOOLS/lib/Parse/Matroska/Element.pm deleted file mode 100644 index fa0830c11e..0000000000 --- a/TOOLS/lib/Parse/Matroska/Element.pm +++ /dev/null @@ -1,331 +0,0 @@ -use 5.008; -use strict; -use warnings; - -# ABSTRACT: a mid-level representation of an EBML element -package Parse::Matroska::Element; - -use Carp; -use List::Util qw{first}; - -=head1 SYNOPSIS - - use Parse::Matroska::Reader; - my $reader = Parse::Matroska::Reader->new($path); - my $elem = $reader->read_element; - - print "ID: $elem->{elid}\n"; - print "Name: $elem->{name}\n"; - print "Length: $elem->{content_len}\n"; - print "Type: $elem->{type}\n"; - print "Child count: ", scalar(@{$elem->all_children}), "\n"; - if ($elem->{type} eq 'sub') { - while (my $chld = $elem->next_child) { - print "Child Name: $chld->{name}\n"; - } - } else { - print "Value: ", $elem->get_value, "\n"; - } - -=head1 DESCRIPTION - -Represents a single Matroska element as decoded by -L<Parse::Matroska::Reader>. This is essentially a hash -augmented with functions for delay-loading of binary -values and children elements. - -=head1 NOTE - -The API of this module is not yet considered stable. - -=attr elid - -The EBML Element ID, suitable for passing to -L<Parse::Matroska::Definitions/elem_by_hexid>. - -=attr name - -The EBML Element's name. - -=attr type - -The EBML Element's type. Can be C<uint>, C<sint>, -C<float>, C<ebml_id>, C<str> or C<binary>. See L</value> -for details. - -Equivalent to -C<elem_by_hexid($elem-E<gt>{value})-E<gt>{valtype}>. - -=attr value - -The EBML Element's value. Should be obtained through -L</get_value>. 
- -Is an unicode string if the L</type> is C<str>, that is, -the string has already been decoded by L<Encode/decode>. - -Is C<undef> if the L</type> is C<binary> and the contents -were delay-loaded and not yet read. L</get_value> will -do the delayed load if needed. - -Is an arrayref if the L</type> is C<sub>, containing -the children nodes that were already loaded. - -Is a hashref if the L</type> is C<ebml_id>, containing -the referred element's information as defined in -L<Parse::Matroska::Definitions>. Calling -C<elem_by_hexid($elem-E<gt>{value}-E<gt>{elid})> will -return the same object as $elem->{value}. - -=attr full_len - -The entire length of this EBML Element, including -the header's. - -=attr size_len - -The length of the size marker. Used when calculating -L</full_len> from L</content_len> - -=attr content_len - -The length of the contents of this EBML Element, -which excludes the header. - -=attr reader - -A weakened reference to the associated -L<Parse::Matroska::Reader>. - -=method new(%hash) - -Creates a new Element initialized with the hash -given as argument. - -=cut -sub new { - my $class = shift; - my $self = {}; - bless $self, $class; - - $self->initialize(@_); - return $self; -} - -=method initialize(%hash) - -Called by L</new> on initialization. - -=cut -sub initialize { - my ($self, %args) = @_; - for (keys %args) { - $self->{$_} = $args{$_}; - } - $self->{depth} = 0 unless $self->{depth}; -} - -=method skip - -Called by the user to ignore the contents of this EBML node. -Needed when ignoring the children of a node. - -=cut -sub skip { - my ($self) = @_; - my $reader = $self->{reader}; - return unless $reader; # we don't have to skip if there's no reader - my $pos = $reader->getpos; - croak "Too late to skip, reads were already done" - if $pos ne $self->{data_pos}; - $reader->skip($self->{content_len}); -} - -=method get_value($keep_bin) - -Returns the value contained by this EBML element. 
- -If the element has children, returns an arrayref to -the children elements that were already encountered. - -If the element's type is C<binary> and the value was -delay-loaded, does the reading now. - -If $keep_bin is true, the delay-loaded data is kept -as the L</value>, otherwise, further calls to -C<get_value> will reread the data from the L</reader>. - -=cut -sub get_value { - my ($self, $keep_bin) = @_; - - return undef if $self->{type} eq 'skip'; - return $self->{value} if $self->{value}; - - my $reader = $self->{reader} or - croak "The associated Reader has been deleted"; - - # delay-loaded 'binary' - if ($self->{type} eq 'binary') { - croak "Cannot seek in the current Reader" unless $self->{data_pos}; - # seek to the data position... - $reader->setpos($self->{data_pos}); - # read the data, keeping it in value if requested - if ($keep_bin) { - $self->{value} = $reader->readlen($self->{content_len}); - return $self->{value}; - } else { - return $reader->readlen($self->{content_len}); - } - } -} - -=method next_child($read_bin) - -Builtin iterator; reads and returns the next child element. -Always returns undef if the type isn't C<sub>. - -Returns undef at the end of the iterator and resets itself to -point to the first element; so calling L</next_child($read_bin)> -after the iterator returned C<undef> will return the first child. - -The optional C<$read_bin> parameter has the children elements -not delay-load their value if their type is C<binary>. - -If all children elements have already been read, return -each element in-order as would be given by -L</all_children($recurse,$read_bin)>. 
- -=cut -sub next_child { - my ($self, $read_bin) = @_; - return unless $self->{type} eq 'sub'; - - if ($self->{_all_children_read}) { - my $idx = $self->{_last_child} ||= 0; - if ($idx == @{$self->{value}}) { - # reset the iterator, returning undef once - $self->{_last_child} = 0; - return; - } - my $ret = $self->{value}->[$idx]; - - ++$idx; - $self->{_last_child} = $idx; - return $ret; - } - - my $len = defined $self->{remaining_len} - ? $self->{remaining_len} - : $self->{content_len}; - - if ($len == 0) { - # we've read all children; switch into $self->{value} iteration mode - $self->{_all_children_read} = 1; - # return undef since the iterator will reset - return; - } - - $self->{pos_offset} ||= 0; - my $pos = $self->{data_pos}; - my $reader = $self->{reader} or croak "The associated reader has been deleted"; - $reader->setpos($pos); - $reader->{fh}->seek($self->{pos_offset}, 1) if $pos; - - my $chld = $reader->read_element($read_bin); - return undef unless defined $chld; - $self->{pos_offset} += $chld->{full_len}; - - $self->{remaining_len} = $len - $chld->{full_len}; - - if ($self->{remaining_len} < 0) { - croak "Child elements consumed $self->{remaining_len} more bytes than parent $self->{name} contained"; - } - - $chld->{depth} = $self->{depth} + 1; - $self->{value} ||= []; - - push @{$self->{value}}, $chld; - - return $chld; -} - -=method all_children($recurse,$read_bin) - -Calls L</populate_children($recurse,$read_bin)> on self -and returns an arrayref with the children nodes. - -Both C<$recurse> and C<$read_bin> are optional and default -to false. - -=cut -sub all_children { - my ($self, $recurse, $read_bin) = @_; - $self->populate_children($recurse, $read_bin); - return $self->{value}; -} - -=method children_by_name($name) - -Searches in the already read children elements for all -elements with the EBML name C<$name>. Returns an array -containing all found elements. On scalar context, -returns only the first element found. 
- -Croaks if the element's C<type> isn't C<sub>. - -=cut -sub children_by_name { - my ($self, $name) = @_; - return unless defined wantarray; # don't do work if work isn't wanted - croak "Element can't have children" unless $self->{type} eq 'sub'; - - my @found = grep { $_->{name} eq $name } @{$self->{value}}; - return @found if wantarray; # list - return shift @found if defined wantarray; # scalar -} - -=method populate_children($recurse,$read_bin) - -Populates the internal array of children elements, that is, -requests that the associated L<Matroska::Parser::Reader> reads -all children elements. Returns itself. - -Returns false if the element's C<type> isn't C<sub>. - -If C<$recurse> is provided and is true, the method will call -itself in the children elements with the same parameters it -received; this will build a full EBML tree. - -If C<$read_bin> is provided and is true, disables delay-loading -of the contents of C<binary>-type nodes, reading the contents -to memory. - -If both C<$recurse> and C<$read_bin> are true, entire EBML trees -can be loaded without requiring seeks, thus behaving correctly -on unseekable streams. If C<$read_bin> is false, the entire EBML -tree is still loaded, but calling L</get_value> on C<binary>-type -nodes will produce an error on unseekable streams. 
- -=cut -sub populate_children { - my ($self, $recurse, $read_bin) = @_; - - return unless $self->{type} eq 'sub'; - - if (@{$self->{value}} && $recurse) { - # only recurse - foreach (@{$self->{value}}) { - $_->populate_children($recurse, $read_bin); - } - return $self; - } - - while (my $chld = $self->next_child($read_bin)) { - $chld->populate_children($recurse, $read_bin) if $recurse; - } - - return $self; -} - -1; diff --git a/TOOLS/lib/Parse/Matroska/Reader.pm b/TOOLS/lib/Parse/Matroska/Reader.pm deleted file mode 100644 index 614b7b12c0..0000000000 --- a/TOOLS/lib/Parse/Matroska/Reader.pm +++ /dev/null @@ -1,426 +0,0 @@ -use 5.008; -use strict; -use warnings; - -# ABSTRACT: a low-level reader for EBML files -package Parse::Matroska::Reader; - -use Parse::Matroska::Definitions qw{elem_by_hexid}; -use Parse::Matroska::Element; - -use Carp; -use Scalar::Util qw{openhandle weaken}; -use IO::Handle; -use IO::File; -use List::Util qw{first}; -use Encode; - -use constant BIGINT_TRY => 'Pari,GMP,FastCalc'; -use Math::BigInt try => BIGINT_TRY; -use Math::BigRat try => BIGINT_TRY; - -=head1 SYNOPSIS - - use Parse::Matroska::Reader; - my $reader = Parse::Matroska::Reader->new($path); - $reader->close; - $reader->open(\$string_with_matroska_data); - - my $elem = $reader->read_element; - print "Element ID: $elem->{elid}\n"; - print "Element name: $elem->{name}\n"; - if ($elem->{type} ne 'sub') { - print "Element value: $elem->get_value\n"; - } else { - while (my $child = $elem->next_child) { - print "Child element: $child->{name}\n"; - } - } - $reader->close; - -=head1 DESCRIPTION - -Reads EBML data, which is used in Matroska files. -This is a low-level reader which is meant to be used as a backend -for higher level readers. TODO: write the high level readers :) - -=head1 NOTE - -The API of this module is not yet considered stable. - -=method new - -Creates a new reader. -Calls L</open($arg)> with its arguments if provided. 
- -=cut -sub new { - my $class = shift; - my $self = {}; - bless $self, $class; - - $self->open(@_) if @_; - return $self; -} - -=method open($arg) - -Creates the internal filehandle. The argument can be: - -=for :list -* An open filehandle or L<IO::Handle> object. -The filehandle is not C<dup()>ed, so calling L</close> in this -object will close the given filehandle as well. -* A scalar containing a path to a file. -* On perl v5.14 or newer, a scalarref pointing to EBML data. -For similar functionality in older perls, give an L<IO::String> object -or the handle to an already C<open>ed scalarref. - -=cut -sub open { - my ($self, $arg) = @_; - $self->{fh} = openhandle($arg) || IO::File->new($arg, "<:raw") - or croak "Can't open $arg: $!"; -} - -=method close - -Closes the internal filehandle. - -=cut -sub close { - my ($self) = @_; - $self->{fh}->close; - delete $self->{fh}; -} - -# equivalent to $self->readlen(1), possibly faster -sub _getc { - my ($self) = @_; - my $c = $self->{fh}->getc; - croak "Can't do read of length 1: $!" if !defined $c && $!; - return $c; -} - -=method readlen($length) - -Reads C<$length> bytes from the internal filehandle. - -=cut -sub readlen { - my ($self, $len) = @_; - my $data; - my $readlen = $self->{fh}->read($data, $len); - croak "Can't do read of length $len: $!" 
- unless defined $readlen; - return $data; -} - -# converts a byte string into an integer -# we do so by converting the integer into a hex string (big-endian) -# and then reading the hex-string into an integer -sub _bin2int($) { - my ($bin) = @_; - # if the length is larger than 3 - # the resulting integer might be larger than INT_MAX - if (length($bin) > 3) { - return Math::BigInt->from_hex(unpack("H*", $bin)); - } - return hex(unpack("H*", $bin)); -} - -# creates a floating-point number with the given mantissa and exponent -sub _ldexp { - my ($mantissa, $exponent) = @_; - my $r = new Math::BigRat($mantissa); - return $r * Math::BigRat->new(2)**$exponent; -} - -# NOTE: the read_* functions are hard to read because they're ports -# of even harder to read python functions. -# TODO: make them readable - -=method read_id - -Reads an EBML ID atom in hexadecimal string format, suitable -for passing to L<Parse::Matroska::Definitions/elem_by_hexid($id)>. - -=cut -sub read_id { - my ($self) = @_; - my $t = $self->_getc; - return undef unless defined $t; - my $i = 0; - my $mask = 1<<7; - - if (ord($t) == 0) { - croak "Matroska Syntax error: first byte of ID was \\0" - } - until (ord($t) & $mask) { - ++$i; - $mask >>= 1; - } - # return hex string of the bytes we just read - return unpack "H*", ($t . $self->readlen($i)); -} - -=method read_size - -Reads an EBML Data Size atom, which immediately follows -an EBML ID atom. - -This returns an array consisting of: - -=for :list -0. The length of the Data Size atom. -1. The value encoded in the Data Size atom, which is the length of all the data following it. - -=cut -sub read_size { - my ($self) = @_; - my $t = $self->_getc; - my $i = 0; - my $mask = 1<<7; - - if (ord($t) == 0) { - croak "Matroska Syntax error: first byte of data size was \\0" - } - until (ord($t) & $mask) { - ++$i; - $mask >>= 1; - } - $t = $t & chr($mask-1); # strip length bits (keep only significant bits) - return ($i+1, _bin2int $t . 
$self->readlen($i)); -} - -=method read_str($length) - -Reads a string of length C<$length> bytes from the internal filehandle. -The string is already L<Encode/decode>d from C<UTF-8>, which is the -standard Matroska string encoding. - -=cut -{ - my $utf8 = find_encoding("UTF-8"); - sub read_str { - my ($self, $length) = @_; - return $utf8->decode($self->readlen($length)); - } -} - -=method read_uint($length) - -Reads an unsigned integer of length C<$length> bytes -from the internal filehandle. - -Returns a L<Math::BigInt> object if C<$length> is greater -than 4. - -=cut -sub read_uint { - my ($self, $length) = @_; - return _bin2int $self->readlen($length); -} - -=method read_sint($length) - -Reads a signed integer of length C<$length> bytes -from the internal filehandle. - -Returns a L<Math::BigInt> object if C<$length> is greater -than 4. - -=cut -sub read_sint { - my ($self, $length) = @_; - my $i = $self->read_uint($length); - - # Apply 2's complement to the unsigned int - my $mask = int(2 ** ($length * 8 - 1)); - # if the most significant bit is set... - if ($i & $mask) { - # subtract the MSB twice - $i -= 2 * $mask; - } - return $i; -} - -=method read_float($length) - -Reads an IEEE floating point number of length C<$length> -bytes from the internal filehandle. - -Only lengths C<4> and C<8> are supported (C C<float> and C<double>). 
- -=cut -{ - my $b1 = new Math::BigInt 1; - - sub read_float { - my ($self, $length) = @_; - my $i = new Math::BigInt $self->read_uint($length)->bstr; - my $f; - - # These evil expressions reinterpret an unsigned int as IEEE binary floats - if ($length == 4) { - $f = _ldexp(($i & ((1<<23) - 1)) + (1<<23), ($i>>23 & ((1<<8) - 1)) - 150); - $f = -$f if $i & ($b1<<31); - } elsif ($length == 8) { - $f = _ldexp(($i & (($b1<<52) - 1)) + ($b1<<52), ($i>>52 & ((1<<12) - 1)) - 1075); - $f = -$f if $i & ($b1<<63); - } else { - croak "Matroska Syntax error: unsupported IEEE float byte size $length"; - } - - return $f; - } -} - -=method read_ebml_id($length) - -Reads an EBML ID when it's encoded as the data inside another -EBML element, that is, when the enclosing element's C<type> is -C<ebml_id>. - -This returns a hashref with the EBML element description as -defined in L<Parse::Matroska::Definitions>. - -=cut -sub read_ebml_id { - my ($self, $length) = @_; - return elem_by_hexid(unpack("H*", $self->readlen($length))); -} - -=method skip($length) - -Skips C<$length> bytes in the internal filehandle. - -=cut -sub skip { - my ($self, $len) = @_; - return if $self->{fh}->can('seek') && $self->{fh}->seek($len, 1); - $self->readlen($len); - return; -} - -=method getpos - -Wrapper for L<IO::Seekable/$io-E<gt>getpos> in the internal filehandle. - -Returns undef if the internal filehandle can't C<getpos>. - -=cut -sub getpos { - my ($self) = @_; - return undef unless $self->{fh}->can('getpos'); - return $self->{fh}->getpos; -} - -=method setpos($pos) - -Wrapper for L<IO::Seekable/$io-E<gt>setpos> in the internal filehandle. - -Returns C<undef> if the internal filehandle can't C<setpos>. - -Croaks if C<setpos> does not seek to the requested position, -that is, if calling C<getpos> does not yield the same object -as the C<$pos> argument. 
- -=cut -sub setpos { - my ($self, $pos) = @_; - return undef unless $pos && $self->{fh}->can('setpos'); - - my $ret = $self->{fh}->setpos($pos); - croak "Cannot seek to correct position" - unless $self->getpos eq $pos; - return $ret; -} - -=method read_element($read_bin) - -Reads a full EBML element from the internal filehandle. - -Returns a L<Parse::Matroska::Element> object initialized with -the read data. If C<read_bin> is not present or is false, will -delay-load the contents of C<binary> type elements, that is, -they will only be loaded when calling C<get_value> on the -returned L<Parse::Matroska::Element> object. - -Does not read the children of the element if its type is -C<sub>. Look into the L<Parse::Matroska::Element> interface -for details in how to read children elements. - -Pass a true C<$read_bin> if the stream being read is not -seekable (C<getpos> is undef) and the contents of C<binary> -elements is desired, otherwise seeking errors or internal -filehandle corruption might occur. 
- -=cut -sub read_element { - my ($self, $read_bin) = @_; - return undef if $self->{fh}->eof; - - my $elem_pos = $self->getpos; - - my $elid = $self->read_id; - my $elem_def = elem_by_hexid($elid); - my ($size_len, $content_len) = $self->read_size; - my $full_len = length($elid)/2 + $size_len + $content_len; - - my $elem = Parse::Matroska::Element->new( - elid => $elid, - name => $elem_def && $elem_def->{name}, - type => $elem_def && $elem_def->{valtype}, - size_len => $size_len, - content_len => $content_len, - full_len => $full_len, - reader => $self, - elem_pos => $elem_pos, - data_pos => $self->getpos, - ); - weaken($elem->{reader}); - - if (defined $elem_def) { - if ($elem->{type} eq 'sub') { - $elem->{value} = []; - } elsif ($elem->{type} eq 'str') { - $elem->{value} = $self->read_str($content_len); - } elsif ($elem->{type} eq 'ebml_id') { - $elem->{value} = $self->read_ebml_id($content_len); - } elsif ($elem->{type} eq 'uint') { - $elem->{value} = $self->read_uint($content_len); - } elsif ($elem->{type} eq 'sint') { - $elem->{value} = $self->read_sint($content_len); - } elsif ($elem->{type} eq 'float') { - $elem->{value} = $self->read_float($content_len); - } elsif ($elem->{type} eq 'skip') { - $self->skip($content_len); - } elsif ($elem->{type} eq 'binary') { - if ($read_bin) { - $elem->{value} = $self->readlen($content_len); - } else { - $self->skip($content_len); - } - } else { - die "Matroska Definition error: type $elem->{valtype} unknown" - } - } else { - $self->skip($content_len); - } - return $elem; -} - -1; - -=head1 CAVEATS - -Children elements have to be processed as soon as an element -with children is found, or their children ignored with -L<Parse::Matroska::Element/skip>. Not doing so doesn't cause -errors but results in an invalid structure, with constant C<0> -depth. - -To work correctly in unseekable streams, either the contents -of C<binary>-type elements has to be ignored or the C<read_bin> -flag to C<read_element> has to be true. 
diff --git a/TOOLS/lib/Parse/Matroska/Utils.pm b/TOOLS/lib/Parse/Matroska/Utils.pm deleted file mode 100644 index 127d626cb1..0000000000 --- a/TOOLS/lib/Parse/Matroska/Utils.pm +++ /dev/null @@ -1,37 +0,0 @@ -use strict; -use warnings; - -# ABSTRACT: internally-used helper functions -package Parse::Matroska::Utils; - -use Exporter; -our @ISA = qw{Exporter}; -our @EXPORT_OK = qw{uniq uncamelize}; - -=method uniq(@array) - -The same as L<List::MoreUtils/"uniq LIST">. -Included to avoid depending on it since it's -not a core module. - -=cut -sub uniq(@) { - my %seen; - return grep { !$seen{$_}++ } @_; -} - -=method uncamelize($string) - -Converts a "StringLikeTHIS" into a -"string_like_this". - -=cut -sub uncamelize($) { - local $_ = shift; - # lc followed by UC: lc_UC - s/(?<=[a-z])([A-Z])/_\L$1/g; - # UC followed by two lc: _UClclc - s/([A-Z])(?=[a-z]{2})/_\L$1/g; - # strip leading _ that the second regexp might add; lowercase all - s/^_//; lc -} diff --git a/TOOLS/matroska.pl b/TOOLS/matroska.pl deleted file mode 100755 index 41e4f6aa81..0000000000 --- a/TOOLS/matroska.pl +++ /dev/null @@ -1,169 +0,0 @@ -#! /usr/bin/env perl - -# Generate C definitions for parsing Matroska files. - -use strict; -use warnings; - -use FindBin; -use lib "$FindBin::Bin/lib"; -use Parse::Matroska::Definitions; -use Parse::Matroska::Reader; - -use Getopt::Long; -use List::Util qw{max}; - -my @global_elem_list = @Parse::Matroska::Definitions::global_elem_list; - -Getopt::Long::Configure(qw{auto_version auto_help}); -my %opt; -GetOptions(\%opt, - "generate-header", - "generate-definitions", - "full", - ); - -if ($opt{"generate-header"}) { - generate_c_header(); -} elsif ($opt{"generate-definitions"}) { - generate_c_definitions(); -} else { - for (@ARGV) { - my $reader = Parse::Matroska::Reader->new($_ eq '-' ? 
\*STDIN : $_) or die $!; - while (my $elem = $reader->read_element($_ eq '-')) { - process_elem($elem, $_ eq '-'); - } - } -} - -# Generate declarations for libmpdemux/ebml_types.h -sub generate_c_header { - print "/* Generated by TOOLS/matroska.pl, do not edit manually */\n\n"; - - # Write a #define for the ElementID of each known element - for my $el (@global_elem_list) { - printf "#define %-40s 0x%s\n", $el->{definename}, $el->{elid}; - } - print "\n"; - - # Define a struct for each ElementID that has child elements - for my $el (@global_elem_list) { - next unless $el->{subelements}; - print "\nstruct $el->{structname} {\n"; - - # Figure out the length of the longest variable name - # Used for pretty-printing in the next step - my $l = max(map { length $_->{valname} } values %{$el->{subelements}}); - - # Output each variable, with pointers for array (multiple) elements - for my $subel (sort { $a->{definename} cmp $b->{definename} } values %{$el->{subelements}}) { - printf " %-${l}s %s%s;\n", - $subel->{valname}, $subel->{multiple}?'*':' ', $subel->{fieldname}; - } - print "\n"; - - # Output a counter variable for each element - # (presence/absence for scalars, item count for arrays) - for my $subel (sort values %{$el->{subelements}}) { - print " int n_$subel->{fieldname};\n" - } - print "};\n"; - } - print "\n"; - - # Output extern references for ebml_elem_desc structs for each of the elements - # These are defined by generate_c_definitions - for my $el (@global_elem_list) { - next unless $el->{subelements}; - print "extern const struct ebml_elem_desc $el->{structname}_desc;\n"; - } - print "\n"; - - # Output the max number of sub-elements a known element might have - printf "#define MAX_EBML_SUBELEMENTS %d\n", - max(map { scalar keys %{$_->{subelements}} } - grep { $_->{subelements} } @global_elem_list); -} - -# Generate definitions for libmpdemux/ebml_defs.c -sub generate_c_definitions { - print "/* Generated by TOOLS/matroska.pl, do not edit manually */\n\n"; 
- # ebml_defs.c uses macros declared in ebml.c - for my $el (@global_elem_list) { - print "\n"; - if ($el->{subelements}) { - # set N for the next macros - print "#define N $el->{fieldname}\n"; - - # define a struct ebml_$N_desc and gets ready to define fields - # this secretly opens two scopes; hence the }}; at the end - print "E_S(\"$el->{name}\", ".scalar(keys %{$el->{subelements}}).")\n"; - - # define a field for each subelement - # also does lots of macro magic, but doesn't open a scope - for my $subel (sort { $a->{definename} cmp $b->{definename} } values %{$el->{subelements}}) { - print "F($subel->{definename}, $subel->{fieldname}, ". - ($subel->{multiple}?'1':'0').")\n"; - } - # close the struct - print "}};\n"; - - # unset N since we've used it - print "#undef N\n"; - } else { - print "E(\"$el->{name}\", $el->{fieldname}, $el->{ebmltype})\n"; - } - } -} - -sub repr { - my @ret; - foreach (@_) { - if (/'/) { - s/"/\\"/g; - push @ret, "\"$_\""; - } else { - push @ret, "'$_'"; - } - } - return @ret if wantarray; - return pop @ret if defined wantarray; - return; -} - -sub process_elem { - my ($elem, $read_bin) = @_; - unless ($opt{full}) { - if ($elem->{name} eq 'Cluster' || $elem->{name} eq 'Cues') { - $elem->skip; - return; - } - } - die unless $elem; - - if ($elem->{type} ne 'skip') { - print "$elem->{depth} $elem->{elid} $elem->{name} size: $elem->{content_len} value: "; - } - - if ($elem->{type} eq 'sub') { - print "subelements:\n"; - while (my $chld = $elem->next_child($read_bin)) { - process_elem($chld); - } - } elsif ($elem->{type} eq 'binary') { - my $t = "<skipped $elem->{content_len} bytes>"; - if ($elem->{content_len} < 20) { - $t = unpack "H*", $elem->get_value; - } - print "binary $t\n"; - delete $elem->{value}; - } elsif ($elem->{type} eq 'ebml_id') { - print "binary $elem->{value}->{elid} (".($elem->{value}->{name}||"UNKNOWN").")\n"; - } elsif ($elem->{type} eq 'skip') { - # skip - } elsif ($elem->{type} eq 'str') { - print "string ". 
repr($elem->get_value) . "\n"; - } else { - print "$elem->{type} ". $elem->get_value ."\n"; - } -} diff --git a/TOOLS/matroska.py b/TOOLS/matroska.py new file mode 100755 index 0000000000..91e65a26b3 --- /dev/null +++ b/TOOLS/matroska.py @@ -0,0 +1,463 @@ +#!/usr/bin/env python +""" +Generate C definitions for parsing Matroska files. +Can also be used to directly parse Matroska files and display their contents. +""" + +# +# This file is part of MPlayer. +# +# MPlayer is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# MPlayer is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with MPlayer; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
#

# for compatibility with Python 2.x
from __future__ import print_function

# Element tables.  Each entry is 'Name, hexid, type'; a trailing '*' on the
# name marks an element that may occur multiple times, and a tuple following
# a 'sub' entry lists that element's children.
elements_ebml = (
    'EBML, 1a45dfa3, sub', (
        'EBMLVersion, 4286, uint',
        'EBMLReadVersion, 42f7, uint',
        'EBMLMaxIDLength, 42f2, uint',
        'EBMLMaxSizeLength, 42f3, uint',
        'DocType, 4282, str',
        'DocTypeVersion, 4287, uint',
        'DocTypeReadVersion, 4285, uint',
    ),

    'CRC32, bf, binary',
    'Void, ec, binary',
)

elements_matroska = (
    'Segment, 18538067, sub', (

        'SeekHead*, 114d9b74, sub', (
            'Seek*, 4dbb, sub', (
                'SeekID, 53ab, ebml_id',
                'SeekPosition, 53ac, uint',
            ),
        ),

        'Info*, 1549a966, sub', (
            'SegmentUID, 73a4, binary',
            'PrevUID, 3cb923, binary',
            'NextUID, 3eb923, binary',
            'TimecodeScale, 2ad7b1, uint',
            'DateUTC, 4461, sint',
            'Title, 7ba9, str',
            'MuxingApp, 4d80, str',
            'WritingApp, 5741, str',
            'Duration, 4489, float',
        ),

        'Cluster*, 1f43b675, sub', (
            'Timecode, e7, uint',
            'BlockGroup*, a0, sub', (
                'Block, a1, binary',
                'BlockDuration, 9b, uint',
                'ReferenceBlock*, fb, sint',
                'DiscardPadding, 75A2, sint',
            ),
            'SimpleBlock*, a3, binary',
        ),

        'Tracks*, 1654ae6b, sub', (
            'TrackEntry*, ae, sub', (
                'TrackNumber, d7, uint',
                'TrackUID, 73c5, uint',
                'TrackType, 83, uint',
                'FlagEnabled, b9, uint',
                'FlagDefault, 88, uint',
                'FlagForced, 55aa, uint',
                'FlagLacing, 9c, uint',
                'MinCache, 6de7, uint',
                'MaxCache, 6df8, uint',
                'DefaultDuration, 23e383, uint',
                'TrackTimecodeScale, 23314f, float',
                'MaxBlockAdditionID, 55ee, uint',
                'Name, 536e, str',
                'Language, 22b59c, str',
                'CodecID, 86, str',
                'CodecPrivate, 63a2, binary',
                'CodecName, 258688, str',
                'CodecDecodeAll, aa, uint',
                'CodecDelay, 56aa, uint',
                'SeekPreRoll, 56bb, uint',
                'Video, e0, sub', (
                    'FlagInterlaced, 9a, uint',
                    'PixelWidth, b0, uint',
                    'PixelHeight, ba, uint',
                    'DisplayWidth, 54b0, uint',
                    'DisplayHeight, 54ba, uint',
                    'DisplayUnit, 54b2, uint',
                    'FrameRate, 2383e3, float',
                    'ColourSpace, 2eb524, binary',
                    'StereoMode, 53b8, uint',
                    'Colour, 55b0, sub', (
                        'MatrixCoefficients, 55B1, uint',
                        'BitsPerChannel, 55B2, uint',
                        'ChromaSubsamplingHorz, 55B3, uint',
                        'ChromaSubsamplingVert, 55B4, uint',
                        'CbSubsamplingHorz, 55B5, uint',
                        'CbSubsamplingVert, 55B6, uint',
                        'ChromaSitingHorz, 55B7, uint',
                        'ChromaSitingVert, 55B8, uint',
                        'Range, 55B9, uint',
                        'TransferCharacteristics, 55BA, uint',
                        'Primaries, 55BB, uint',
                        'MaxCLL, 55BC, uint',
                        'MaxFALL, 55BD, uint',
                        'MasteringMetadata, 55D0, sub', (
                            'PrimaryRChromaticityX, 55D1, float',
                            'PrimaryRChromaticityY, 55D2, float',
                            'PrimaryGChromaticityX, 55D3, float',
                            'PrimaryGChromaticityY, 55D4, float',
                            'PrimaryBChromaticityX, 55D5, float',
                            'PrimaryBChromaticityY, 55D6, float',
                            'WhitePointChromaticityX, 55D7, float',
                            'WhitePointChromaticityY, 55D8, float',
                            'LuminanceMax, 55D9, float',
                            'LuminanceMin, 55DA, float',
                        ),
                    ),
                ),
                'Audio, e1, sub', (
                    'SamplingFrequency, b5, float',
                    'OutputSamplingFrequency, 78b5, float',
                    'Channels, 9f, uint',
                    'BitDepth, 6264, uint',
                ),
                'ContentEncodings, 6d80, sub', (
                    'ContentEncoding*, 6240, sub', (
                        'ContentEncodingOrder, 5031, uint',
                        'ContentEncodingScope, 5032, uint',
                        'ContentEncodingType, 5033, uint',
                        'ContentCompression, 5034, sub', (
                            'ContentCompAlgo, 4254, uint',
                            'ContentCompSettings, 4255, binary',
                        ),
                    ),
                ),
            ),
        ),

        'Cues, 1c53bb6b, sub', (
            'CuePoint*, bb, sub', (
                'CueTime, b3, uint',
                'CueTrackPositions*, b7, sub', (
                    'CueTrack, f7, uint',
                    'CueClusterPosition, f1, uint',
                    'CueRelativePosition, f0, uint',
                    'CueDuration, b2, uint',
                ),
            ),
        ),

        'Attachments, 1941a469, sub', (
            'AttachedFile*, 61a7, sub', (
                'FileDescription, 467e, str',
                'FileName, 466e, str',
                'FileMimeType, 4660, str',
                'FileData, 465c, binary',
                'FileUID, 46ae, uint',
            ),
        ),

        'Chapters, 1043a770, sub', (
            'EditionEntry*, 45b9, sub', (
                'EditionUID, 45bc, uint',
                'EditionFlagHidden, 45bd, uint',
                'EditionFlagDefault, 45db, uint',
                'EditionFlagOrdered, 45dd, uint',
                'ChapterAtom*, b6, sub', (
                    'ChapterUID, 73c4, uint',
                    'ChapterTimeStart, 91, uint',
                    'ChapterTimeEnd, 92, uint',
                    'ChapterFlagHidden, 98, uint',
                    'ChapterFlagEnabled, 4598, uint',
                    'ChapterSegmentUID, 6e67, binary',
                    'ChapterSegmentEditionUID, 6ebc, uint',
                    'ChapterDisplay*, 80, sub', (
                        'ChapString, 85, str',
                        'ChapLanguage*, 437c, str',
                        'ChapCountry*, 437e, str',
                    ),
                ),
            ),
        ),
        'Tags*, 1254c367, sub', (
            'Tag*, 7373, sub', (
                'Targets, 63c0, sub', (
                    'TargetTypeValue, 68ca, uint',
                    'TargetTrackUID, 63c5, uint',
                    'TargetEditionUID, 63c9, uint',
                    'TargetChapterUID, 63c4, uint',
                    'TargetAttachmentUID, 63c6, uint',
                ),
                'SimpleTag*, 67c8, sub', (
                    'TagName, 45a3, str',
                    'TagLanguage, 447a, str',
                    'TagString, 4487, str'
                ),
            ),
        ),
    ),
)


import struct
import sys
from math import ldexp  # retained from the original import list (now unused)
from binascii import hexlify


def byte2num(s):
    """Interpret a byte string as a big-endian unsigned integer.

    An empty string yields 0, so zero-length integer elements decode
    instead of raising ValueError from int('', 16).
    """
    if not s:
        return 0
    return int(hexlify(s), 16)


class EOF(Exception):
    """Raised by read() when the stream ends before `length` bytes arrive."""
    pass


def camelcase_to_words(name):
    """Convert 'StringLikeTHIS' style names to 'string_like_this'.

    A new word starts at an upper-case letter that either follows a
    lower-case letter or is itself followed by a lower-case letter.
    """
    parts = []
    start = 0
    for i in range(1, len(name)):
        if name[i].isupper() and (name[i-1].islower() or
                                  name[i+1:i+2].islower()):
            parts.append(name[start:i])
            start = i
    parts.append(name[start:])
    return '_'.join(parts).lower()


class MatroskaElement(object):
    """Description of one known EBML/Matroska element.

    Holds the names used by the C generators (definename, fieldname,
    structname, ebmltype, valname) plus the element ID exactly as written
    in the table (elid) and its value type (valtype).
    """

    def __init__(self, name, elid, valtype, namespace):
        self.name = name
        self.definename = '{0}_ID_{1}'.format(namespace, name.upper())
        self.fieldname = camelcase_to_words(name)
        self.structname = 'ebml_' + self.fieldname
        self.elid = elid
        self.valtype = valtype
        if valtype == 'sub':
            self.ebmltype = 'EBML_TYPE_SUBELEMENTS'
            self.valname = 'struct ' + self.structname
        else:
            self.ebmltype = 'EBML_TYPE_' + valtype.upper()
            try:
                self.valname = {'uint': 'uint64_t', 'str': 'char *',
                                'binary': 'bstr', 'ebml_id': 'uint32_t',
                                'float': 'double', 'sint': 'int64_t',
                                }[valtype]
            except KeyError:
                raise SyntaxError('Unrecognized value type ' + valtype)
        self.subelements = ()
        # Always present, so parse_one can test membership on any parent.
        self.subids = frozenset()

    def add_subelements(self, subelements):
        """Attach the (element, multiple) child list and index the child IDs."""
        self.subelements = subelements
        # Lower-cased: parse_one compares against hexlify() output, which is
        # always lower-case, while some table IDs are written in upper-case.
        self.subids = set(x[0].elid.lower() for x in subelements)


elementd = {}      # lower-case hex ID -> MatroskaElement
elementlist = []   # every element, in table order


def parse_elems(l, namespace):
    """Register the elements of table `l` and return [(element, multiple)].

    A tuple entry is the child table of the string entry just before it.
    Raises SyntaxError if a child table appears before any element.
    """
    subelements = []
    new = None
    for el in l:
        if isinstance(el, str):
            name, hexid, eltype = [x.strip() for x in el.split(',')]
            multiple = name.endswith('*')
            name = name.strip('*')
            new = MatroskaElement(name, hexid, eltype, namespace)
            # Key by lower-case ID so lookups from parsed files succeed even
            # for IDs spelled like '75A2'; new.elid keeps the table spelling
            # so the generated '#define ... 0x<id>' text is unchanged.
            elementd[hexid.lower()] = new
            elementlist.append(new)
            subelements.append((new, multiple))
        else:
            if new is None:
                raise SyntaxError('Sub-element table without a parent element')
            new.add_subelements(parse_elems(el, namespace))
    return subelements


parse_elems(elements_ebml, 'EBML')
parse_elems(elements_matroska, 'MATROSKA')


def generate_C_header():
    """Print the C header: ID #defines, one struct per 'sub' element,
    extern descriptors, and MAX_EBML_SUBELEMENTS."""
    print('// Generated by TOOLS/matroska.py, do not edit manually')
    print()

    for el in elementlist:
        print('#define {0.definename:40} 0x{0.elid}'.format(el))

    print()

    # NOTE(review): the scraped source collapsed whitespace runs, so the
    # leading indent inside the two member format strings below may have
    # been wider originally -- confirm against upstream before regenerating.
    for el in reversed(elementlist):
        if not el.subelements:
            continue
        print()
        print('struct {0.structname} {{'.format(el))
        width = max(len(subel.valname)
                    for subel, multiple in el.subelements) + 1
        for subel, multiple in el.subelements:
            print(' {e.valname:{width}} {star}{e.fieldname};'.format(
                e=subel, width=width, star=' *'[multiple]))
        print()
        for subel, multiple in el.subelements:
            print(' int n_{0.fieldname};'.format(subel))
        print('};')

    for el in elementlist:
        if not el.subelements:
            continue
        print('extern const struct ebml_elem_desc {0.structname}_desc;'.format(
            el))

    print()
    print('#define MAX_EBML_SUBELEMENTS', max(len(el.subelements)
                                              for el in elementlist))


def generate_C_definitions():
    """Print the C definitions file built from the E_S/F/E macros."""
    print('// Generated by TOOLS/matroska.py, do not edit manually')
    print()
    for el in reversed(elementlist):
        print()
        if el.subelements:
            print('#define N', el.fieldname)
            print('E_S("{0}", {1})'.format(el.name, len(el.subelements)))
            for subel, multiple in el.subelements:
                print('F({0.definename}, {0.fieldname}, {1})'.format(
                    subel, int(multiple)))
            print('}};')
            print('#undef N')
        else:
            print('E("{0.name}", {0.fieldname}, {0.ebmltype})'.format(el))


def read(s, length):
    """Read exactly `length` bytes from stream `s`; raise EOF if short."""
    t = s.read(length)
    if len(t) != length:
        raise EOF
    return t


def read_id(s):
    """Read an EBML element ID and return its raw bytes (marker included).

    The number of leading zero bits in the first byte gives the number of
    bytes that follow.  Raises SyntaxError if the first byte is 0.
    """
    t = read(s, 1)
    first = ord(t)
    if first == 0:
        raise SyntaxError
    extra = 0
    mask = 128
    while not first & mask:
        extra += 1
        mask >>= 1
    return t + read(s, extra)


def read_vint(s):
    """Read an EBML variable-length integer.

    Returns (encoded_size_in_bytes, value) with the length-marker bit
    removed from the value.  Raises SyntaxError if the first byte is 0.
    """
    first = ord(read(s, 1))
    if first == 0:
        raise SyntaxError
    extra = 0
    mask = 128
    while not first & mask:
        extra += 1
        mask >>= 1
    # bytearray((n,)) is a single byte on both Python 2 and 3, whereas
    # bytes((n,)) on Python 2 is the text "(n,)".
    data = bytearray((first & (mask - 1),)) + read(s, extra)
    return extra + 1, byte2num(data)


def read_str(s, length):
    """Read `length` raw bytes (callers decode as needed)."""
    return read(s, length)


def read_uint(s, length):
    """Read a big-endian unsigned integer of `length` bytes (0 bytes -> 0)."""
    return byte2num(read(s, length))


def read_sint(s, length):
    """Read a big-endian two's-complement integer of `length` bytes."""
    if length == 0:
        # Zero-length integer means 0; also avoids a negative shift below.
        return 0
    i = read_uint(s, length)
    mask = 1 << (length * 8 - 1)
    if i & mask:
        i -= 2 * mask
    return i


def read_float(s, length):
    """Read a big-endian IEEE float of 4 or 8 bytes (0 bytes -> 0.0).

    Decoded with struct so that zero, denormals, infinities and NaN come
    out right; raises SyntaxError for any other length.
    """
    t = read(s, length)
    if length == 4:
        return struct.unpack('>f', t)[0]
    if length == 8:
        return struct.unpack('>d', t)[0]
    if length == 0:
        return 0.0
    raise SyntaxError


def parse_one(s, depth, parent, maxlen):
    """Parse and print one element (recursing into children).

    Returns the total number of bytes the element occupies (ID + size
    field + content).  `maxlen` is accepted but not used.  Raises
    NotImplementedError for an element that is not a known child of
    `parent` (Void/CRC32 are allowed anywhere).
    """
    elid = hexlify(read_id(s)).decode('ascii')
    elem = elementd.get(elid)
    if (parent is not None and elid not in parent.subids
            and elid not in ('ec', 'bf')):
        print('Unexpected:', elid)
        raise NotImplementedError
    size, length = read_vint(s)
    # Integer division keeps the byte count an int on Python 3.
    this_length = len(elid) // 2 + size + length
    if elem is not None:
        if elem.valtype != 'skip':
            print(depth, elid, elem.name, 'size:', length, 'value:', end=' ')
        if elem.valtype == 'sub':
            print('subelements:')
            while length > 0:
                length -= parse_one(s, depth + 1, elem, length)
            if length < 0:
                raise SyntaxError
        elif elem.valtype == 'str':
            print('string', repr(read_str(s, length).decode('utf8', 'replace')))
        elif elem.valtype in ('binary', 'ebml_id'):
            t = read_str(s, length)
            dec = ''
            if elem.valtype == 'ebml_id':
                idelem = elementd.get(hexlify(t).decode('ascii'))
                if idelem is None:
                    dec = '(UNKNOWN)'
                else:
                    dec = '({0.name})'.format(idelem)
            if len(t) < 20:
                t = hexlify(t).decode('ascii')
            else:
                t = '<skipped {0} bytes>'.format(len(t))
            print('binary', t, dec)
        elif elem.valtype == 'uint':
            print('uint', read_uint(s, length))
        elif elem.valtype == 'sint':
            print('sint', read_sint(s, length))
        elif elem.valtype == 'float':
            print('float', read_float(s, length))
        elif elem.valtype == 'skip':
            read(s, length)
        else:
            raise NotImplementedError
    else:
        print(depth, 'Unknown element:', elid, 'size:', length)
        read(s, length)
    return this_length


def parse_toplevel(s):
    """Parse one top-level element from stream `s`."""
    parse_one(s, 0, None, 1 << 63)


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 2:
        sys.stderr.write('usage: {0} --generate-header | '
                         '--generate-definitions | FILE\n'.format(argv[0]))
        return 2
    if argv[1] == '--generate-header':
        generate_C_header()
    elif argv[1] == '--generate-definitions':
        generate_C_definitions()
    else:
        with open(argv[1], "rb") as s:
            while 1:
                start = s.tell()
                try:
                    parse_toplevel(s)
                except EOF:
                    # EOF exactly at an element boundary is a clean end.
                    if s.tell() != start:
                        raise Exception("Unexpected end of file")
                    break
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))