aboutsummaryrefslogtreecommitdiffhomepage
path: root/DOCS
diff options
context:
space:
mode:
authorGravatar michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2>2004-10-05 12:04:56 +0000
committerGravatar michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2>2004-10-05 12:04:56 +0000
commita155a39028328b691c52bf0c897f3541f612fc63 (patch)
treea5744942775c10c6e5667563279410e56cb2de6e /DOCS
parent2dcda86f34e21e38629ed9b69eb3f90a4e5b1669 (diff)
remove non native codec specific data
move lang to the info packet git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@13560 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'DOCS')
-rw-r--r--DOCS/tech/mncf.txt661
1 files changed, 661 insertions, 0 deletions
diff --git a/DOCS/tech/mncf.txt b/DOCS/tech/mncf.txt
new file mode 100644
index 0000000000..5afb1045be
--- /dev/null
+++ b/DOCS/tech/mncf.txt
@@ -0,0 +1,661 @@
+ NUT Open Container Format DRAFT 20041005 (Michael's experimental fork)
+ ----------------------------------------
+
+
+
+ Intro:
+
+Features / goals:
+ (supported by the format, not necessary by a specific implementation)
+
+Simple
+ use the same encoding for nearly all fields
+ simple decoding, so slow cpus (and embedded systems) can handle it
+Extendible
+ no limit for the possible values for all fields (using universal vlc)
+ allow adding of new headers in the future
+ allow adding more fields at the end of headers
+Compact
+ ~0.2% overhead, for normal bitrates
+ index is <10kb per hour (1 keyframe every 3sec)
+ a usual header for a file is about 100bytes (audio + video headers together)
+ a packet header is about ~1-8 bytes
+Error resistant
+ seeking / playback without an index
+ headers & index can be repeated
+ damaged files can be played back with minimal data lost and fast
+ resyncing times
+
+
+
+ Definitions:
+
+MUST the specific part must be done to conform to this standard
+SHOULD its recommanded to be done that way but its not strictly required
+
+
+
+ Syntax:
+
+ Type definitions:
+
+f(x) n fixed bits in big-endian order
+u(x) unsigned number encoded in x bits in MSB first order
+
+v
+ value=0
+ do{
+ more_data u(1)
+ data u(7)
+ value= 128*value + data
+ }while(more_data)
+
+s
+ temp v
+ temp++
+ if(temp&1) value= -(temp>>1)
+ else value= (temp>>1)
+
+b (binary data or string)
+ for(i=0; i<length; i++){
+ data[i] u(8)
+ }
+ Note: strings MUST be encoded in utf8
+
+vb
+ length v
+ value b
+
+
+ Bitstream syntax:
+packet header
+ forward ptr v
+
+align_byte
+ while(not byte aligned)
+ one f(1)
+
+reserved_bytes
+ for(i=0; i<forward_ptr - length_of_non_reserved; i++)
+ reserved u(8)
+ a demuxer MUST ignore any reserved bytes
+ a muxer MUST NOT write any reserved bytes, as this would make it
+ inpossible to add new fields at the end of packets in the future in
+ a compatible way
+
+main header:
+ main_startcode f(64)
+ packet header
+ version v
+ stream_count v
+ max_distance v
+ max_short_distance v
+ global_time_base_nom v
+ global_time_base_denom v
+ short_startcode v
+ for(i=0; i<256; ){
+ tmp_flag v
+ tmp_fields v
+ if(tmp_fields>0) tmp_timestamp s
+ if(tmp_fields>1) tmp_mul v
+ if(tmp_fields>2) tmp_stream v
+ if(tmp_fields>3) tmp_size v
+ else tmp_size=0
+ if(tmp_fields>4) tmp_res v
+ else tmp_res=0
+ if(tmp_fields>5) count v
+ else count= tmp_mul - tmp_size
+ for(j=6; j<tmp_fields; j++){
+ tmp_reserved[i] v
+ }
+ for(j=0; j<count && i<256; j++, i++){
+ flags[i]= tmp_flag;
+ stream_id_plus1[i]= tmp_stream;
+ data_size_mul[i]= tmp_mul;
+ data_size_lsb[i]= tmp_size + j;
+ timestamp_delta[i]= tmp_timestamp;
+ reserved_count[i]= tmp_res;
+ }
+ }
+ reserved_bytes
+ checksum u(32)
+
+stream_header:
+ stream_startcode f(64)
+ packet_header
+ stream_id v
+ stream_class v
+ fourcc vb
+ average_bitrate v
+ time_base_nom v
+ time_base_denom v
+ msb_timestamp_shift v
+ decode_delay v
+ fixed_fps u(1)
+ reserved u(6)
+ codec_specific_data vb
+
+video_stream_header:
+ stream_header
+ width v
+ height v
+ sample_width v
+ sample_height v
+ colorspace_type v
+ reserved_bytes
+ checksum u(32)
+
+audio_stream_header:
+ stream_header
+ samplerate_nom v
+ samplerate_denom v
+ channel_count v
+ reserved_bytes
+ checksum u(32)
+
+
+frame
+ frame_code f(8)
+ if(stream_id_plus1[frame_code]==0){
+ stream_id v
+ }
+ if(timestamp_delta[frame_code]==0){
+ coded_timestamp v
+ }
+ if(flags[frame_code]&1){
+ data_size_msb v
+ }
+ for(i=0; i<reserved_count[frame_code]; i++)
+ reserved v
+ data
+
+Index:
+ index_startcode f(64)
+ packet header
+ index_length v
+ for(i=0; i<index_length; i++){
+ index_timestamp v
+ index_position v
+ }
+ reserved_bytes
+ checksum u(32)
+
+info_packet: (optional)
+ info_startcode f(64)
+ packet header
+ for(;;){
+ id v
+ if(id==0) break
+ name= info_table[id][0]
+ type= info_table[id][1]
+ if(type==NULL)
+ type vb
+ if(name==NULL)
+ name vb
+ if(type=="v")
+ value v
+ else
+ value vb
+ }
+ reserved_bytes
+ checksum u(32)
+
+sync_point
+ frame_startcode f(64)
+ global_timestamp v
+
+file
+ file_id_string
+ while(!eof && next_code != index_startcode){
+ main_header
+ for(i=0; i<stream_count; i++){
+ if(next_packet==video_stream_header)
+ video_stream_header
+ else
+ audio_stream_header
+ }
+ while(next_code != main_startcode){
+ if(next_code == info_startcode)
+ info_packet
+ else{
+ if(next_code == short_startcode)
+ short_startcode u(24)
+ else if(next_code == frame_startcode)
+ sync_point
+ frame
+ }
+ }
+ }
+ index
+
+forward_ptr
+ size of the packet (exactly the distance from the first byte of the
+ startcode of the current packet to the first byte of the following packet
+
+file_id_string
+ "nut/multimedia container\0"
+
+*_startcode
+ all startcodes start with 'N'
+
+main_startcode
+ 0x7A561F5F04ADULL + (((uint64_t)('N'<<8) + 'M')<<48)
+stream_starcode
+ 0x11405BF2F9DBULL + (((uint64_t)('N'<<8) + 'S')<<48)
+frame_startcode
+ 0xE4ADEECA4569ULL + (((uint64_t)('N'<<8) + 'K')<<48)
+ frame_startcodes SHOULD be placed immedeatly before a keyframe if the
+ previous frame of the same stream was a non-keyframe, unless such
+ non-keyframe - keyframe tansitions are very frequent
+
+index_startcode
+ 0xDD672F23E64EULL + (((uint64_t)('N'<<8) + 'X')<<48)
+info_startcode
+ 0xAB68B596BA78ULL + (((uint64_t)('N'<<8) + 'I')<<48)
+
+version
+ 2 for now
+
+max_distance
+ max distance of frame_startcodes, the distance may only be larger if
+ there is only a single frame between the 2 frame_startcodes
+ this can be used by the demuxer to detect damaged frame headers if the
+ damage results in a too long chain
+ SHOULD be set to <=32768 or at least <=65536 unless there is a very good
+ reason to set it higher otherwise reasonable error recovery will be
+ impossible
+
+max_short_distance
+ max distance of short startcodes or frame_startcodes, the distance may
+ only be larger if there is only a single frame between the 2
+ frame_startcodes/short startcodes this can be used by the demuxer to
+ detect damaged frame headers if the damage results in a too long chain
+ SHOULD be set to <=4096 or at least <=8192 unless there is a very good
+ reason to set it higher otherwise reasonable error recovery will be
+ impossible
+
+
+short_startcode
+ MUST be 3 bytes long and MUST have 'N' as first byte, the second byte
+ MUST not be a printable uppercase letter / must not be within 65..90,
+ default is 0x4EFE79
+
+stream_id
+ Note: streams with a lower relative class MUST have a lower relative id
+ so a stream with class 0 MUST allways have a id which is lower then any
+ stream with class > 0
+ stream_id MUST be < stream_count
+
+stream_class
+ 0 video
+ 32 audio
+ 64 subtiles
+ Note the remaining values are reserved and MUST NOT be used
+ a demuxer MUST ignore streams with reserved classes
+
+fourcc
+ identification for the codec
+ example: "H264"
+ MUST contain 2 or 4 bytes, note, this might be increased in the future
+ if needed
+
+time_base_nom / time_base_denom = time_base
+ the number of timer ticks per second, this MUST be equal to the fps
+ if the fixed_fps is 1
+ time_base_denom MUST not be 0
+ time_base_nom and time_base_denom MUST be relative prime
+ time_base_nom MUST be < 2^31
+ examples:
+ fps time_base_nom time_base_denom
+ 30 30 1
+ 29.97 30000 1001
+ 23.976 24000 1001
+ sample_rate sample_rate_mul time_base_nom time_base_denom
+ 44100 1 44100 1
+ 44100 64 11025 16
+ 48000 1024 375 8
+ Note: the advantage to using a large sample_rate_mul is that the
+ timestamps need fewer bits
+
+global_time_base_nom / global_time_base_denom = global_time_base
+ the number of timer ticks per second
+ global_time_base_denom MUST not be 0
+ global_time_base_nom and global_time_base_denom MUST be relative prime
+ global_time_base_nom MUST be < 2^31
+
+global_timestamp
+ timestamp in global_time_base units
+ when a global_timestamp is encountered the last_timestamp of all streams
+ is set to the following:
+ ln= global_time_base_denom*time_base_nom
+ sn= global_timestamp
+ d1= global_time_base_nom
+ d2= time_base_denom
+ last_timestamp= (ln/d1*sn + ln%d1*sn/d1)/d2
+ Note, this calculation MUST be done with unsigned 64 bit integers, and
+ is equivalent to (ln*sn)/(d1*d2) but this would require a 96bit integer
+
+msb_timestamp_shift
+ amount of bits in lsb_timestamp
+ MUST be <16
+
+decode_delay
+ maximum time between input and output for a codec, used to generate dts
+ from pts
+ is 0 for streams without b frames, and 1 for streams with b frames, may
+ be larger for future codecs
+
+fixed_fps
+ 1 indicates that the fps is fixed
+
+codec_specific_data
+ private global data for a codec (could be huffman tables or ...)
+
+frame_code
+ the meaning of this byte is stored in the main header
+ the value 78 ('N') is forbidden to ensure that the byte is always
+ different from the first byte of any startcode
+
+flags[frame_code]
+ the bits of the flags from MSB to LSB are KD
+ if D is 1 then data_size_msb is coded, otherwise data_size_msb is 0
+ K is the keyframe_type
+ 0-> no keyframe,
+ 1-> keyframe,
+ flags=4 can be used to mark illegal frame_code bytes
+ frame_code=78 must have flags=4
+ * frames MUST not depend(1) upon frames prior to the last
+ frame_startcode
+ depend(1) means dependancy on the container level (NUT) not dependancy
+ on the codec level
+
+stream_id_plus1[frame_code]
+ must be <250
+ if its 0 then the stream_id is coded in the frame
+
+data_size_mul[frame_code]
+ must be <16384
+
+data_size_lsb[frame_code]
+ must be <16384
+
+timestamp_delta[frame_code]
+ must be <16384 and >-16384
+
+data_size
+ data_size= data_size_lsb + data_size_msb*data_size_mul;
+
+coded_timestamp
+ if coded_timestamp < (1<<msb_timestamp_shift) then its a
+ lsb timestamp, otherwise its a full timestamp + (1<<msb_timestamp_shift)
+ lsb timestamps are converted to full timesamps by:
+ mask = (1<<msb_timestamp_shift)-1;
+ delta= last_timestamp - mask/2
+ timestamp= ((timestamp_lsb-delta)&mask) + delta
+ a full timestamp must be used if there is no reference timestamp
+ available after the last frame_startcode with the current stream_id
+
+lsb_timestamp
+ least significant bits of the timestamp in time_base precission
+ Example: IBBP display order
+ keyframe timestamp=0 -> timestamp=0
+ frame lsb_timestamp=3 -> timestamp=3
+ frame lsb_timestamp=1 -> timestamp=1
+ frame lsb_timestamp=2 -> timestamp=2
+ ...
+ keyframe msb_timestamp=257 -> timestamp=257
+ frame lsb_timestamp=255->timestamp=255
+ frame lsb_timestamp=0 -> timestamp=256
+ frame lsb_timestamp=4 -> timestamp=260
+ frame lsb_timestamp=2 -> timestamp=258
+ frame lsb_timestamp=3 -> timestamp=259
+ all timestamps of keyframes of a single stream MUST be monotone
+
+dts
+ dts are calculated by using a decode_delay+1 sized buffer for each
+ stream, into which the current pts is inserted and the element with
+ the smallest value is removed, this is then the current dts
+ this buffer is initalized with decode_delay -1 elements
+ all frames with dts == timestamp must be monotone, that means a frame
+ which occures later in the stream must have a larger or equal dts
+ then an earlier frame
+ FIXME rename timestamp* to pts* ?
+
+width/height
+ MUST be set to the coded width/height
+
+sample_width/sample_height (aspect ratio)
+ sample_width is the horizontal distance between samples
+ sample_width and sample_height MUST be relative prime if not zero
+ MUST be 0 if unknown
+
+colorspace_type
+ 0 unknown
+ 1 ITU Rec 624 / ITU Rec 601 Y range: 16..235 Cb/Cr range: 16..240
+ 2 ITU Rec 709 Y range: 16..235 Cb/Cr range: 16..240
+ 17 ITU Rec 624 / ITU Rec 601 Y range: 0..255 Cb/Cr range: 0..255
+ 18 ITU Rec 709 Y range: 0..255 Cb/Cr range: 0..255
+
+samplerate_nom / samplerate_denom = samplerate
+ the number of samples per second
+
+checksum
+ adler32 checksum
+
+index_timestamp
+ value of the timetamp in a sync point relative to the last sync-point
+
+index_position
+ position in bytes of the first byte of a sync-point, relative to the
+ last sync_point
+
+id
+ the id of the type/name pair, so its more compact
+ 0 means end
+
+type
+ for example: "UTF8" -> String or "JPEG" -> jpeg image
+ Note: nonstandard fields should be prefixed by "X-"
+ Note: MUST be less than 6 byte long (might be increased to 64 later)
+
+name
+ the name of the info entry, valid names are
+ "TotalTime" total length of the stream in msecs
+ "StreamId" the stream(s) to which the info packet applies
+ "StartTimestamp"
+ "EndTimestamp" the time range in msecs to which the info applies
+ "SegmentId" a unique id for the streams + time specified
+ "Author"
+ "Description"
+ "Copyright"
+ "Encoder" the name & version of the software used for encoding
+ "Title"
+ "Cover" an image of the (cd,dvd,vhs,..) cover (preferable PNG or JPEG)
+ "Source" "DVD", "VCD", "CD", "MD", "FM radio", "VHS", "TV",
+ "LD"
+ Optional: appended PAL,NTSC,SECAM, ... in parentheses
+ "CaptureDevice" "BT878", "BT848", "webcam", ... (more exact names are fine too)
+ "CreationTime" "2003-01-20 20:13:15Z", ...
+ (ISO 8601 format, see http://www.cl.cam.ac.uk/~mgk25/iso-time.html)
+ Note: dont forget the timezone
+ "ReplayGain"
+ "Keywords"
+ "Language" ISO 639 and ISO 3166 for language/country code
+ something like "usen" (US english), can be 0 if unknown
+ and "multi" if several languages
+ see http://www.loc.gov/standards/iso639-2/englangn.html
+ and http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.htmlthe language code
+ "Disposition" "original", "dub" (translated), "comment", "lyrics", "karaoke"
+ Note: if someone needs some others, please tell us about them, so we can
+ add them to the official standard (if they are sane)
+ Note: nonstandard fields should be prefixed by "X-"
+ Note: MUST be less than 64 bytes long
+
+value
+ value of this name/type pair
+
+stuffing
+ 0x80 can be placed infront of any type v entry for stuffing
+ purposes
+
+info_table[][2]={
+ {NULL , NULL }, // end
+ {NULL , NULL },
+ {NULL , "UTF8"},
+ {NULL , "v"},
+ {NULL , "s"},
+ {"StreamId" , "v"},
+ {"SegmentId" , "v"},
+ {"StartTimestamp" , "v"},
+ {"EndTimestamp" , "v"},
+ {"Author" , "UTF8"},
+ {"Titel" , "UTF8"},
+ {"Language" , "UTF8"},
+ {"Description" , "UTF8"},
+ {"Copyright" , "UTF8"},
+ {"Encoder" , "UTF8"},
+ {"Keyword" , "UTF8"},
+ {"Cover" , "JPEG"},
+ {"Cover" , "PNG"},
+ {"Disposition" , "UTF8"},
+};
+
+ Structure:
+
+the headers MUST be in exactly the following order (to simplify demuxer design)
+main header
+stream_header (id=0)
+stream_header (id=1)
+...
+stream_header (id=n)
+
+headers may be repated, but if they are then they MUST all be repeated together
+and repeated headers MUST be identical
+headers MAY only repeated at the closest possible positions after 2^x where x is
+an integer and the file end, so the headers may be repeated at 4102 if thats the
+closest possition after 2^12=4096 at which the headers can be placed
+
+headers MUST be placed at least at the begin of the file and immedeatly before
+the index or at the file end if there is no index
+headers MUST be repeated at least twice (so they exist 3 times in a file)
+
+a demuxer MUST not demux a stream which contains more than one stream, or which
+is wrapped in a structure to facilitate more than one stream or otherwise
+duplicate the role of a container. any such file is to be considered invalid
+
+info packets which describe the whole file or individual streams/tracks must be
+placed before any video/audio/... frames
+
+ Index
+every sync-point must be exacty once in the index
+Note: in case of realtime streaming there is no end, so no index there either
+
+ Info packets
+the info_packet can be repeated, it can also contain different names & values
+each time but only if allso the time is different
+Info packets can be used to describe the file or some part of it (chapters)
+
+info packets, SHOULD be placed at the begin of the file at least
+for realtime streaming info packets will normally be transmitted when they apply
+for example, the current song title & artist of the currently shown music video
+
+ Unknown packets
+MUST be ignored by the demuxer
+
+ demuxer (non-normative)
+
+in the absence of valid header at beginning, players SHOULD search for backup
+headers starting at offset 2^x for each x players SHOULD end their search from a
+particular offset when any startcode is found (including syncpoint)
+
+
+ Sample code (GPL, & untested)
+
+typedef BufferContext{
+ uint8_t *buf;
+ uint8_t *buf_ptr;
+}BufferContext;
+
+static inline uint64_t get_bytes(BufferContext *bc, int count){
+ uint64_t val=0;
+
+ assert(count>0 && count<9)
+
+ for(i=0; i<count; i++){
+ val <<=8;
+ val += *(bc->buf_ptr++);
+ }
+
+ return val;
+}
+
+static inline void put_bytes(BufferContext *bc, int count, uint64_t val){
+ uint64_t val=0;
+
+ assert(count>0 && count<9)
+
+ for(i=count-1; i>=0; i--){
+ *(bc->buf_ptr++)= val >> (8*i);
+ }
+
+ return val;
+}
+
+static inline uint64_t get_v(BufferContext *bc){
+ uint64_t val= 0;
+
+ for(; space_left(bc) > 0; ){
+ int tmp= *(bc->buf_ptr++);
+ if(tmp&0x80)
+ val= (val<<7) + tmp - 0x80;
+ else
+ return (val<<7) + tmp;
+ }
+
+ return -1;
+}
+
+static inline int put_v(BufferContext *bc, uint64_t val){
+ int i;
+
+ if(space_left(bc) < 9) return -1;
+
+ val &= 0x7FFFFFFFFFFFFFFFULL; // FIXME can only encode upto 63 bits currently
+ for(i=7; ; i+=7){
+ if(val>>i == 0) break;
+ }
+
+ for(i-=7; i>0; i-=7){
+ *(bc->buf_ptr++)= 0x80 | (val>>i);
+ }
+ *(bc->buf_ptr++)= val&0x7F;
+
+ return 0;
+}
+
+static int64_t get_dts(int64_t pts, int64_t *pts_cache, int delay, int reset){
+ if(reset) memset(pts_cache, -1, delay*sizeof(int64_t));
+
+ while(delay--){
+ int64_t t= pts_cache[delay];
+ if(t < pts){
+ pts_cache[delay]= pts;
+ pts= t;
+ }
+ }
+
+ return pts;
+}
+
+ Authors
+
+Folks from MPlayer Developers Mailinglist (http://www.mplayehrq.hu/).
+Authors in ABC-order: (FIXME! Tell us if we left you out)
+ Beregszaszi, Alex (alex@fsn.hu)
+ Bunkus, Moritz (moritz@bunkus.org)
+ Diedrich, Tobias (td@sim.uni-hannover.de)
+ Felker, Rich (dalias@aerifal.cx)
+ Franz, Fabian (FabianFranz@gmx.de)
+ Gereoffy, Arpad (arpi@thot.banki.hu)
+ Hess, Andreas (jaska@gmx.net)
+ Niedermayer, Michael (michaelni@gmx.at)