diff options
author | Enrico Tassi <gareuselesinge@debian.org> | 2015-01-25 14:42:51 +0100 |
---|---|---|
committer | Enrico Tassi <gareuselesinge@debian.org> | 2015-01-25 14:42:51 +0100 |
commit | 7cfc4e5146be5666419451bdd516f1f3f264d24a (patch) | |
tree | e4197645da03dc3c7cc84e434cc31d0a0cca7056 /lib/xml_lexer.mll | |
parent | 420f78b2caeaaddc6fe484565b2d0e49c66888e5 (diff) |
Imported Upstream version 8.5~beta1+dfsg
Diffstat (limited to 'lib/xml_lexer.mll')
-rw-r--r-- | lib/xml_lexer.mll | 419 |
1 files changed, 210 insertions, 209 deletions
diff --git a/lib/xml_lexer.mll b/lib/xml_lexer.mll index 5b06e720..a33be9da 100644 --- a/lib/xml_lexer.mll +++ b/lib/xml_lexer.mll @@ -20,24 +20,24 @@ open Lexing type error = - | EUnterminatedComment - | EUnterminatedString - | EIdentExpected - | ECloseExpected - | ENodeExpected - | EAttributeNameExpected - | EAttributeValueExpected - | EUnterminatedEntity + | EUnterminatedComment + | EUnterminatedString + | EIdentExpected + | ECloseExpected + | ENodeExpected + | EAttributeNameExpected + | EAttributeValueExpected + | EUnterminatedEntity exception Error of error type pos = int * int * int * int type token = - | Tag of string * (string * string) list * bool - | PCData of string - | Endtag of string - | Eof + | Tag of string * (string * string) list * bool + | PCData of string + | Endtag of string + | Eof let last_pos = ref 0 and current_line = ref 0 @@ -48,39 +48,40 @@ let tmp = Buffer.create 200 let idents = Hashtbl.create 0 let _ = begin - Hashtbl.add idents "gt;" ">"; - Hashtbl.add idents "lt;" "<"; - Hashtbl.add idents "amp;" "&"; - Hashtbl.add idents "apos;" "'"; - Hashtbl.add idents "quot;" "\""; + Hashtbl.add idents "nbsp;" " "; + Hashtbl.add idents "gt;" ">"; + Hashtbl.add idents "lt;" "<"; + Hashtbl.add idents "amp;" "&"; + Hashtbl.add idents "apos;" "'"; + Hashtbl.add idents "quot;" "\""; end let init lexbuf = - current_line := 1; - current_line_start := lexeme_start lexbuf; - last_pos := !current_line_start + current_line := 1; + current_line_start := lexeme_start lexbuf; + last_pos := !current_line_start let close lexbuf = - Buffer.reset tmp + Buffer.reset tmp let pos lexbuf = - !current_line , !current_line_start , - !last_pos , - lexeme_start lexbuf + !current_line , !current_line_start , + !last_pos , + lexeme_start lexbuf let restore (cl,cls,lp,_) = - current_line := cl; - current_line_start := cls; - last_pos := lp + current_line := cl; + current_line_start := cls; + last_pos := lp let newline lexbuf = - incr current_line; - last_pos := lexeme_end lexbuf; - current_line_start := !last_pos + incr current_line; + last_pos := lexeme_end lexbuf; + current_line_start := !last_pos let error lexbuf e = - last_pos := lexeme_start lexbuf; - raise (Error e) + last_pos := lexeme_start lexbuf; + raise (Error e) } @@ -92,100 +93,100 @@ let entitychar = ['A'-'Z' 'a'-'z'] let pcchar = [^ '\r' '\n' '<' '>' '&'] rule token = parse - | newline | (newline break) | break - { - newline lexbuf; + | newline | (newline break) | break + { + newline lexbuf; PCData "\n" - } - | "<!--" - { - last_pos := lexeme_start lexbuf; - comment lexbuf; - token lexbuf - } - | "<?" - { - last_pos := lexeme_start lexbuf; - header lexbuf; - token lexbuf; - } - | '<' space* '/' space* - { - last_pos := lexeme_start lexbuf; - let tag = ident_name lexbuf in - ignore_spaces lexbuf; - close_tag lexbuf; - Endtag tag - } - | '<' space* - { - last_pos := lexeme_start lexbuf; - let tag = ident_name lexbuf in - ignore_spaces lexbuf; - let attribs, closed = attributes lexbuf in - Tag(tag, attribs, closed) - } - | "&#" - { - last_pos := lexeme_start lexbuf; - Buffer.reset tmp; - Buffer.add_string tmp (lexeme lexbuf); - PCData (pcdata lexbuf) - } - | '&' - { - last_pos := lexeme_start lexbuf; - Buffer.reset tmp; - Buffer.add_string tmp (entity lexbuf); - PCData (pcdata lexbuf) - } - | pcchar+ - { - last_pos := lexeme_start lexbuf; - Buffer.reset tmp; - Buffer.add_string tmp (lexeme lexbuf); - PCData (pcdata lexbuf) - } - | eof { Eof } - | _ - { error lexbuf ENodeExpected } + } + | "<!--" + { + last_pos := lexeme_start lexbuf; + comment lexbuf; + token lexbuf + } + | "<?" + { + last_pos := lexeme_start lexbuf; + header lexbuf; + token lexbuf; + } + | '<' space* '/' space* + { + last_pos := lexeme_start lexbuf; + let tag = ident_name lexbuf in + ignore_spaces lexbuf; + close_tag lexbuf; + Endtag tag + } + | '<' space* + { + last_pos := lexeme_start lexbuf; + let tag = ident_name lexbuf in + ignore_spaces lexbuf; + let attribs, closed = attributes lexbuf in + Tag(tag, attribs, closed) + } + | "&#" + { + last_pos := lexeme_start lexbuf; + Buffer.reset tmp; + Buffer.add_string tmp (lexeme lexbuf); + PCData (pcdata lexbuf) + } + | '&' + { + last_pos := lexeme_start lexbuf; + Buffer.reset tmp; + Buffer.add_string tmp (entity lexbuf); + PCData (pcdata lexbuf) + } + | pcchar+ + { + last_pos := lexeme_start lexbuf; + Buffer.reset tmp; + Buffer.add_string tmp (lexeme lexbuf); + PCData (pcdata lexbuf) + } + | eof { Eof } + | _ + { error lexbuf ENodeExpected } and ignore_spaces = parse | newline | (newline break) | break - { - newline lexbuf; - ignore_spaces lexbuf - } - | space + - { ignore_spaces lexbuf } - | "" - { () } + { + newline lexbuf; + ignore_spaces lexbuf + } + | space + + { ignore_spaces lexbuf } + | "" + { () } and comment = parse | newline | (newline break) | break - { - newline lexbuf; - comment lexbuf - } - | "-->" - { () } - | eof - { raise (Error EUnterminatedComment) } - | _ - { comment lexbuf } + { + newline lexbuf; + comment lexbuf + } + | "-->" + { () } + | eof + { raise (Error EUnterminatedComment) } + | _ + { comment lexbuf } and header = parse | newline | (newline break) | break - { - newline lexbuf; - header lexbuf - } - | "?>" - { () } - | eof - { error lexbuf ECloseExpected } - | _ - { header lexbuf } + { + newline lexbuf; + header lexbuf + } + | "?>" + { () } + | eof + { error lexbuf ECloseExpected } + | _ + { header lexbuf } and pcdata = parse | newline | (newline break) | break @@ -194,112 +195,112 @@ and pcdata = parse newline lexbuf; pcdata lexbuf } - | pcchar+ - { - Buffer.add_string tmp (lexeme lexbuf); - pcdata lexbuf - } - | "&#" - { - Buffer.add_string tmp (lexeme lexbuf); - pcdata lexbuf; - } - | '&' - { - Buffer.add_string tmp (entity lexbuf); - pcdata lexbuf - } - | "" - { Buffer.contents tmp } + | pcchar+ + { + Buffer.add_string tmp (lexeme lexbuf); + pcdata lexbuf + } + | "&#" + { + Buffer.add_string tmp (lexeme lexbuf); + pcdata lexbuf; + } + | '&' + { + Buffer.add_string tmp (entity lexbuf); + pcdata lexbuf + } + | "" + { Buffer.contents tmp } and entity = parse - | entitychar+ ';' - { - let ident = lexeme lexbuf in - try - Hashtbl.find idents (String.lowercase ident) - with - Not_found -> "&" ^ ident - } - | _ | eof - { raise (Error EUnterminatedEntity) } + | entitychar+ ';' + { + let ident = lexeme lexbuf in + try + Hashtbl.find idents (String.lowercase ident) + with + Not_found -> "&" ^ ident + } + | _ | eof + { raise (Error EUnterminatedEntity) } and ident_name = parse - | identchar+ - { lexeme lexbuf } - | _ | eof - { error lexbuf EIdentExpected } + | identchar+ + { lexeme lexbuf } + | _ | eof + { error lexbuf EIdentExpected } and close_tag = parse - | '>' - { () } - | _ | eof - { error lexbuf ECloseExpected } + | '>' + { () } + | _ | eof + { error lexbuf ECloseExpected } and attributes = parse - | '>' - { [], false } - | "/>" - { [], true } - | "" (* do not read a char ! *) - { - let key = attribute lexbuf in - let data = attribute_data lexbuf in - ignore_spaces lexbuf; - let others, closed = attributes lexbuf in - (key, data) :: others, closed - } + | '>' + { [], false } + | "/>" + { [], true } + | "" (* do not read a char ! *) + { + let key = attribute lexbuf in + let data = attribute_data lexbuf in + ignore_spaces lexbuf; + let others, closed = attributes lexbuf in + (key, data) :: others, closed + } and attribute = parse - | identchar+ - { lexeme lexbuf } - | _ | eof - { error lexbuf EAttributeNameExpected } + | identchar+ + { lexeme lexbuf } + | _ | eof + { error lexbuf EAttributeNameExpected } and attribute_data = parse - | space* '=' space* '"' - { - Buffer.reset tmp; - last_pos := lexeme_end lexbuf; - dq_string lexbuf - } - | space* '=' space* '\'' - { - Buffer.reset tmp; - last_pos := lexeme_end lexbuf; - q_string lexbuf - } - | _ | eof - { error lexbuf EAttributeValueExpected } + | space* '=' space* '"' + { + Buffer.reset tmp; + last_pos := lexeme_end lexbuf; + dq_string lexbuf + } + | space* '=' space* '\'' + { + Buffer.reset tmp; + last_pos := lexeme_end lexbuf; + q_string lexbuf + } + | _ | eof + { error lexbuf EAttributeValueExpected } and dq_string = parse - | '"' - { Buffer.contents tmp } - | '\\' [ '"' '\\' ] - { - Buffer.add_char tmp (lexeme_char lexbuf 1); - dq_string lexbuf - } - | eof - { raise (Error EUnterminatedString) } - | _ - { - Buffer.add_char tmp (lexeme_char lexbuf 0); - dq_string lexbuf - } + | '"' + { Buffer.contents tmp } + | '\\' [ '"' '\\' ] + { + Buffer.add_char tmp (lexeme_char lexbuf 1); + dq_string lexbuf + } + | eof + { raise (Error EUnterminatedString) } + | _ + { + Buffer.add_char tmp (lexeme_char lexbuf 0); + dq_string lexbuf + } and q_string = parse - | '\'' - { Buffer.contents tmp } - | '\\' [ '\'' '\\' ] - { - Buffer.add_char tmp (lexeme_char lexbuf 1); - q_string lexbuf - } - | eof - { raise (Error EUnterminatedString) } - | _ - { - Buffer.add_char tmp (lexeme_char lexbuf 0); - q_string lexbuf - } + | '\'' + { Buffer.contents tmp } + | '\\' [ '\'' '\\' ] + { + Buffer.add_char tmp (lexeme_char lexbuf 1); + q_string lexbuf + } + | eof + { raise (Error EUnterminatedString) } + | _ + { + Buffer.add_char tmp (lexeme_char lexbuf 0); + q_string lexbuf + } |