summaryrefslogtreecommitdiff
path: root/lib/xml_lexer.mll
diff options
context:
space:
mode:
Diffstat (limited to 'lib/xml_lexer.mll')
-rw-r--r--lib/xml_lexer.mll419
1 files changed, 210 insertions, 209 deletions
diff --git a/lib/xml_lexer.mll b/lib/xml_lexer.mll
index 5b06e720..a33be9da 100644
--- a/lib/xml_lexer.mll
+++ b/lib/xml_lexer.mll
@@ -20,24 +20,24 @@
open Lexing
type error =
- | EUnterminatedComment
- | EUnterminatedString
- | EIdentExpected
- | ECloseExpected
- | ENodeExpected
- | EAttributeNameExpected
- | EAttributeValueExpected
- | EUnterminatedEntity
+ | EUnterminatedComment
+ | EUnterminatedString
+ | EIdentExpected
+ | ECloseExpected
+ | ENodeExpected
+ | EAttributeNameExpected
+ | EAttributeValueExpected
+ | EUnterminatedEntity
exception Error of error
type pos = int * int * int * int
type token =
- | Tag of string * (string * string) list * bool
- | PCData of string
- | Endtag of string
- | Eof
+ | Tag of string * (string * string) list * bool
+ | PCData of string
+ | Endtag of string
+ | Eof
let last_pos = ref 0
and current_line = ref 0
@@ -48,39 +48,40 @@ let tmp = Buffer.create 200
let idents = Hashtbl.create 0
let _ = begin
- Hashtbl.add idents "gt;" ">";
- Hashtbl.add idents "lt;" "<";
- Hashtbl.add idents "amp;" "&";
- Hashtbl.add idents "apos;" "'";
- Hashtbl.add idents "quot;" "\"";
+ Hashtbl.add idents "nbsp;" " ";
+ Hashtbl.add idents "gt;" ">";
+ Hashtbl.add idents "lt;" "<";
+ Hashtbl.add idents "amp;" "&";
+ Hashtbl.add idents "apos;" "'";
+ Hashtbl.add idents "quot;" "\"";
end
let init lexbuf =
- current_line := 1;
- current_line_start := lexeme_start lexbuf;
- last_pos := !current_line_start
+ current_line := 1;
+ current_line_start := lexeme_start lexbuf;
+ last_pos := !current_line_start
let close lexbuf =
- Buffer.reset tmp
+ Buffer.reset tmp
let pos lexbuf =
- !current_line , !current_line_start ,
- !last_pos ,
- lexeme_start lexbuf
+ !current_line , !current_line_start ,
+ !last_pos ,
+ lexeme_start lexbuf
let restore (cl,cls,lp,_) =
- current_line := cl;
- current_line_start := cls;
- last_pos := lp
+ current_line := cl;
+ current_line_start := cls;
+ last_pos := lp
let newline lexbuf =
- incr current_line;
- last_pos := lexeme_end lexbuf;
- current_line_start := !last_pos
+ incr current_line;
+ last_pos := lexeme_end lexbuf;
+ current_line_start := !last_pos
let error lexbuf e =
- last_pos := lexeme_start lexbuf;
- raise (Error e)
+ last_pos := lexeme_start lexbuf;
+ raise (Error e)
}
@@ -92,100 +93,100 @@ let entitychar = ['A'-'Z' 'a'-'z']
let pcchar = [^ '\r' '\n' '<' '>' '&']
rule token = parse
- | newline | (newline break) | break
- {
- newline lexbuf;
+ | newline | (newline break) | break
+ {
+ newline lexbuf;
PCData "\n"
- }
- | "<!--"
- {
- last_pos := lexeme_start lexbuf;
- comment lexbuf;
- token lexbuf
- }
- | "<?"
- {
- last_pos := lexeme_start lexbuf;
- header lexbuf;
- token lexbuf;
- }
- | '<' space* '/' space*
- {
- last_pos := lexeme_start lexbuf;
- let tag = ident_name lexbuf in
- ignore_spaces lexbuf;
- close_tag lexbuf;
- Endtag tag
- }
- | '<' space*
- {
- last_pos := lexeme_start lexbuf;
- let tag = ident_name lexbuf in
- ignore_spaces lexbuf;
- let attribs, closed = attributes lexbuf in
- Tag(tag, attribs, closed)
- }
- | "&#"
- {
- last_pos := lexeme_start lexbuf;
- Buffer.reset tmp;
- Buffer.add_string tmp (lexeme lexbuf);
- PCData (pcdata lexbuf)
- }
- | '&'
- {
- last_pos := lexeme_start lexbuf;
- Buffer.reset tmp;
- Buffer.add_string tmp (entity lexbuf);
- PCData (pcdata lexbuf)
- }
- | pcchar+
- {
- last_pos := lexeme_start lexbuf;
- Buffer.reset tmp;
- Buffer.add_string tmp (lexeme lexbuf);
- PCData (pcdata lexbuf)
- }
- | eof { Eof }
- | _
- { error lexbuf ENodeExpected }
+ }
+ | "<!--"
+ {
+ last_pos := lexeme_start lexbuf;
+ comment lexbuf;
+ token lexbuf
+ }
+ | "<?"
+ {
+ last_pos := lexeme_start lexbuf;
+ header lexbuf;
+ token lexbuf;
+ }
+ | '<' space* '/' space*
+ {
+ last_pos := lexeme_start lexbuf;
+ let tag = ident_name lexbuf in
+ ignore_spaces lexbuf;
+ close_tag lexbuf;
+ Endtag tag
+ }
+ | '<' space*
+ {
+ last_pos := lexeme_start lexbuf;
+ let tag = ident_name lexbuf in
+ ignore_spaces lexbuf;
+ let attribs, closed = attributes lexbuf in
+ Tag(tag, attribs, closed)
+ }
+ | "&#"
+ {
+ last_pos := lexeme_start lexbuf;
+ Buffer.reset tmp;
+ Buffer.add_string tmp (lexeme lexbuf);
+ PCData (pcdata lexbuf)
+ }
+ | '&'
+ {
+ last_pos := lexeme_start lexbuf;
+ Buffer.reset tmp;
+ Buffer.add_string tmp (entity lexbuf);
+ PCData (pcdata lexbuf)
+ }
+ | pcchar+
+ {
+ last_pos := lexeme_start lexbuf;
+ Buffer.reset tmp;
+ Buffer.add_string tmp (lexeme lexbuf);
+ PCData (pcdata lexbuf)
+ }
+ | eof { Eof }
+ | _
+ { error lexbuf ENodeExpected }
and ignore_spaces = parse
| newline | (newline break) | break
- {
- newline lexbuf;
- ignore_spaces lexbuf
- }
- | space +
- { ignore_spaces lexbuf }
- | ""
- { () }
+ {
+ newline lexbuf;
+ ignore_spaces lexbuf
+ }
+ | space +
+ { ignore_spaces lexbuf }
+ | ""
+ { () }
and comment = parse
| newline | (newline break) | break
- {
- newline lexbuf;
- comment lexbuf
- }
- | "-->"
- { () }
- | eof
- { raise (Error EUnterminatedComment) }
- | _
- { comment lexbuf }
+ {
+ newline lexbuf;
+ comment lexbuf
+ }
+ | "-->"
+ { () }
+ | eof
+ { raise (Error EUnterminatedComment) }
+ | _
+ { comment lexbuf }
and header = parse
| newline | (newline break) | break
- {
- newline lexbuf;
- header lexbuf
- }
- | "?>"
- { () }
- | eof
- { error lexbuf ECloseExpected }
- | _
- { header lexbuf }
+ {
+ newline lexbuf;
+ header lexbuf
+ }
+ | "?>"
+ { () }
+ | eof
+ { error lexbuf ECloseExpected }
+ | _
+ { header lexbuf }
and pcdata = parse
| newline | (newline break) | break
@@ -194,112 +195,112 @@ and pcdata = parse
newline lexbuf;
pcdata lexbuf
}
- | pcchar+
- {
- Buffer.add_string tmp (lexeme lexbuf);
- pcdata lexbuf
- }
- | "&#"
- {
- Buffer.add_string tmp (lexeme lexbuf);
- pcdata lexbuf;
- }
- | '&'
- {
- Buffer.add_string tmp (entity lexbuf);
- pcdata lexbuf
- }
- | ""
- { Buffer.contents tmp }
+ | pcchar+
+ {
+ Buffer.add_string tmp (lexeme lexbuf);
+ pcdata lexbuf
+ }
+ | "&#"
+ {
+ Buffer.add_string tmp (lexeme lexbuf);
+ pcdata lexbuf;
+ }
+ | '&'
+ {
+ Buffer.add_string tmp (entity lexbuf);
+ pcdata lexbuf
+ }
+ | ""
+ { Buffer.contents tmp }
and entity = parse
- | entitychar+ ';'
- {
- let ident = lexeme lexbuf in
- try
- Hashtbl.find idents (String.lowercase ident)
- with
- Not_found -> "&" ^ ident
- }
- | _ | eof
- { raise (Error EUnterminatedEntity) }
+ | entitychar+ ';'
+ {
+ let ident = lexeme lexbuf in
+ try
+ Hashtbl.find idents (String.lowercase ident)
+ with
+ Not_found -> "&" ^ ident
+ }
+ | _ | eof
+ { raise (Error EUnterminatedEntity) }
and ident_name = parse
- | identchar+
- { lexeme lexbuf }
- | _ | eof
- { error lexbuf EIdentExpected }
+ | identchar+
+ { lexeme lexbuf }
+ | _ | eof
+ { error lexbuf EIdentExpected }
and close_tag = parse
- | '>'
- { () }
- | _ | eof
- { error lexbuf ECloseExpected }
+ | '>'
+ { () }
+ | _ | eof
+ { error lexbuf ECloseExpected }
and attributes = parse
- | '>'
- { [], false }
- | "/>"
- { [], true }
- | "" (* do not read a char ! *)
- {
- let key = attribute lexbuf in
- let data = attribute_data lexbuf in
- ignore_spaces lexbuf;
- let others, closed = attributes lexbuf in
- (key, data) :: others, closed
- }
+ | '>'
+ { [], false }
+ | "/>"
+ { [], true }
+ | "" (* do not read a char ! *)
+ {
+ let key = attribute lexbuf in
+ let data = attribute_data lexbuf in
+ ignore_spaces lexbuf;
+ let others, closed = attributes lexbuf in
+ (key, data) :: others, closed
+ }
and attribute = parse
- | identchar+
- { lexeme lexbuf }
- | _ | eof
- { error lexbuf EAttributeNameExpected }
+ | identchar+
+ { lexeme lexbuf }
+ | _ | eof
+ { error lexbuf EAttributeNameExpected }
and attribute_data = parse
- | space* '=' space* '"'
- {
- Buffer.reset tmp;
- last_pos := lexeme_end lexbuf;
- dq_string lexbuf
- }
- | space* '=' space* '\''
- {
- Buffer.reset tmp;
- last_pos := lexeme_end lexbuf;
- q_string lexbuf
- }
- | _ | eof
- { error lexbuf EAttributeValueExpected }
+ | space* '=' space* '"'
+ {
+ Buffer.reset tmp;
+ last_pos := lexeme_end lexbuf;
+ dq_string lexbuf
+ }
+ | space* '=' space* '\''
+ {
+ Buffer.reset tmp;
+ last_pos := lexeme_end lexbuf;
+ q_string lexbuf
+ }
+ | _ | eof
+ { error lexbuf EAttributeValueExpected }
and dq_string = parse
- | '"'
- { Buffer.contents tmp }
- | '\\' [ '"' '\\' ]
- {
- Buffer.add_char tmp (lexeme_char lexbuf 1);
- dq_string lexbuf
- }
- | eof
- { raise (Error EUnterminatedString) }
- | _
- {
- Buffer.add_char tmp (lexeme_char lexbuf 0);
- dq_string lexbuf
- }
+ | '"'
+ { Buffer.contents tmp }
+ | '\\' [ '"' '\\' ]
+ {
+ Buffer.add_char tmp (lexeme_char lexbuf 1);
+ dq_string lexbuf
+ }
+ | eof
+ { raise (Error EUnterminatedString) }
+ | _
+ {
+ Buffer.add_char tmp (lexeme_char lexbuf 0);
+ dq_string lexbuf
+ }
and q_string = parse
- | '\''
- { Buffer.contents tmp }
- | '\\' [ '\'' '\\' ]
- {
- Buffer.add_char tmp (lexeme_char lexbuf 1);
- q_string lexbuf
- }
- | eof
- { raise (Error EUnterminatedString) }
- | _
- {
- Buffer.add_char tmp (lexeme_char lexbuf 0);
- q_string lexbuf
- }
+ | '\''
+ { Buffer.contents tmp }
+ | '\\' [ '\'' '\\' ]
+ {
+ Buffer.add_char tmp (lexeme_char lexbuf 1);
+ q_string lexbuf
+ }
+ | eof
+ { raise (Error EUnterminatedString) }
+ | _
+ {
+ Buffer.add_char tmp (lexeme_char lexbuf 0);
+ q_string lexbuf
+ }