aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar ppedrot <ppedrot@85f007b7-540e-0410-9357-904b9bb8a0f7>2011-11-24 17:12:59 +0000
committerGravatar ppedrot <ppedrot@85f007b7-540e-0410-9357-904b9bb8a0f7>2011-11-24 17:12:59 +0000
commit2ed4b1e88e3e304c5146d74124d7057ac62c59a2 (patch)
tree766fd5553916b90d06eb253b72192942d80ca692
parentfe891a11536b64cd9be0ea9ad3e7de026031ae57 (diff)
Fixed the XML parser CDATA handling (and changed the EOL convention of these files which where Windows-like, whoever knows why).
git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@14726 85f007b7-540e-0410-9357-904b9bb8a0f7
-rw-r--r--lib/xml_lexer.mli86
-rw-r--r--lib/xml_lexer.mll603
-rw-r--r--lib/xml_parser.ml434
-rw-r--r--lib/xml_parser.mli224
-rw-r--r--lib/xml_utils.ml458
-rw-r--r--lib/xml_utils.mli186
6 files changed, 1000 insertions, 991 deletions
diff --git a/lib/xml_lexer.mli b/lib/xml_lexer.mli
index 6edc414bf..a1ca05765 100644
--- a/lib/xml_lexer.mli
+++ b/lib/xml_lexer.mli
@@ -1,44 +1,44 @@
-(*
- * Xml Light, an small Xml parser/printer with DTD support.
- * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *)
-
-type error =
- | EUnterminatedComment
- | EUnterminatedString
- | EIdentExpected
- | ECloseExpected
- | ENodeExpected
- | EAttributeNameExpected
- | EAttributeValueExpected
- | EUnterminatedEntity
-
-exception Error of error
-
-type token =
- | Tag of string * (string * string) list * bool
- | PCData of string
- | Endtag of string
- | Eof
-
-type pos = int * int * int * int
-
-val init : Lexing.lexbuf -> unit
-val close : Lexing.lexbuf -> unit
-val token : Lexing.lexbuf -> token
-val pos : Lexing.lexbuf -> pos
+(*
+ * Xml Light, an small Xml parser/printer with DTD support.
+ * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *)
+
+type error =
+ | EUnterminatedComment
+ | EUnterminatedString
+ | EIdentExpected
+ | ECloseExpected
+ | ENodeExpected
+ | EAttributeNameExpected
+ | EAttributeValueExpected
+ | EUnterminatedEntity
+
+exception Error of error
+
+type token =
+ | Tag of string * (string * string) list * bool
+ | PCData of string
+ | Endtag of string
+ | Eof
+
+type pos = int * int * int * int
+
+val init : Lexing.lexbuf -> unit
+val close : Lexing.lexbuf -> unit
+val token : Lexing.lexbuf -> token
+val pos : Lexing.lexbuf -> pos
val restore : pos -> unit \ No newline at end of file
diff --git a/lib/xml_lexer.mll b/lib/xml_lexer.mll
index 3e5a47ed9..2be4bf98b 100644
--- a/lib/xml_lexer.mll
+++ b/lib/xml_lexer.mll
@@ -1,304 +1,299 @@
-{(*
- * Xml Light, an small Xml parser/printer with DTD support.
- * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *)
-
-open Lexing
-
-type error =
- | EUnterminatedComment
- | EUnterminatedString
- | EIdentExpected
- | ECloseExpected
- | ENodeExpected
- | EAttributeNameExpected
- | EAttributeValueExpected
- | EUnterminatedEntity
-
-exception Error of error
-
-type pos = int * int * int * int
-
-type token =
- | Tag of string * (string * string) list * bool
- | PCData of string
- | Endtag of string
- | Eof
-
-let last_pos = ref 0
-and current_line = ref 0
-and current_line_start = ref 0
-
-let tmp = Buffer.create 200
-
-let idents = Hashtbl.create 0
-
-let _ = begin
- Hashtbl.add idents "gt;" ">";
- Hashtbl.add idents "lt;" "<";
- Hashtbl.add idents "amp;" "&";
- Hashtbl.add idents "apos;" "'";
- Hashtbl.add idents "quot;" "\"";
-end
-
-let init lexbuf =
- current_line := 1;
- current_line_start := lexeme_start lexbuf;
- last_pos := !current_line_start
-
-let close lexbuf =
- Buffer.reset tmp
-
-let pos lexbuf =
- !current_line , !current_line_start ,
- !last_pos ,
- lexeme_start lexbuf
-
-let restore (cl,cls,lp,_) =
- current_line := cl;
- current_line_start := cls;
- last_pos := lp
-
-let newline lexbuf =
- incr current_line;
- last_pos := lexeme_end lexbuf;
- current_line_start := !last_pos
-
-let error lexbuf e =
- last_pos := lexeme_start lexbuf;
- raise (Error e)
-
-}
-
-let newline = ['\n']
-let break = ['\r']
-let space = [' ' '\t']
-let identchar = ['A'-'Z' 'a'-'z' '_' '0'-'9' ':' '-']
-let entitychar = ['A'-'Z' 'a'-'z']
-let pcchar = [^ '\r' '\n' '<' '>' '&']
-
-rule token = parse
- | newline
- {
- newline lexbuf;
- token lexbuf
- }
- | (space | break) +
- {
- last_pos := lexeme_end lexbuf;
- token lexbuf
- }
- | "<!--"
- {
- last_pos := lexeme_start lexbuf;
- comment lexbuf;
- token lexbuf
- }
- | "<?"
- {
- last_pos := lexeme_start lexbuf;
- header lexbuf;
- token lexbuf;
- }
- | '<' space* '/' space*
- {
- last_pos := lexeme_start lexbuf;
- let tag = ident_name lexbuf in
- ignore_spaces lexbuf;
- close_tag lexbuf;
- Endtag tag
- }
- | '<' space*
- {
- last_pos := lexeme_start lexbuf;
- let tag = ident_name lexbuf in
- ignore_spaces lexbuf;
- let attribs, closed = attributes lexbuf in
- Tag(tag, attribs, closed)
- }
- | "&#"
- {
- last_pos := lexeme_start lexbuf;
- Buffer.reset tmp;
- Buffer.add_string tmp (lexeme lexbuf);
- PCData (pcdata lexbuf)
- }
- | '&'
- {
- last_pos := lexeme_start lexbuf;
- Buffer.reset tmp;
- Buffer.add_string tmp (entity lexbuf);
- PCData (pcdata lexbuf)
- }
- | (space | newline | break)* pcchar+
- {
- last_pos := lexeme_start lexbuf;
- Buffer.reset tmp;
- Buffer.add_string tmp (lexeme lexbuf);
- PCData (pcdata lexbuf)
- }
- | eof { Eof }
- | _
- { error lexbuf ENodeExpected }
-
-and ignore_spaces = parse
- | newline
- {
- newline lexbuf;
- ignore_spaces lexbuf
- }
- | (space | break) +
- { ignore_spaces lexbuf }
- | ""
- { () }
-
-and comment = parse
- | newline
- {
- newline lexbuf;
- comment lexbuf
- }
- | "-->"
- { () }
- | eof
- { raise (Error EUnterminatedComment) }
- | _
- { comment lexbuf }
-
-and header = parse
- | newline
- {
- newline lexbuf;
- header lexbuf
- }
- | "?>"
- { () }
- | eof
- { error lexbuf ECloseExpected }
- | _
- { header lexbuf }
-
-and pcdata = parse
- | pcchar+
- {
- Buffer.add_string tmp (lexeme lexbuf);
- pcdata lexbuf
- }
- | "&#"
- {
- Buffer.add_string tmp (lexeme lexbuf);
- pcdata lexbuf;
- }
- | '&'
- {
- Buffer.add_string tmp (entity lexbuf);
- pcdata lexbuf
- }
- | ""
- { Buffer.contents tmp }
-
-and entity = parse
- | entitychar+ ';'
- {
- let ident = lexeme lexbuf in
- try
- Hashtbl.find idents (String.lowercase ident)
- with
- Not_found -> "&" ^ ident
- }
- | _ | eof
- { raise (Error EUnterminatedEntity) }
-
-and ident_name = parse
- | identchar+
- { lexeme lexbuf }
- | _ | eof
- { error lexbuf EIdentExpected }
-
-and close_tag = parse
- | '>'
- { () }
- | _ | eof
- { error lexbuf ECloseExpected }
-
-and attributes = parse
- | '>'
- { [], false }
- | "/>"
- { [], true }
- | "" (* do not read a char ! *)
- {
- let key = attribute lexbuf in
- let data = attribute_data lexbuf in
- ignore_spaces lexbuf;
- let others, closed = attributes lexbuf in
- (key, data) :: others, closed
- }
-
-and attribute = parse
- | identchar+
- { lexeme lexbuf }
- | _ | eof
- { error lexbuf EAttributeNameExpected }
-
-and attribute_data = parse
- | space* '=' space* '"'
- {
- Buffer.reset tmp;
- last_pos := lexeme_end lexbuf;
- dq_string lexbuf
- }
- | space* '=' space* '\''
- {
- Buffer.reset tmp;
- last_pos := lexeme_end lexbuf;
- q_string lexbuf
- }
- | _ | eof
- { error lexbuf EAttributeValueExpected }
-
-and dq_string = parse
- | '"'
- { Buffer.contents tmp }
- | '\\' [ '"' '\\' ]
- {
- Buffer.add_char tmp (lexeme_char lexbuf 1);
- dq_string lexbuf
- }
- | eof
- { raise (Error EUnterminatedString) }
- | _
- {
- Buffer.add_char tmp (lexeme_char lexbuf 0);
- dq_string lexbuf
- }
-
-and q_string = parse
- | '\''
- { Buffer.contents tmp }
- | '\\' [ '\'' '\\' ]
- {
- Buffer.add_char tmp (lexeme_char lexbuf 1);
- q_string lexbuf
- }
- | eof
- { raise (Error EUnterminatedString) }
- | _
- {
- Buffer.add_char tmp (lexeme_char lexbuf 0);
- q_string lexbuf
- }
+{(*
+ * Xml Light, an small Xml parser/printer with DTD support.
+ * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *)
+
+open Lexing
+
+type error =
+ | EUnterminatedComment
+ | EUnterminatedString
+ | EIdentExpected
+ | ECloseExpected
+ | ENodeExpected
+ | EAttributeNameExpected
+ | EAttributeValueExpected
+ | EUnterminatedEntity
+
+exception Error of error
+
+type pos = int * int * int * int
+
+type token =
+ | Tag of string * (string * string) list * bool
+ | PCData of string
+ | Endtag of string
+ | Eof
+
+let last_pos = ref 0
+and current_line = ref 0
+and current_line_start = ref 0
+
+let tmp = Buffer.create 200
+
+let idents = Hashtbl.create 0
+
+let _ = begin
+ Hashtbl.add idents "gt;" ">";
+ Hashtbl.add idents "lt;" "<";
+ Hashtbl.add idents "amp;" "&";
+ Hashtbl.add idents "apos;" "'";
+ Hashtbl.add idents "quot;" "\"";
+end
+
+let init lexbuf =
+ current_line := 1;
+ current_line_start := lexeme_start lexbuf;
+ last_pos := !current_line_start
+
+let close lexbuf =
+ Buffer.reset tmp
+
+let pos lexbuf =
+ !current_line , !current_line_start ,
+ !last_pos ,
+ lexeme_start lexbuf
+
+let restore (cl,cls,lp,_) =
+ current_line := cl;
+ current_line_start := cls;
+ last_pos := lp
+
+let newline lexbuf =
+ incr current_line;
+ last_pos := lexeme_end lexbuf;
+ current_line_start := !last_pos
+
+let error lexbuf e =
+ last_pos := lexeme_start lexbuf;
+ raise (Error e)
+
+}
+
+let newline = ['\n']
+let break = ['\r']
+let space = [' ' '\t']
+let identchar = ['A'-'Z' 'a'-'z' '_' '0'-'9' ':' '-']
+let entitychar = ['A'-'Z' 'a'-'z']
+let pcchar = [^ '\r' '\n' '<' '>' '&']
+
+rule token = parse
+ | newline | (newline break) | break
+ {
+ newline lexbuf;
+ PCData "\n"
+ }
+ | "<!--"
+ {
+ last_pos := lexeme_start lexbuf;
+ comment lexbuf;
+ token lexbuf
+ }
+ | "<?"
+ {
+ last_pos := lexeme_start lexbuf;
+ header lexbuf;
+ token lexbuf;
+ }
+ | '<' space* '/' space*
+ {
+ last_pos := lexeme_start lexbuf;
+ let tag = ident_name lexbuf in
+ ignore_spaces lexbuf;
+ close_tag lexbuf;
+ Endtag tag
+ }
+ | '<' space*
+ {
+ last_pos := lexeme_start lexbuf;
+ let tag = ident_name lexbuf in
+ ignore_spaces lexbuf;
+ let attribs, closed = attributes lexbuf in
+ Tag(tag, attribs, closed)
+ }
+ | "&#"
+ {
+ last_pos := lexeme_start lexbuf;
+ Buffer.reset tmp;
+ Buffer.add_string tmp (lexeme lexbuf);
+ PCData (pcdata lexbuf)
+ }
+ | '&'
+ {
+ last_pos := lexeme_start lexbuf;
+ Buffer.reset tmp;
+ Buffer.add_string tmp (entity lexbuf);
+ PCData (pcdata lexbuf)
+ }
+ | pcchar+
+ {
+ last_pos := lexeme_start lexbuf;
+ Buffer.reset tmp;
+ Buffer.add_string tmp (lexeme lexbuf);
+ PCData (pcdata lexbuf)
+ }
+ | eof { Eof }
+ | _
+ { error lexbuf ENodeExpected }
+
+and ignore_spaces = parse
+ | newline | (newline break) | break
+ {
+ newline lexbuf;
+ ignore_spaces lexbuf
+ }
+ | space +
+ { ignore_spaces lexbuf }
+ | ""
+ { () }
+
+and comment = parse
+ | newline | (newline break) | break
+ {
+ newline lexbuf;
+ comment lexbuf
+ }
+ | "-->"
+ { () }
+ | eof
+ { raise (Error EUnterminatedComment) }
+ | _
+ { comment lexbuf }
+
+and header = parse
+ | newline | (newline break) | break
+ {
+ newline lexbuf;
+ header lexbuf
+ }
+ | "?>"
+ { () }
+ | eof
+ { error lexbuf ECloseExpected }
+ | _
+ { header lexbuf }
+
+and pcdata = parse
+ | pcchar+
+ {
+ Buffer.add_string tmp (lexeme lexbuf);
+ pcdata lexbuf
+ }
+ | "&#"
+ {
+ Buffer.add_string tmp (lexeme lexbuf);
+ pcdata lexbuf;
+ }
+ | '&'
+ {
+ Buffer.add_string tmp (entity lexbuf);
+ pcdata lexbuf
+ }
+ | ""
+ { Buffer.contents tmp }
+
+and entity = parse
+ | entitychar+ ';'
+ {
+ let ident = lexeme lexbuf in
+ try
+ Hashtbl.find idents (String.lowercase ident)
+ with
+ Not_found -> "&" ^ ident
+ }
+ | _ | eof
+ { raise (Error EUnterminatedEntity) }
+
+and ident_name = parse
+ | identchar+
+ { lexeme lexbuf }
+ | _ | eof
+ { error lexbuf EIdentExpected }
+
+and close_tag = parse
+ | '>'
+ { () }
+ | _ | eof
+ { error lexbuf ECloseExpected }
+
+and attributes = parse
+ | '>'
+ { [], false }
+ | "/>"
+ { [], true }
+ | "" (* do not read a char ! *)
+ {
+ let key = attribute lexbuf in
+ let data = attribute_data lexbuf in
+ ignore_spaces lexbuf;
+ let others, closed = attributes lexbuf in
+ (key, data) :: others, closed
+ }
+
+and attribute = parse
+ | identchar+
+ { lexeme lexbuf }
+ | _ | eof
+ { error lexbuf EAttributeNameExpected }
+
+and attribute_data = parse
+ | space* '=' space* '"'
+ {
+ Buffer.reset tmp;
+ last_pos := lexeme_end lexbuf;
+ dq_string lexbuf
+ }
+ | space* '=' space* '\''
+ {
+ Buffer.reset tmp;
+ last_pos := lexeme_end lexbuf;
+ q_string lexbuf
+ }
+ | _ | eof
+ { error lexbuf EAttributeValueExpected }
+
+and dq_string = parse
+ | '"'
+ { Buffer.contents tmp }
+ | '\\' [ '"' '\\' ]
+ {
+ Buffer.add_char tmp (lexeme_char lexbuf 1);
+ dq_string lexbuf
+ }
+ | eof
+ { raise (Error EUnterminatedString) }
+ | _
+ {
+ Buffer.add_char tmp (lexeme_char lexbuf 0);
+ dq_string lexbuf
+ }
+
+and q_string = parse
+ | '\''
+ { Buffer.contents tmp }
+ | '\\' [ '\'' '\\' ]
+ {
+ Buffer.add_char tmp (lexeme_char lexbuf 1);
+ q_string lexbuf
+ }
+ | eof
+ { raise (Error EUnterminatedString) }
+ | _
+ {
+ Buffer.add_char tmp (lexeme_char lexbuf 0);
+ q_string lexbuf
+ }
diff --git a/lib/xml_parser.ml b/lib/xml_parser.ml
index 16d9922ae..0463c6e30 100644
--- a/lib/xml_parser.ml
+++ b/lib/xml_parser.ml
@@ -1,207 +1,227 @@
-(*
- * Xml Light, an small Xml parser/printer with DTD support.
- * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
- * Copyright (C) 2003 Jacques Garrigue
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *)
-
-open Printf
-
-type xml =
- | Element of (string * (string * string) list * xml list)
- | PCData of string
-
-type error_pos = {
- eline : int;
- eline_start : int;
- emin : int;
- emax : int;
-}
-
-type error_msg =
- | UnterminatedComment
- | UnterminatedString
- | UnterminatedEntity
- | IdentExpected
- | CloseExpected
- | NodeExpected
- | AttributeNameExpected
- | AttributeValueExpected
- | EndOfTagExpected of string
- | EOFExpected
-
-type error = error_msg * error_pos
-
-exception Error of error
-
-exception File_not_found of string
-
-type t = {
- mutable check_eof : bool;
- mutable concat_pcdata : bool;
-}
-
-type source =
- | SFile of string
- | SChannel of in_channel
- | SString of string
- | SLexbuf of Lexing.lexbuf
-
-type state = {
- source : Lexing.lexbuf;
- stack : Xml_lexer.token Stack.t;
- xparser : t;
-}
-
-exception Internal_error of error_msg
-exception NoMoreData
-
-let xml_error = ref (fun _ -> assert false)
-let file_not_found = ref (fun _ -> assert false)
-
-let _raises e f =
- xml_error := e;
- file_not_found := f
-
-let make () =
- {
- check_eof = true;
- concat_pcdata = true;
- }
-
-let check_eof p v = p.check_eof <- v
-let concat_pcdata p v = p.concat_pcdata <- v
-
-let pop s =
- try
- Stack.pop s.stack
- with
- Stack.Empty ->
- Xml_lexer.token s.source
-
-let push t s =
- Stack.push t s.stack
-
-let rec read_node s =
- match pop s with
- | Xml_lexer.PCData s -> PCData s
- | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, [])
- | Xml_lexer.Tag (tag, attr, false) -> Element (tag, attr, read_elems ~tag s)
- | t ->
- push t s;
- raise NoMoreData
-and
- read_elems ?tag s =
- let elems = ref [] in
- (try
- while true do
- match s.xparser.concat_pcdata , read_node s , !elems with
- | true , PCData c , (PCData c2) :: q ->
- elems := PCData (sprintf "%s\n%s" c2 c) :: q
- | _ , x , l ->
- elems := x :: l
- done
- with
- NoMoreData -> ());
- match pop s with
- | Xml_lexer.Endtag s when Some s = tag -> List.rev !elems
- | Xml_lexer.Eof when tag = None -> List.rev !elems
- | t ->
- match tag with
- | None -> raise (Internal_error EOFExpected)
- | Some s -> raise (Internal_error (EndOfTagExpected s))
-
-let read_xml s = read_node s
-
-let convert = function
- | Xml_lexer.EUnterminatedComment -> UnterminatedComment
- | Xml_lexer.EUnterminatedString -> UnterminatedString
- | Xml_lexer.EIdentExpected -> IdentExpected
- | Xml_lexer.ECloseExpected -> CloseExpected
- | Xml_lexer.ENodeExpected -> NodeExpected
- | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected
- | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected
- | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity
-
-let do_parse xparser source =
- try
- Xml_lexer.init source;
- let s = { source = source; xparser = xparser; stack = Stack.create(); } in
- let x = read_xml s in
- if xparser.check_eof && pop s <> Xml_lexer.Eof then raise (Internal_error EOFExpected);
- Xml_lexer.close source;
- x
- with
- | NoMoreData ->
- Xml_lexer.close source;
- raise (!xml_error NodeExpected source)
- | Internal_error e ->
- Xml_lexer.close source;
- raise (!xml_error e source)
- | Xml_lexer.Error e ->
- Xml_lexer.close source;
- raise (!xml_error (convert e) source)
-
-let parse p = function
- | SChannel ch -> do_parse p (Lexing.from_channel ch)
- | SString str -> do_parse p (Lexing.from_string str)
- | SLexbuf lex -> do_parse p lex
- | SFile fname ->
- let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in
- try
- let x = do_parse p (Lexing.from_channel ch) in
- close_in ch;
- x
- with
- e ->
- close_in ch;
- raise e
-
-
-let error_msg = function
- | UnterminatedComment -> "Unterminated comment"
- | UnterminatedString -> "Unterminated string"
- | UnterminatedEntity -> "Unterminated entity"
- | IdentExpected -> "Ident expected"
- | CloseExpected -> "Element close expected"
- | NodeExpected -> "Xml node expected"
- | AttributeNameExpected -> "Attribute name expected"
- | AttributeValueExpected -> "Attribute value expected"
- | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag
- | EOFExpected -> "End of file expected"
-
-let error (msg,pos) =
- if pos.emin = pos.emax then
- sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start)
- else
- sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start)
-
-let line e = e.eline
-
-let range e =
- e.emin - e.eline_start , e.emax - e.eline_start
-
-let abs_range e =
- e.emin , e.emax
-
-let pos source =
- let line, lstart, min, max = Xml_lexer.pos source in
- {
- eline = line;
- eline_start = lstart;
- emin = min;
- emax = max;
- } \ No newline at end of file
+(*
+ * Xml Light, an small Xml parser/printer with DTD support.
+ * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
+ * Copyright (C) 2003 Jacques Garrigue
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *)
+
+open Printf
+
+type xml =
+ | Element of (string * (string * string) list * xml list)
+ | PCData of string
+
+type error_pos = {
+ eline : int;
+ eline_start : int;
+ emin : int;
+ emax : int;
+}
+
+type error_msg =
+ | UnterminatedComment
+ | UnterminatedString
+ | UnterminatedEntity
+ | IdentExpected
+ | CloseExpected
+ | NodeExpected
+ | AttributeNameExpected
+ | AttributeValueExpected
+ | EndOfTagExpected of string
+ | EOFExpected
+
+type error = error_msg * error_pos
+
+exception Error of error
+
+exception File_not_found of string
+
+type t = {
+ mutable check_eof : bool;
+ mutable concat_pcdata : bool;
+}
+
+type source =
+ | SFile of string
+ | SChannel of in_channel
+ | SString of string
+ | SLexbuf of Lexing.lexbuf
+
+type state = {
+ source : Lexing.lexbuf;
+ stack : Xml_lexer.token Stack.t;
+ xparser : t;
+}
+
+exception Internal_error of error_msg
+exception NoMoreData
+
+let xml_error = ref (fun _ -> assert false)
+let file_not_found = ref (fun _ -> assert false)
+
+let is_blank s =
+ let len = String.length s in
+ let break = ref true in
+ let i = ref 0 in
+ while !break && !i < len do
+ let c = s.[!i] in
+ (* no '\r' because we replaced them in the lexer *)
+ if c = ' ' || c = '\n' || c = '\t' then incr i
+ else break := false
+ done;
+ !i = len
+
+let _raises e f =
+ xml_error := e;
+ file_not_found := f
+
+let make () =
+ {
+ check_eof = true;
+ concat_pcdata = true;
+ }
+
+let check_eof p v = p.check_eof <- v
+let concat_pcdata p v = p.concat_pcdata <- v
+
+let pop s =
+ try
+ Stack.pop s.stack
+ with
+ Stack.Empty ->
+ Xml_lexer.token s.source
+
+let push t s =
+ Stack.push t s.stack
+
+let canonicalize l =
+ let has_elt = List.exists (function Element _ -> true | _ -> false) l in
+ if has_elt then List.filter (function PCData s -> not (is_blank s) | _ -> true) l
+ else l
+
+let rec read_node s =
+ match pop s with
+ | Xml_lexer.PCData s -> PCData s
+ | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, [])
+ | Xml_lexer.Tag (tag, attr, false) ->
+ let elements = read_elems ~tag s in
+ Element (tag, attr, canonicalize elements)
+ | t ->
+ push t s;
+ raise NoMoreData
+and
+ read_elems ?tag s =
+ let elems = ref [] in
+ (try
+ while true do
+ let node = read_node s in
+ match node, !elems with
+ | PCData c , (PCData c2) :: q ->
+ elems := PCData (c2 ^ c) :: q
+ | _, l ->
+ elems := node :: l
+ done
+ with
+ NoMoreData -> ());
+ match pop s with
+ | Xml_lexer.Endtag s when Some s = tag -> List.rev !elems
+ | Xml_lexer.Eof when tag = None -> List.rev !elems
+ | t ->
+ match tag with
+ | None -> raise (Internal_error EOFExpected)
+ | Some s -> raise (Internal_error (EndOfTagExpected s))
+
+let read_xml s = read_node s
+
+let convert = function
+ | Xml_lexer.EUnterminatedComment -> UnterminatedComment
+ | Xml_lexer.EUnterminatedString -> UnterminatedString
+ | Xml_lexer.EIdentExpected -> IdentExpected
+ | Xml_lexer.ECloseExpected -> CloseExpected
+ | Xml_lexer.ENodeExpected -> NodeExpected
+ | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected
+ | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected
+ | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity
+
+let do_parse xparser source =
+ try
+ Xml_lexer.init source;
+ let s = { source = source; xparser = xparser; stack = Stack.create(); } in
+ let x = read_xml s in
+ if xparser.check_eof && pop s <> Xml_lexer.Eof then raise (Internal_error EOFExpected);
+ Xml_lexer.close source;
+ x
+ with
+ | NoMoreData ->
+ Xml_lexer.close source;
+ raise (!xml_error NodeExpected source)
+ | Internal_error e ->
+ Xml_lexer.close source;
+ raise (!xml_error e source)
+ | Xml_lexer.Error e ->
+ Xml_lexer.close source;
+ raise (!xml_error (convert e) source)
+
+let parse p = function
+ | SChannel ch -> do_parse p (Lexing.from_channel ch)
+ | SString str -> do_parse p (Lexing.from_string str)
+ | SLexbuf lex -> do_parse p lex
+ | SFile fname ->
+ let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in
+ try
+ let x = do_parse p (Lexing.from_channel ch) in
+ close_in ch;
+ x
+ with
+ e ->
+ close_in ch;
+ raise e
+
+
+let error_msg = function
+ | UnterminatedComment -> "Unterminated comment"
+ | UnterminatedString -> "Unterminated string"
+ | UnterminatedEntity -> "Unterminated entity"
+ | IdentExpected -> "Ident expected"
+ | CloseExpected -> "Element close expected"
+ | NodeExpected -> "Xml node expected"
+ | AttributeNameExpected -> "Attribute name expected"
+ | AttributeValueExpected -> "Attribute value expected"
+ | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag
+ | EOFExpected -> "End of file expected"
+
+let error (msg,pos) =
+ if pos.emin = pos.emax then
+ sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start)
+ else
+ sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start)
+
+let line e = e.eline
+
+let range e =
+ e.emin - e.eline_start , e.emax - e.eline_start
+
+let abs_range e =
+ e.emin , e.emax
+
+let pos source =
+ let line, lstart, min, max = Xml_lexer.pos source in
+ {
+ eline = line;
+ eline_start = lstart;
+ emin = min;
+ emax = max;
+ }
diff --git a/lib/xml_parser.mli b/lib/xml_parser.mli
index 34dec792d..7f501986b 100644
--- a/lib/xml_parser.mli
+++ b/lib/xml_parser.mli
@@ -1,115 +1,109 @@
-(*
- * Xml Light, an small Xml parser/printer with DTD support.
- * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *)
-
-(** Xml Light Parser
-
- While basic parsing functions can be used in the {!Xml} module, this module
- is providing a way to create, configure and run an Xml parser.
-
-*)
-
-
-(** An Xml node is either
- [Element (tag-name, attributes, children)] or [PCData text] *)
-type xml =
- | Element of (string * (string * string) list * xml list)
- | PCData of string
-
-(** Abstract type for an Xml parser. *)
-type t
-
-(** {6:exc Xml Exceptions} *)
-
-(** Several exceptions can be raised when parsing an Xml document : {ul
- {li {!Xml.Error} is raised when an xml parsing error occurs. the
- {!Xml.error_msg} tells you which error occured during parsing
- and the {!Xml.error_pos} can be used to retreive the document
- location where the error occured at.}
- {li {!Xml.File_not_found} is raised when and error occured while
- opening a file with the {!Xml.parse_file} function.
- }
- *)
-
-type error_pos
-
-type error_msg =
- | UnterminatedComment
- | UnterminatedString
- | UnterminatedEntity
- | IdentExpected
- | CloseExpected
- | NodeExpected
- | AttributeNameExpected
- | AttributeValueExpected
- | EndOfTagExpected of string
- | EOFExpected
-
-type error = error_msg * error_pos
-
-exception Error of error
-
-exception File_not_found of string
-
-(** Get a full error message from an Xml error. *)
-val error : error -> string
-
-(** Get the Xml error message as a string. *)
-val error_msg : error_msg -> string
-
-(** Get the line the error occured at. *)
-val line : error_pos -> int
-
-(** Get the relative character range (in current line) the error occured at.*)
-val range : error_pos -> int * int
-
-(** Get the absolute character range the error occured at. *)
-val abs_range : error_pos -> int * int
-
-val pos : Lexing.lexbuf -> error_pos
-
-(** Several kind of resources can contain Xml documents. *)
-type source =
- | SFile of string
- | SChannel of in_channel
- | SString of string
- | SLexbuf of Lexing.lexbuf
-
-(** This function returns a new parser with default options. *)
-val make : unit -> t
-
-(** When a Xml document is parsed, the parser will check that the end of the
- document is reached, so for example parsing ["<A/><B/>"] will fail instead
- of returning only the A element. You can turn off this check by setting
- [check_eof] to [false] {i (by default, check_eof is true)}. *)
-val check_eof : t -> bool -> unit
-
-(** Once the parser is configurated, you can run the parser on a any kind
- of xml document source to parse its contents into an Xml data structure. *)
-val parse : t -> source -> xml
-
-(** When several PCData elements are separed by a \n (or \r\n), you can
- either split the PCData in two distincts PCData or merge them with \n
- as seperator into one PCData. The default behavior is to concat the
- PCData, but this can be changed for a given parser with this flag. *)
-val concat_pcdata : t -> bool -> unit
-
-(**/**)
-
-(* internal usage only... *)
-val _raises : (error_msg -> Lexing.lexbuf -> exn) -> (string -> exn) -> unit
+(*
+ * Xml Light, an small Xml parser/printer with DTD support.
+ * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *)
+
+(** Xml Light Parser
+
+ While basic parsing functions can be used in the {!Xml} module, this module
+ is providing a way to create, configure and run an Xml parser.
+
+*)
+
+
+(** An Xml node is either
+ [Element (tag-name, attributes, children)] or [PCData text] *)
+type xml =
+ | Element of (string * (string * string) list * xml list)
+ | PCData of string
+
+(** Abstract type for an Xml parser. *)
+type t
+
+(** {6:exc Xml Exceptions} *)
+
+(** Several exceptions can be raised when parsing an Xml document : {ul
+ {li {!Xml.Error} is raised when an xml parsing error occurs. the
+ {!Xml.error_msg} tells you which error occured during parsing
+ and the {!Xml.error_pos} can be used to retreive the document
+ location where the error occured at.}
+ {li {!Xml.File_not_found} is raised when and error occured while
+ opening a file with the {!Xml.parse_file} function.
+ }
+ *)
+
+type error_pos
+
+type error_msg =
+ | UnterminatedComment
+ | UnterminatedString
+ | UnterminatedEntity
+ | IdentExpected
+ | CloseExpected
+ | NodeExpected
+ | AttributeNameExpected
+ | AttributeValueExpected
+ | EndOfTagExpected of string
+ | EOFExpected
+
+type error = error_msg * error_pos
+
+exception Error of error
+
+exception File_not_found of string
+
+(** Get a full error message from an Xml error. *)
+val error : error -> string
+
+(** Get the Xml error message as a string. *)
+val error_msg : error_msg -> string
+
+(** Get the line the error occured at. *)
+val line : error_pos -> int
+
+(** Get the relative character range (in current line) the error occured at.*)
+val range : error_pos -> int * int
+
+(** Get the absolute character range the error occured at. *)
+val abs_range : error_pos -> int * int
+
+val pos : Lexing.lexbuf -> error_pos
+
+(** Several kind of resources can contain Xml documents. *)
+type source =
+ | SFile of string
+ | SChannel of in_channel
+ | SString of string
+ | SLexbuf of Lexing.lexbuf
+
+(** This function returns a new parser with default options. *)
+val make : unit -> t
+
+(** When a Xml document is parsed, the parser will check that the end of the
+ document is reached, so for example parsing ["<A/><B/>"] will fail instead
+ of returning only the A element. You can turn off this check by setting
+ [check_eof] to [false] {i (by default, check_eof is true)}. *)
+val check_eof : t -> bool -> unit
+
+(** Once the parser is configurated, you can run the parser on a any kind
+ of xml document source to parse its contents into an Xml data structure. *)
+val parse : t -> source -> xml
+
+(**/**)
+
+(* internal usage only... *)
+val _raises : (error_msg -> Lexing.lexbuf -> exn) -> (string -> exn) -> unit
diff --git a/lib/xml_utils.ml b/lib/xml_utils.ml
index 0a73cec09..2fc1b9065 100644
--- a/lib/xml_utils.ml
+++ b/lib/xml_utils.ml
@@ -1,229 +1,229 @@
-(*
- * Xml Light, an small Xml parser/printer with DTD support.
- * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *)
-
-open Printf
-open Xml_parser
-
-exception Not_element of xml
-exception Not_pcdata of xml
-exception No_attribute of string
-
-let default_parser = Xml_parser.make()
-
-let parse (p:Xml_parser.t) (source:Xml_parser.source) =
- (* local cast Xml.xml -> xml *)
- (Obj.magic Xml_parser.parse p source : xml)
-
-let parse_in ch = parse default_parser (Xml_parser.SChannel ch)
-let parse_string str = parse default_parser (Xml_parser.SString str)
-
-let parse_file f = parse default_parser (Xml_parser.SFile f)
-
-let tag = function
- | Element (tag,_,_) -> tag
- | x -> raise (Not_element x)
-
-let pcdata = function
- | PCData text -> text
- | x -> raise (Not_pcdata x)
-
-let attribs = function
- | Element (_,attr,_) -> attr
- | x -> raise (Not_element x)
-
-let attrib x att =
- match x with
- | Element (_,attr,_) ->
- (try
- let att = String.lowercase att in
- snd (List.find (fun (n,_) -> String.lowercase n = att) attr)
- with
- Not_found ->
- raise (No_attribute att))
- | x ->
- raise (Not_element x)
-
-let children = function
- | Element (_,_,clist) -> clist
- | x -> raise (Not_element x)
-
-(*let enum = function
- | Element (_,_,clist) -> List.to_enum clist
- | x -> raise (Not_element x)
-*)
-
-let iter f = function
- | Element (_,_,clist) -> List.iter f clist
- | x -> raise (Not_element x)
-
-let map f = function
- | Element (_,_,clist) -> List.map f clist
- | x -> raise (Not_element x)
-
-let fold f v = function
- | Element (_,_,clist) -> List.fold_left f v clist
- | x -> raise (Not_element x)
-
-let tmp = Buffer.create 200
-
-let buffer_pcdata text =
- let l = String.length text in
- for p = 0 to l-1 do
- match text.[p] with
- | '>' -> Buffer.add_string tmp "&gt;"
- | '<' -> Buffer.add_string tmp "&lt;"
- | '&' ->
- if p < l-1 && text.[p+1] = '#' then
- Buffer.add_char tmp '&'
- else
- Buffer.add_string tmp "&amp;"
- | '\'' -> Buffer.add_string tmp "&apos;"
- | '"' -> Buffer.add_string tmp "&quot;"
- | c -> Buffer.add_char tmp c
- done
-
-let print_pcdata chan text =
- let l = String.length text in
- for p = 0 to l-1 do
- match text.[p] with
- | '>' -> Printf.fprintf chan "&gt;"
- | '<' -> Printf.fprintf chan "&lt;"
- | '&' ->
- if p < l-1 && text.[p+1] = '#' then
- Printf.fprintf chan "&"
- else
- Printf.fprintf chan "&amp;"
- | '\'' -> Printf.fprintf chan "&apos;"
- | '"' -> Printf.fprintf chan "&quot;"
- | c -> Printf.fprintf chan "%c" c
- done
-
-let buffer_attr (n,v) =
- Buffer.add_char tmp ' ';
- Buffer.add_string tmp n;
- Buffer.add_string tmp "=\"";
- let l = String.length v in
- for p = 0 to l-1 do
- match v.[p] with
- | '\\' -> Buffer.add_string tmp "\\\\"
- | '"' -> Buffer.add_string tmp "\\\""
- | c -> Buffer.add_char tmp c
- done;
- Buffer.add_char tmp '"'
-
-let rec print_attr chan (n, v) =
- Printf.fprintf chan " %s=\"" n;
- let l = String.length v in
- for p = 0 to l-1 do
- match v.[p] with
- | '\\' -> Printf.fprintf chan "\\\\"
- | '"' -> Printf.fprintf chan "\\\""
- | c -> Printf.fprintf chan "%c" c
- done;
- Printf.fprintf chan "\""
-
-let print_attrs chan l = List.iter (print_attr chan) l
-
-let rec print_xml chan = function
-| Element (tag, alist, []) ->
- Printf.fprintf chan "<%s%a/>" tag print_attrs alist;
-| Element (tag, alist, l) ->
- Printf.fprintf chan "<%s%a>%a</%s>" tag print_attrs alist
- (fun chan -> List.iter (print_xml chan)) l tag
-| PCData text ->
- print_pcdata chan text
-
-let to_string x =
- let pcdata = ref false in
- let rec loop = function
- | Element (tag,alist,[]) ->
- Buffer.add_char tmp '<';
- Buffer.add_string tmp tag;
- List.iter buffer_attr alist;
- Buffer.add_string tmp "/>";
- pcdata := false;
- | Element (tag,alist,l) ->
- Buffer.add_char tmp '<';
- Buffer.add_string tmp tag;
- List.iter buffer_attr alist;
- Buffer.add_char tmp '>';
- pcdata := false;
- List.iter loop l;
- Buffer.add_string tmp "</";
- Buffer.add_string tmp tag;
- Buffer.add_char tmp '>';
- pcdata := false;
- | PCData text ->
- if !pcdata then Buffer.add_char tmp ' ';
- buffer_pcdata text;
- pcdata := true;
- in
- Buffer.reset tmp;
- loop x;
- let s = Buffer.contents tmp in
- Buffer.reset tmp;
- s
-
-let to_string_fmt x =
- let rec loop ?(newl=false) tab = function
- | Element (tag,alist,[]) ->
- Buffer.add_string tmp tab;
- Buffer.add_char tmp '<';
- Buffer.add_string tmp tag;
- List.iter buffer_attr alist;
- Buffer.add_string tmp "/>";
- if newl then Buffer.add_char tmp '\n';
- | Element (tag,alist,[PCData text]) ->
- Buffer.add_string tmp tab;
- Buffer.add_char tmp '<';
- Buffer.add_string tmp tag;
- List.iter buffer_attr alist;
- Buffer.add_string tmp ">";
- buffer_pcdata text;
- Buffer.add_string tmp "</";
- Buffer.add_string tmp tag;
- Buffer.add_char tmp '>';
- if newl then Buffer.add_char tmp '\n';
- | Element (tag,alist,l) ->
- Buffer.add_string tmp tab;
- Buffer.add_char tmp '<';
- Buffer.add_string tmp tag;
- List.iter buffer_attr alist;
- Buffer.add_string tmp ">\n";
- List.iter (loop ~newl:true (tab^" ")) l;
- Buffer.add_string tmp tab;
- Buffer.add_string tmp "</";
- Buffer.add_string tmp tag;
- Buffer.add_char tmp '>';
- if newl then Buffer.add_char tmp '\n';
- | PCData text ->
- buffer_pcdata text;
- if newl then Buffer.add_char tmp '\n';
- in
- Buffer.reset tmp;
- loop "" x;
- let s = Buffer.contents tmp in
- Buffer.reset tmp;
- s
-
-;;
-Xml_parser._raises (fun x p ->
- (* local cast : Xml.error_msg -> error_msg *)
- Error (x, pos p))
- (fun f -> File_not_found f);;
+(*
+ * Xml Light, an small Xml parser/printer with DTD support.
+ * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *)
+
+open Printf
+open Xml_parser
+
+exception Not_element of xml
+exception Not_pcdata of xml
+exception No_attribute of string
+
+let default_parser = Xml_parser.make()
+
+let parse (p:Xml_parser.t) (source:Xml_parser.source) =
+ (* local cast Xml.xml -> xml *)
+ (Obj.magic Xml_parser.parse p source : xml)
+
+let parse_in ch = parse default_parser (Xml_parser.SChannel ch)
+let parse_string str = parse default_parser (Xml_parser.SString str)
+
+let parse_file f = parse default_parser (Xml_parser.SFile f)
+
+let tag = function
+ | Element (tag,_,_) -> tag
+ | x -> raise (Not_element x)
+
+let pcdata = function
+ | PCData text -> text
+ | x -> raise (Not_pcdata x)
+
+let attribs = function
+ | Element (_,attr,_) -> attr
+ | x -> raise (Not_element x)
+
+let attrib x att =
+ match x with
+ | Element (_,attr,_) ->
+ (try
+ let att = String.lowercase att in
+ snd (List.find (fun (n,_) -> String.lowercase n = att) attr)
+ with
+ Not_found ->
+ raise (No_attribute att))
+ | x ->
+ raise (Not_element x)
+
+let children = function
+ | Element (_,_,clist) -> clist
+ | x -> raise (Not_element x)
+
+(*let enum = function
+ | Element (_,_,clist) -> List.to_enum clist
+ | x -> raise (Not_element x)
+*)
+
+let iter f = function
+ | Element (_,_,clist) -> List.iter f clist
+ | x -> raise (Not_element x)
+
+let map f = function
+ | Element (_,_,clist) -> List.map f clist
+ | x -> raise (Not_element x)
+
+let fold f v = function
+ | Element (_,_,clist) -> List.fold_left f v clist
+ | x -> raise (Not_element x)
+
+let tmp = Buffer.create 200
+
+let buffer_pcdata text =
+ let l = String.length text in
+ for p = 0 to l-1 do
+ match text.[p] with
+ | '>' -> Buffer.add_string tmp "&gt;"
+ | '<' -> Buffer.add_string tmp "&lt;"
+ | '&' ->
+ if p < l-1 && text.[p+1] = '#' then
+ Buffer.add_char tmp '&'
+ else
+ Buffer.add_string tmp "&amp;"
+ | '\'' -> Buffer.add_string tmp "&apos;"
+ | '"' -> Buffer.add_string tmp "&quot;"
+ | c -> Buffer.add_char tmp c
+ done
+
+let print_pcdata chan text =
+ let l = String.length text in
+ for p = 0 to l-1 do
+ match text.[p] with
+ | '>' -> Printf.fprintf chan "&gt;"
+ | '<' -> Printf.fprintf chan "&lt;"
+ | '&' ->
+ if p < l-1 && text.[p+1] = '#' then
+ Printf.fprintf chan "&"
+ else
+ Printf.fprintf chan "&amp;"
+ | '\'' -> Printf.fprintf chan "&apos;"
+ | '"' -> Printf.fprintf chan "&quot;"
+ | c -> Printf.fprintf chan "%c" c
+ done
+
+let buffer_attr (n,v) =
+ Buffer.add_char tmp ' ';
+ Buffer.add_string tmp n;
+ Buffer.add_string tmp "=\"";
+ let l = String.length v in
+ for p = 0 to l-1 do
+ match v.[p] with
+ | '\\' -> Buffer.add_string tmp "\\\\"
+ | '"' -> Buffer.add_string tmp "\\\""
+ | c -> Buffer.add_char tmp c
+ done;
+ Buffer.add_char tmp '"'
+
+let rec print_attr chan (n, v) =
+ Printf.fprintf chan " %s=\"" n;
+ let l = String.length v in
+ for p = 0 to l-1 do
+ match v.[p] with
+ | '\\' -> Printf.fprintf chan "\\\\"
+ | '"' -> Printf.fprintf chan "\\\""
+ | c -> Printf.fprintf chan "%c" c
+ done;
+ Printf.fprintf chan "\""
+
+let print_attrs chan l = List.iter (print_attr chan) l
+
+let rec print_xml chan = function
+| Element (tag, alist, []) ->
+ Printf.fprintf chan "<%s%a/>" tag print_attrs alist;
+| Element (tag, alist, l) ->
+ Printf.fprintf chan "<%s%a>%a</%s>" tag print_attrs alist
+ (fun chan -> List.iter (print_xml chan)) l tag
+| PCData text ->
+ print_pcdata chan text
+
+let to_string x =
+ let pcdata = ref false in
+ let rec loop = function
+ | Element (tag,alist,[]) ->
+ Buffer.add_char tmp '<';
+ Buffer.add_string tmp tag;
+ List.iter buffer_attr alist;
+ Buffer.add_string tmp "/>";
+ pcdata := false;
+ | Element (tag,alist,l) ->
+ Buffer.add_char tmp '<';
+ Buffer.add_string tmp tag;
+ List.iter buffer_attr alist;
+ Buffer.add_char tmp '>';
+ pcdata := false;
+ List.iter loop l;
+ Buffer.add_string tmp "</";
+ Buffer.add_string tmp tag;
+ Buffer.add_char tmp '>';
+ pcdata := false;
+ | PCData text ->
+ if !pcdata then Buffer.add_char tmp ' ';
+ buffer_pcdata text;
+ pcdata := true;
+ in
+ Buffer.reset tmp;
+ loop x;
+ let s = Buffer.contents tmp in
+ Buffer.reset tmp;
+ s
+
+let to_string_fmt x =
+ let rec loop ?(newl=false) tab = function
+ | Element (tag,alist,[]) ->
+ Buffer.add_string tmp tab;
+ Buffer.add_char tmp '<';
+ Buffer.add_string tmp tag;
+ List.iter buffer_attr alist;
+ Buffer.add_string tmp "/>";
+ if newl then Buffer.add_char tmp '\n';
+ | Element (tag,alist,[PCData text]) ->
+ Buffer.add_string tmp tab;
+ Buffer.add_char tmp '<';
+ Buffer.add_string tmp tag;
+ List.iter buffer_attr alist;
+ Buffer.add_string tmp ">";
+ buffer_pcdata text;
+ Buffer.add_string tmp "</";
+ Buffer.add_string tmp tag;
+ Buffer.add_char tmp '>';
+ if newl then Buffer.add_char tmp '\n';
+ | Element (tag,alist,l) ->
+ Buffer.add_string tmp tab;
+ Buffer.add_char tmp '<';
+ Buffer.add_string tmp tag;
+ List.iter buffer_attr alist;
+ Buffer.add_string tmp ">\n";
+ List.iter (loop ~newl:true (tab^" ")) l;
+ Buffer.add_string tmp tab;
+ Buffer.add_string tmp "</";
+ Buffer.add_string tmp tag;
+ Buffer.add_char tmp '>';
+ if newl then Buffer.add_char tmp '\n';
+ | PCData text ->
+ buffer_pcdata text;
+ if newl then Buffer.add_char tmp '\n';
+ in
+ Buffer.reset tmp;
+ loop "" x;
+ let s = Buffer.contents tmp in
+ Buffer.reset tmp;
+ s
+
+;;
+Xml_parser._raises (fun x p ->
+ (* local cast : Xml.error_msg -> error_msg *)
+ Error (x, pos p))
+ (fun f -> File_not_found f);;
diff --git a/lib/xml_utils.mli b/lib/xml_utils.mli
index 606e3182d..4a4a1309b 100644
--- a/lib/xml_utils.mli
+++ b/lib/xml_utils.mli
@@ -1,93 +1,93 @@
-(*
- * Xml Light, an small Xml parser/printer with DTD support.
- * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *)
-
-(** Xml Light
-
- Xml Light is a minimal Xml parser & printer for OCaml.
- It provide few functions to parse a basic Xml document into
- an OCaml data structure and to print back the data structures
- to an Xml document.
-
- Xml Light has also support for {b DTD} (Document Type Definition).
-
- {i (c)Copyright 2002-2003 Nicolas Cannasse}
-*)
-
-open Xml_parser
-
-(** {6 Xml Functions} *)
-
-exception Not_element of xml
-exception Not_pcdata of xml
-exception No_attribute of string
-
-(** [tag xdata] returns the tag value of the xml node.
- Raise {!Xml.Not_element} if the xml is not an element *)
-val tag : xml -> string
-
-(** [pcdata xdata] returns the PCData value of the xml node.
- Raise {!Xml.Not_pcdata} if the xml is not a PCData *)
-val pcdata : xml -> string
-
-(** [attribs xdata] returns the attribute list of the xml node.
- First string if the attribute name, second string is attribute value.
- Raise {!Xml.Not_element} if the xml is not an element *)
-val attribs : xml -> (string * string) list
-
-(** [attrib xdata "href"] returns the value of the ["href"]
- attribute of the xml node (attribute matching is case-insensitive).
- Raise {!Xml.No_attribute} if the attribute does not exists in the node's
- attribute list
- Raise {!Xml.Not_element} if the xml is not an element *)
-val attrib : xml -> string -> string
-
-(** [children xdata] returns the children list of the xml node
- Raise {!Xml.Not_element} if the xml is not an element *)
-val children : xml -> xml list
-
-(*** [enum xdata] returns the children enumeration of the xml node
- Raise {!Xml.Not_element} if the xml is not an element *)
-(* val enum : xml -> xml Enum.t *)
-
-(** [iter f xdata] calls f on all children of the xml node.
- Raise {!Xml.Not_element} if the xml is not an element *)
-val iter : (xml -> unit) -> xml -> unit
-
-(** [map f xdata] is equivalent to [List.map f (Xml.children xdata)]
- Raise {!Xml.Not_element} if the xml is not an element *)
-val map : (xml -> 'a) -> xml -> 'a list
-
-(** [fold f init xdata] is equivalent to
- [List.fold_left f init (Xml.children xdata)]
- Raise {!Xml.Not_element} if the xml is not an element *)
-val fold : ('a -> xml -> 'a) -> 'a -> xml -> 'a
-
-(** {6 Xml Printing} *)
-
-(** Print the xml data structure to a channel into a compact xml string (without
- any user-readable formating ). *)
-val print_xml : out_channel -> xml -> unit
-
-(** Print the xml data structure into a compact xml string (without
- any user-readable formating ). *)
-val to_string : xml -> string
-
-(** Print the xml data structure into an user-readable string with
- tabs and lines break between different nodes. *)
-val to_string_fmt : xml -> string
+(*
+ * Xml Light, an small Xml parser/printer with DTD support.
+ * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *)
+
+(** Xml Light
+
+ Xml Light is a minimal Xml parser & printer for OCaml.
+ It provide few functions to parse a basic Xml document into
+ an OCaml data structure and to print back the data structures
+ to an Xml document.
+
+ Xml Light has also support for {b DTD} (Document Type Definition).
+
+ {i (c)Copyright 2002-2003 Nicolas Cannasse}
+*)
+
+open Xml_parser
+
+(** {6 Xml Functions} *)
+
+exception Not_element of xml
+exception Not_pcdata of xml
+exception No_attribute of string
+
+(** [tag xdata] returns the tag value of the xml node.
+ Raise {!Xml.Not_element} if the xml is not an element *)
+val tag : xml -> string
+
+(** [pcdata xdata] returns the PCData value of the xml node.
+ Raise {!Xml.Not_pcdata} if the xml is not a PCData *)
+val pcdata : xml -> string
+
+(** [attribs xdata] returns the attribute list of the xml node.
+ First string if the attribute name, second string is attribute value.
+ Raise {!Xml.Not_element} if the xml is not an element *)
+val attribs : xml -> (string * string) list
+
+(** [attrib xdata "href"] returns the value of the ["href"]
+ attribute of the xml node (attribute matching is case-insensitive).
+ Raise {!Xml.No_attribute} if the attribute does not exists in the node's
+ attribute list
+ Raise {!Xml.Not_element} if the xml is not an element *)
+val attrib : xml -> string -> string
+
+(** [children xdata] returns the children list of the xml node
+ Raise {!Xml.Not_element} if the xml is not an element *)
+val children : xml -> xml list
+
+(*** [enum xdata] returns the children enumeration of the xml node
+ Raise {!Xml.Not_element} if the xml is not an element *)
+(* val enum : xml -> xml Enum.t *)
+
+(** [iter f xdata] calls f on all children of the xml node.
+ Raise {!Xml.Not_element} if the xml is not an element *)
+val iter : (xml -> unit) -> xml -> unit
+
+(** [map f xdata] is equivalent to [List.map f (Xml.children xdata)]
+ Raise {!Xml.Not_element} if the xml is not an element *)
+val map : (xml -> 'a) -> xml -> 'a list
+
+(** [fold f init xdata] is equivalent to
+ [List.fold_left f init (Xml.children xdata)]
+ Raise {!Xml.Not_element} if the xml is not an element *)
+val fold : ('a -> xml -> 'a) -> 'a -> xml -> 'a
+
+(** {6 Xml Printing} *)
+
+(** Print the xml data structure to a channel into a compact xml string (without
+ any user-readable formating ). *)
+val print_xml : out_channel -> xml -> unit
+
+(** Print the xml data structure into a compact xml string (without
+ any user-readable formating ). *)
+val to_string : xml -> string
+
+(** Print the xml data structure into an user-readable string with
+ tabs and lines break between different nodes. *)
+val to_string_fmt : xml -> string