summaryrefslogtreecommitdiff
path: root/lib/xml_parser.ml
diff options
context:
space:
mode:
Diffstat (limited to 'lib/xml_parser.ml')
-rw-r--r--lib/xml_parser.ml238
1 files changed, 238 insertions, 0 deletions
diff --git a/lib/xml_parser.ml b/lib/xml_parser.ml
new file mode 100644
index 00000000..bf931d75
--- /dev/null
+++ b/lib/xml_parser.ml
@@ -0,0 +1,238 @@
+(*
+ * Xml Light, an small Xml parser/printer with DTD support.
+ * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
+ * Copyright (C) 2003 Jacques Garrigue
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *)
+
+open Printf
+
+type xml =
+ | Element of (string * (string * string) list * xml list)
+ | PCData of string
+
+type error_pos = {
+ eline : int;
+ eline_start : int;
+ emin : int;
+ emax : int;
+}
+
+type error_msg =
+ | UnterminatedComment
+ | UnterminatedString
+ | UnterminatedEntity
+ | IdentExpected
+ | CloseExpected
+ | NodeExpected
+ | AttributeNameExpected
+ | AttributeValueExpected
+ | EndOfTagExpected of string
+ | EOFExpected
+
+type error = error_msg * error_pos
+
+exception Error of error
+
+exception File_not_found of string
+
+type t = {
+ mutable check_eof : bool;
+ mutable concat_pcdata : bool;
+}
+
+type source =
+ | SFile of string
+ | SChannel of in_channel
+ | SString of string
+ | SLexbuf of Lexing.lexbuf
+
+type state = {
+ source : Lexing.lexbuf;
+ stack : Xml_lexer.token Stack.t;
+ xparser : t;
+}
+
+exception Internal_error of error_msg
+exception NoMoreData
+
+let xml_error = ref (fun _ -> assert false)
+let file_not_found = ref (fun _ -> assert false)
+
+let is_blank s =
+ let len = String.length s in
+ let break = ref true in
+ let i = ref 0 in
+ while !break && !i < len do
+ let c = s.[!i] in
+ (* no '\r' because we replaced them in the lexer *)
+ if c = ' ' || c = '\n' || c = '\t' then incr i
+ else break := false
+ done;
+ !i = len
+
+let _raises e f =
+ xml_error := e;
+ file_not_found := f
+
+let make () =
+ {
+ check_eof = true;
+ concat_pcdata = true;
+ }
+
+let check_eof p v = p.check_eof <- v
+let concat_pcdata p v = p.concat_pcdata <- v
+
+let pop s =
+ try
+ Stack.pop s.stack
+ with
+ Stack.Empty ->
+ Xml_lexer.token s.source
+
+let push t s =
+ Stack.push t s.stack
+
+let canonicalize l =
+ let has_elt = List.exists (function Element _ -> true | _ -> false) l in
+ if has_elt then List.filter (function PCData s -> not (is_blank s) | _ -> true) l
+ else l
+
+let rec read_node s =
+ match pop s with
+ | Xml_lexer.PCData s -> PCData s
+ | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, [])
+ | Xml_lexer.Tag (tag, attr, false) ->
+ let elements = read_elems ~tag s in
+ Element (tag, attr, canonicalize elements)
+ | t ->
+ push t s;
+ raise NoMoreData
+and
+ read_elems ?tag s =
+ let elems = ref [] in
+ (try
+ while true do
+ let node = read_node s in
+ match node, !elems with
+ | PCData c , (PCData c2) :: q ->
+ elems := PCData (c2 ^ c) :: q
+ | _, l ->
+ elems := node :: l
+ done
+ with
+ NoMoreData -> ());
+ match pop s with
+ | Xml_lexer.Endtag s when Some s = tag -> List.rev !elems
+ | Xml_lexer.Eof when tag = None -> List.rev !elems
+ | t ->
+ match tag with
+ | None -> raise (Internal_error EOFExpected)
+ | Some s -> raise (Internal_error (EndOfTagExpected s))
+
+let rec read_xml s =
+ let node = read_node s in
+ match node with
+ | Element _ -> node
+ | PCData c ->
+ if is_blank c then read_xml s
+ else raise (Xml_lexer.Error Xml_lexer.ENodeExpected)
+
+let convert = function
+ | Xml_lexer.EUnterminatedComment -> UnterminatedComment
+ | Xml_lexer.EUnterminatedString -> UnterminatedString
+ | Xml_lexer.EIdentExpected -> IdentExpected
+ | Xml_lexer.ECloseExpected -> CloseExpected
+ | Xml_lexer.ENodeExpected -> NodeExpected
+ | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected
+ | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected
+ | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity
+
+let do_parse xparser source =
+ try
+ Xml_lexer.init source;
+ let s = { source = source; xparser = xparser; stack = Stack.create(); } in
+ let x = read_xml s in
+ if xparser.check_eof && pop s <> Xml_lexer.Eof then raise (Internal_error EOFExpected);
+ Xml_lexer.close source;
+ x
+ with
+ | NoMoreData ->
+ Xml_lexer.close source;
+ raise (!xml_error NodeExpected source)
+ | Internal_error e ->
+ Xml_lexer.close source;
+ raise (!xml_error e source)
+ | Xml_lexer.Error e ->
+ Xml_lexer.close source;
+ raise (!xml_error (convert e) source)
+
+let parse p = function
+ | SChannel ch -> do_parse p (Lexing.from_channel ch)
+ | SString str -> do_parse p (Lexing.from_string str)
+ | SLexbuf lex -> do_parse p lex
+ | SFile fname ->
+ let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in
+ try
+ let x = do_parse p (Lexing.from_channel ch) in
+ close_in ch;
+ x
+ with
+ e ->
+ close_in ch;
+ raise e
+
+
+let error_msg = function
+ | UnterminatedComment -> "Unterminated comment"
+ | UnterminatedString -> "Unterminated string"
+ | UnterminatedEntity -> "Unterminated entity"
+ | IdentExpected -> "Ident expected"
+ | CloseExpected -> "Element close expected"
+ | NodeExpected -> "Xml node expected"
+ | AttributeNameExpected -> "Attribute name expected"
+ | AttributeValueExpected -> "Attribute value expected"
+ | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag
+ | EOFExpected -> "End of file expected"
+
+let error (msg,pos) =
+ if pos.emin = pos.emax then
+ sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start)
+ else
+ sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start)
+
+let line e = e.eline
+
+let range e =
+ e.emin - e.eline_start , e.emax - e.eline_start
+
+let abs_range e =
+ e.emin , e.emax
+
+let pos source =
+ let line, lstart, min, max = Xml_lexer.pos source in
+ {
+ eline = line;
+ eline_start = lstart;
+ emin = min;
+ emax = max;
+ }
+
+let () = _raises (fun x p ->
+ (* local cast : Xml.error_msg -> error_msg *)
+ Error (x, pos p))
+ (fun f -> File_not_found f)