summaryrefslogtreecommitdiff
path: root/lib/xml_parser.ml
diff options
context:
space:
mode:
Diffstat (limited to 'lib/xml_parser.ml')
-rw-r--r--lib/xml_parser.ml232
1 files changed, 0 insertions, 232 deletions
diff --git a/lib/xml_parser.ml b/lib/xml_parser.ml
deleted file mode 100644
index 8db3f9e8..00000000
--- a/lib/xml_parser.ml
+++ /dev/null
@@ -1,232 +0,0 @@
-(*
- * Xml Light, an small Xml parser/printer with DTD support.
- * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
- * Copyright (C) 2003 Jacques Garrigue
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *)
-
-open Printf
-open Xml_datatype
-
-type xml = Xml_datatype.xml
-
-type error_pos = {
- eline : int;
- eline_start : int;
- emin : int;
- emax : int;
-}
-
-type error_msg =
- | UnterminatedComment
- | UnterminatedString
- | UnterminatedEntity
- | IdentExpected
- | CloseExpected
- | NodeExpected
- | AttributeNameExpected
- | AttributeValueExpected
- | EndOfTagExpected of string
- | EOFExpected
- | Empty
-
-type error = error_msg * error_pos
-
-exception Error of error
-
-exception File_not_found of string
-
-type t = {
- mutable check_eof : bool;
- mutable concat_pcdata : bool;
- source : Lexing.lexbuf;
- stack : Xml_lexer.token Stack.t;
-}
-
-type source =
- | SChannel of in_channel
- | SString of string
- | SLexbuf of Lexing.lexbuf
-
-exception Internal_error of error_msg
-exception NoMoreData
-
-let xml_error = ref (fun _ -> assert false)
-let file_not_found = ref (fun _ -> assert false)
-
-let is_blank s =
- let len = String.length s in
- let break = ref true in
- let i = ref 0 in
- while !break && !i < len do
- let c = s.[!i] in
- (* no '\r' because we replaced them in the lexer *)
- if c = ' ' || c = '\n' || c = '\t' then incr i
- else break := false
- done;
- !i = len
-
-let _raises e f =
- xml_error := e;
- file_not_found := f
-
-let make source =
- let source = match source with
- | SChannel chan -> Lexing.from_channel chan
- | SString s -> Lexing.from_string s
- | SLexbuf lexbuf -> lexbuf
- in
- let () = Xml_lexer.init source in
- {
- check_eof = false;
- concat_pcdata = true;
- source = source;
- stack = Stack.create ();
- }
-
-let check_eof p v = p.check_eof <- v
-
-let pop s =
- try
- Stack.pop s.stack
- with
- Stack.Empty ->
- Xml_lexer.token s.source
-
-let push t s =
- Stack.push t s.stack
-
-let canonicalize l =
- let has_elt = List.exists (function Element _ -> true | _ -> false) l in
- if has_elt then List.filter (function PCData s -> not (is_blank s) | _ -> true) l
- else l
-
-let rec read_xml do_not_canonicalize s =
- let rec read_node s =
- match pop s with
- | Xml_lexer.PCData s -> PCData s
- | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, [])
- | Xml_lexer.Tag (tag, attr, false) ->
- let elements = read_elems tag s in
- let elements =
- if do_not_canonicalize then elements else canonicalize elements
- in
- Element (tag, attr, elements)
- | t ->
- push t s;
- raise NoMoreData
-
- and read_elems tag s =
- let elems = ref [] in
- (try
- while true do
- let node = read_node s in
- match node, !elems with
- | PCData c , (PCData c2) :: q ->
- elems := PCData (c2 ^ c) :: q
- | _, l ->
- elems := node :: l
- done
- with
- NoMoreData -> ());
- match pop s with
- | Xml_lexer.Endtag s when s = tag -> List.rev !elems
- | t -> raise (Internal_error (EndOfTagExpected tag))
- in
- match read_node s with
- | (Element _) as node ->
- node
- | PCData c ->
- if is_blank c then
- read_xml do_not_canonicalize s
- else
- raise (Xml_lexer.Error Xml_lexer.ENodeExpected)
-
-let convert = function
- | Xml_lexer.EUnterminatedComment -> UnterminatedComment
- | Xml_lexer.EUnterminatedString -> UnterminatedString
- | Xml_lexer.EIdentExpected -> IdentExpected
- | Xml_lexer.ECloseExpected -> CloseExpected
- | Xml_lexer.ENodeExpected -> NodeExpected
- | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected
- | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected
- | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity
-
-let error_of_exn xparser = function
- | NoMoreData when pop xparser = Xml_lexer.Eof -> Empty
- | NoMoreData -> NodeExpected
- | Internal_error e -> e
- | Xml_lexer.Error e -> convert e
- | e ->
- (*let e = Errors.push e in: We do not record backtrace here. *)
- raise e
-
-let do_parse do_not_canonicalize xparser =
- try
- Xml_lexer.init xparser.source;
- let x = read_xml do_not_canonicalize xparser in
- if xparser.check_eof && pop xparser <> Xml_lexer.Eof then raise (Internal_error EOFExpected);
- Xml_lexer.close ();
- x
- with any ->
- Xml_lexer.close ();
- raise (!xml_error (error_of_exn xparser any) xparser.source)
-
-let parse ?(do_not_canonicalize=false) p =
- do_parse do_not_canonicalize p
-
-let error_msg = function
- | UnterminatedComment -> "Unterminated comment"
- | UnterminatedString -> "Unterminated string"
- | UnterminatedEntity -> "Unterminated entity"
- | IdentExpected -> "Ident expected"
- | CloseExpected -> "Element close expected"
- | NodeExpected -> "Xml node expected"
- | AttributeNameExpected -> "Attribute name expected"
- | AttributeValueExpected -> "Attribute value expected"
- | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag
- | EOFExpected -> "End of file expected"
- | Empty -> "Empty"
-
-let error (msg,pos) =
- if pos.emin = pos.emax then
- sprintf "%s line %d character %d" (error_msg msg) pos.eline
- (pos.emin - pos.eline_start)
- else
- sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline
- (pos.emin - pos.eline_start) (pos.emax - pos.eline_start)
-
-let line e = e.eline
-
-let range e =
- e.emin - e.eline_start , e.emax - e.eline_start
-
-let abs_range e =
- e.emin , e.emax
-
-let pos source =
- let line, lstart, min, max = Xml_lexer.pos source in
- {
- eline = line;
- eline_start = lstart;
- emin = min;
- emax = max;
- }
-
-let () = _raises (fun x p ->
- (* local cast : Xml.error_msg -> error_msg *)
- Error (x, pos p))
- (fun f -> File_not_found f)