diff options
Diffstat (limited to 'lib/xml_parser.ml')
-rw-r--r-- | lib/xml_parser.ml | 232 |
1 files changed, 0 insertions, 232 deletions
diff --git a/lib/xml_parser.ml b/lib/xml_parser.ml deleted file mode 100644 index 8db3f9e8..00000000 --- a/lib/xml_parser.ml +++ /dev/null @@ -1,232 +0,0 @@ -(* - * Xml Light, an small Xml parser/printer with DTD support. - * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) - * Copyright (C) 2003 Jacques Garrigue - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - *) - -open Printf -open Xml_datatype - -type xml = Xml_datatype.xml - -type error_pos = { - eline : int; - eline_start : int; - emin : int; - emax : int; -} - -type error_msg = - | UnterminatedComment - | UnterminatedString - | UnterminatedEntity - | IdentExpected - | CloseExpected - | NodeExpected - | AttributeNameExpected - | AttributeValueExpected - | EndOfTagExpected of string - | EOFExpected - | Empty - -type error = error_msg * error_pos - -exception Error of error - -exception File_not_found of string - -type t = { - mutable check_eof : bool; - mutable concat_pcdata : bool; - source : Lexing.lexbuf; - stack : Xml_lexer.token Stack.t; -} - -type source = - | SChannel of in_channel - | SString of string - | SLexbuf of Lexing.lexbuf - -exception Internal_error of error_msg -exception NoMoreData - -let xml_error = ref (fun _ -> assert false) -let file_not_found = ref (fun _ -> assert false) - -let is_blank s = - let len = String.length s in - let break = ref true in - let i = ref 0 in - while !break && !i < len do - let c = s.[!i] in - (* no '\r' because we replaced them in the lexer *) - if c = ' ' || c = '\n' || c = '\t' then incr i - else break := false - done; - !i = len - -let _raises e f = - xml_error := e; - file_not_found := f - -let make source = - let source = match source with - | SChannel chan -> Lexing.from_channel chan - | SString s -> Lexing.from_string s - | SLexbuf lexbuf -> lexbuf - in - let () = Xml_lexer.init source in - { - check_eof = false; - concat_pcdata = true; - source = source; - stack = Stack.create (); - } - -let check_eof p v = p.check_eof <- v - -let pop s = - try - Stack.pop s.stack - with - Stack.Empty -> - Xml_lexer.token s.source - -let push t s = - Stack.push t s.stack - -let canonicalize l = - let has_elt = List.exists (function Element _ -> true | _ -> false) l in - if has_elt then List.filter (function PCData s -> not (is_blank s) | _ -> true) l - else l - -let rec read_xml do_not_canonicalize s = - let rec read_node s = - match pop s with - | Xml_lexer.PCData s -> PCData s - | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, []) - | Xml_lexer.Tag (tag, attr, false) -> - let elements = read_elems tag s in - let elements = - if do_not_canonicalize then elements else canonicalize elements - in - Element (tag, attr, elements) - | t -> - push t s; - raise NoMoreData - - and read_elems tag s = - let elems = ref [] in - (try - while true do - let node = read_node s in - match node, !elems with - | PCData c , (PCData c2) :: q -> - elems := PCData (c2 ^ c) :: q - | _, l -> - elems := node :: l - done - with - NoMoreData -> ()); - match pop s with - | Xml_lexer.Endtag s when s = tag -> List.rev !elems - | t -> raise (Internal_error (EndOfTagExpected tag)) - in - match read_node s with - | (Element _) as node -> - node - | PCData c -> - if is_blank c then - read_xml do_not_canonicalize s - else - raise (Xml_lexer.Error Xml_lexer.ENodeExpected) - -let convert = function - | Xml_lexer.EUnterminatedComment -> UnterminatedComment - | Xml_lexer.EUnterminatedString -> UnterminatedString - | Xml_lexer.EIdentExpected -> IdentExpected - | Xml_lexer.ECloseExpected -> CloseExpected - | Xml_lexer.ENodeExpected -> NodeExpected - | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected - | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected - | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity - -let error_of_exn xparser = function - | NoMoreData when pop xparser = Xml_lexer.Eof -> Empty - | NoMoreData -> NodeExpected - | Internal_error e -> e - | Xml_lexer.Error e -> convert e - | e -> - (*let e = Errors.push e in: We do not record backtrace here. *) - raise e - -let do_parse do_not_canonicalize xparser = - try - Xml_lexer.init xparser.source; - let x = read_xml do_not_canonicalize xparser in - if xparser.check_eof && pop xparser <> Xml_lexer.Eof then raise (Internal_error EOFExpected); - Xml_lexer.close (); - x - with any -> - Xml_lexer.close (); - raise (!xml_error (error_of_exn xparser any) xparser.source) - -let parse ?(do_not_canonicalize=false) p = - do_parse do_not_canonicalize p - -let error_msg = function - | UnterminatedComment -> "Unterminated comment" - | UnterminatedString -> "Unterminated string" - | UnterminatedEntity -> "Unterminated entity" - | IdentExpected -> "Ident expected" - | CloseExpected -> "Element close expected" - | NodeExpected -> "Xml node expected" - | AttributeNameExpected -> "Attribute name expected" - | AttributeValueExpected -> "Attribute value expected" - | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag - | EOFExpected -> "End of file expected" - | Empty -> "Empty" - -let error (msg,pos) = - if pos.emin = pos.emax then - sprintf "%s line %d character %d" (error_msg msg) pos.eline - (pos.emin - pos.eline_start) - else - sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline - (pos.emin - pos.eline_start) (pos.emax - pos.eline_start) - -let line e = e.eline - -let range e = - e.emin - e.eline_start , e.emax - e.eline_start - -let abs_range e = - e.emin , e.emax - -let pos source = - let line, lstart, min, max = Xml_lexer.pos source in - { - eline = line; - eline_start = lstart; - emin = min; - emax = max; - } - -let () = _raises (fun x p -> - (* local cast : Xml.error_msg -> error_msg *) - Error (x, pos p)) - (fun f -> File_not_found f) |