From 9ebf44d84754adc5b64fcf612c6816c02c80462d Mon Sep 17 00:00:00 2001 From: Benjamin Barenblat Date: Sat, 2 Feb 2019 19:29:23 -0500 Subject: Imported Upstream version 8.9.0 --- ide/protocol/xml_parser.ml | 232 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 232 insertions(+) create mode 100644 ide/protocol/xml_parser.ml (limited to 'ide/protocol/xml_parser.ml') diff --git a/ide/protocol/xml_parser.ml b/ide/protocol/xml_parser.ml new file mode 100644 index 00000000..8db3f9e8 --- /dev/null +++ b/ide/protocol/xml_parser.ml @@ -0,0 +1,232 @@ +(* + * Xml Light, an small Xml parser/printer with DTD support. + * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) + * Copyright (C) 2003 Jacques Garrigue + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open Printf +open Xml_datatype + +type xml = Xml_datatype.xml + +type error_pos = { + eline : int; + eline_start : int; + emin : int; + emax : int; +} + +type error_msg = + | UnterminatedComment + | UnterminatedString + | UnterminatedEntity + | IdentExpected + | CloseExpected + | NodeExpected + | AttributeNameExpected + | AttributeValueExpected + | EndOfTagExpected of string + | EOFExpected + | Empty + +type error = error_msg * error_pos + +exception Error of error + +exception File_not_found of string + +type t = { + mutable check_eof : bool; + mutable concat_pcdata : bool; + source : Lexing.lexbuf; + stack : Xml_lexer.token Stack.t; +} + +type source = + | SChannel of in_channel + | SString of string + | SLexbuf of Lexing.lexbuf + +exception Internal_error of error_msg +exception NoMoreData + +let xml_error = ref (fun _ -> assert false) +let file_not_found = ref (fun _ -> assert false) + +let is_blank s = + let len = String.length s in + let break = ref true in + let i = ref 0 in + while !break && !i < len do + let c = s.[!i] in + (* no '\r' because we replaced them in the lexer *) + if c = ' ' || c = '\n' || c = '\t' then incr i + else break := false + done; + !i = len + +let _raises e f = + xml_error := e; + file_not_found := f + +let make source = + let source = match source with + | SChannel chan -> Lexing.from_channel chan + | SString s -> Lexing.from_string s + | SLexbuf lexbuf -> lexbuf + in + let () = Xml_lexer.init source in + { + check_eof = false; + concat_pcdata = true; + source = source; + stack = Stack.create (); + } + +let check_eof p v = p.check_eof <- v + +let pop s = + try + Stack.pop s.stack + with + Stack.Empty -> + Xml_lexer.token s.source + +let push t s = + Stack.push t s.stack + +let canonicalize l = + let has_elt = List.exists (function Element _ -> true | _ -> false) l in + if has_elt then List.filter (function PCData s -> not (is_blank s) | _ -> true) l + else l + +let rec read_xml do_not_canonicalize s = + let rec read_node s = + match pop s with + | Xml_lexer.PCData s -> PCData s + | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, []) + | Xml_lexer.Tag (tag, attr, false) -> + let elements = read_elems tag s in + let elements = + if do_not_canonicalize then elements else canonicalize elements + in + Element (tag, attr, elements) + | t -> + push t s; + raise NoMoreData + + and read_elems tag s = + let elems = ref [] in + (try + while true do + let node = read_node s in + match node, !elems with + | PCData c , (PCData c2) :: q -> + elems := PCData (c2 ^ c) :: q + | _, l -> + elems := node :: l + done + with + NoMoreData -> ()); + match pop s with + | Xml_lexer.Endtag s when s = tag -> List.rev !elems + | t -> raise (Internal_error (EndOfTagExpected tag)) + in + match read_node s with + | (Element _) as node -> + node + | PCData c -> + if is_blank c then + read_xml do_not_canonicalize s + else + raise (Xml_lexer.Error Xml_lexer.ENodeExpected) + +let convert = function + | Xml_lexer.EUnterminatedComment -> UnterminatedComment + | Xml_lexer.EUnterminatedString -> UnterminatedString + | Xml_lexer.EIdentExpected -> IdentExpected + | Xml_lexer.ECloseExpected -> CloseExpected + | Xml_lexer.ENodeExpected -> NodeExpected + | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected + | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected + | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity + +let error_of_exn xparser = function + | NoMoreData when pop xparser = Xml_lexer.Eof -> Empty + | NoMoreData -> NodeExpected + | Internal_error e -> e + | Xml_lexer.Error e -> convert e + | e -> + (*let e = Errors.push e in: We do not record backtrace here. *) + raise e + +let do_parse do_not_canonicalize xparser = + try + Xml_lexer.init xparser.source; + let x = read_xml do_not_canonicalize xparser in + if xparser.check_eof && pop xparser <> Xml_lexer.Eof then raise (Internal_error EOFExpected); + Xml_lexer.close (); + x + with any -> + Xml_lexer.close (); + raise (!xml_error (error_of_exn xparser any) xparser.source) + +let parse ?(do_not_canonicalize=false) p = + do_parse do_not_canonicalize p + +let error_msg = function + | UnterminatedComment -> "Unterminated comment" + | UnterminatedString -> "Unterminated string" + | UnterminatedEntity -> "Unterminated entity" + | IdentExpected -> "Ident expected" + | CloseExpected -> "Element close expected" + | NodeExpected -> "Xml node expected" + | AttributeNameExpected -> "Attribute name expected" + | AttributeValueExpected -> "Attribute value expected" + | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag + | EOFExpected -> "End of file expected" + | Empty -> "Empty" + +let error (msg,pos) = + if pos.emin = pos.emax then + sprintf "%s line %d character %d" (error_msg msg) pos.eline + (pos.emin - pos.eline_start) + else + sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline + (pos.emin - pos.eline_start) (pos.emax - pos.eline_start) + +let line e = e.eline + +let range e = + e.emin - e.eline_start , e.emax - e.eline_start + +let abs_range e = + e.emin , e.emax + +let pos source = + let line, lstart, min, max = Xml_lexer.pos source in + { + eline = line; + eline_start = lstart; + emin = min; + emax = max; + } + +let () = _raises (fun x p -> + (* local cast : Xml.error_msg -> error_msg *) + Error (x, pos p)) + (fun f -> File_not_found f) -- cgit v1.2.3