diff options
author | Emilio Jesus Gallego Arias <e+git@x80.org> | 2016-06-01 17:06:25 +0200 |
---|---|---|
committer | Emilio Jesus Gallego Arias <e+git@x80.org> | 2016-06-02 16:45:39 +0200 |
commit | 318fc2c04df1e73cc8a178d4fc1ce8bf5543649b (patch) | |
tree | 440fb8e51d1fe118d866d0c620a86724e3c6eae8 /ide | |
parent | ffd89ea323937b7d323e24a2b6d53cdc857117dd (diff) |
Move ide serialization libraries from lib/ to ide/
This makes the core free from particular protocol choices.
It should help with the ppx serialization project and shrinks clib.cma a
bit.
Diffstat (limited to 'ide')
-rw-r--r-- | ide/coqidetop.mllib | 6 | ||||
-rw-r--r-- | ide/ide.mllib | 7 | ||||
-rw-r--r-- | ide/richprinter.ml | 24 | ||||
-rw-r--r-- | ide/richprinter.mli | 36 | ||||
-rw-r--r-- | ide/serialize.ml | 120 | ||||
-rw-r--r-- | ide/serialize.mli | 39 | ||||
-rw-r--r-- | ide/xml_lexer.mli | 44 | ||||
-rw-r--r-- | ide/xml_lexer.mll | 317 | ||||
-rw-r--r-- | ide/xml_parser.ml | 232 | ||||
-rw-r--r-- | ide/xml_parser.mli | 106 | ||||
-rw-r--r-- | ide/xml_printer.ml | 145 | ||||
-rw-r--r-- | ide/xml_printer.mli | 29 |
12 files changed, 1104 insertions, 1 deletions
diff --git a/ide/coqidetop.mllib b/ide/coqidetop.mllib index c97c6d1cd..ed1fa465d 100644 --- a/ide/coqidetop.mllib +++ b/ide/coqidetop.mllib @@ -1,3 +1,9 @@ +Xml_lexer +Xml_parser +Xml_printer +Serialize +Richprinter Xmlprotocol Texmacspp +Document Ide_slave diff --git a/ide/ide.mllib b/ide/ide.mllib index 83b314283..b2f32fcf7 100644 --- a/ide/ide.mllib +++ b/ide/ide.mllib @@ -14,8 +14,13 @@ Config_lexer Utf8_convert Preferences Project_file -Ideutils +Serialize +Richprinter +Xml_lexer +Xml_parser +Xml_printer Xmlprotocol +Ideutils Coq Coq_lex Sentence diff --git a/ide/richprinter.ml b/ide/richprinter.ml new file mode 100644 index 000000000..5f39f36ea --- /dev/null +++ b/ide/richprinter.ml @@ -0,0 +1,24 @@ +open Richpp + +module RichppConstr = Ppconstr.Richpp +module RichppVernac = Ppvernac.Richpp +module RichppTactic = Pptactic.Richpp + +type rich_pp = + Ppannotation.t Richpp.located Xml_datatype.gxml + * Xml_datatype.xml + +let get_annotations obj = Pp.Tag.prj obj Ppannotation.tag + +let make_richpp pr ast = + let rich_pp = + rich_pp get_annotations (pr ast) + in + let xml = Ppannotation.( + xml_of_rich_pp tag_of_annotation attributes_of_annotation rich_pp + ) + in + (rich_pp, xml) + +let richpp_vernac = make_richpp RichppVernac.pr_vernac +let richpp_constr = make_richpp RichppConstr.pr_constr_expr diff --git a/ide/richprinter.mli b/ide/richprinter.mli new file mode 100644 index 000000000..c9e84e3eb --- /dev/null +++ b/ide/richprinter.mli @@ -0,0 +1,36 @@ +(************************************************************************) +(* v * The Coq Proof Assistant / The Coq Development Team *) +(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2016 *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(************************************************************************) + +(** This module provides an entry point to "rich" pretty-printers that + produce pretty-printing as done by {!Printer} but with additional + annotations represented as a semi-structured document. + + To understand what are these annotations and how they are represented + as standard XML attributes, please refer to {!Ppannotation}. + + In addition to these annotations, each node of the semi-structured + document contains a [startpos] and an [endpos] attribute that + relate this node to the raw pretty-printing. + Please refer to {!Richpp} for more details. *) + +(** A rich pretty-print is composed of: *) +type rich_pp = + + (** - a generalized semi-structured document whose attributes are + annotations ; *) + Ppannotation.t Richpp.located Xml_datatype.gxml + + (** - an XML document, representing annotations as usual textual + XML attributes. *) + * Xml_datatype.xml + +(** [richpp_vernac phrase] produces a rich pretty-printing of [phrase]. *) +val richpp_vernac : Vernacexpr.vernac_expr -> rich_pp + +(** [richpp_constr constr] produces a rich pretty-printing of [constr]. *) +val richpp_constr : Constrexpr.constr_expr -> rich_pp diff --git a/ide/serialize.ml b/ide/serialize.ml new file mode 100644 index 000000000..685ec6049 --- /dev/null +++ b/ide/serialize.ml @@ -0,0 +1,120 @@ +(************************************************************************) +(* v * The Coq Proof Assistant / The Coq Development Team *) +(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2016 *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(************************************************************************) + +open Xml_datatype + +exception Marshal_error + +(** Utility functions *) + +let rec get_attr attr = function + | [] -> raise Not_found + | (k, v) :: l when CString.equal k attr -> v + | _ :: l -> get_attr attr l + +let massoc x l = + try get_attr x l + with Not_found -> raise Marshal_error + +let constructor t c args = Element (t, ["val", c], args) +let do_match t mf = function + | Element (s, attrs, args) when CString.equal s t -> + let c = massoc "val" attrs in + mf c args + | _ -> raise Marshal_error + +let singleton = function + | [x] -> x + | _ -> raise Marshal_error + +let raw_string = function + | [] -> "" + | [PCData s] -> s + | _ -> raise Marshal_error + +(** Base types *) + +let of_unit () = Element ("unit", [], []) +let to_unit : xml -> unit = function + | Element ("unit", [], []) -> () + | _ -> raise Marshal_error + +let of_bool (b : bool) : xml = + if b then constructor "bool" "true" [] + else constructor "bool" "false" [] +let to_bool : xml -> bool = do_match "bool" (fun s _ -> match s with + | "true" -> true + | "false" -> false + | _ -> raise Marshal_error) + +let of_list (f : 'a -> xml) (l : 'a list) = + Element ("list", [], List.map f l) +let to_list (f : xml -> 'a) : xml -> 'a list = function + | Element ("list", [], l) -> List.map f l + | _ -> raise Marshal_error + +let of_option (f : 'a -> xml) : 'a option -> xml = function + | None -> Element ("option", ["val", "none"], []) + | Some x -> Element ("option", ["val", "some"], [f x]) +let to_option (f : xml -> 'a) : xml -> 'a option = function + | Element ("option", ["val", "none"], []) -> None + | Element ("option", ["val", "some"], [x]) -> Some (f x) + | _ -> raise Marshal_error + +let of_string (s : string) : xml = Element ("string", [], [PCData s]) +let to_string : xml -> string = function + | Element ("string", [], l) -> raw_string l + | _ -> raise Marshal_error + +let of_int (i : int) : xml = Element ("int", [], [PCData (string_of_int i)]) +let to_int : xml -> int = function + | Element ("int", [], [PCData s]) -> + (try int_of_string s with Failure _ -> raise Marshal_error) + | _ -> raise Marshal_error + +let of_pair (f : 'a -> xml) (g : 'b -> xml) (x : 'a * 'b) : xml = + Element ("pair", [], [f (fst x); g (snd x)]) +let to_pair (f : xml -> 'a) (g : xml -> 'b) : xml -> 'a * 'b = function + | Element ("pair", [], [x; y]) -> (f x, g y) + | _ -> raise Marshal_error + +let of_union (f : 'a -> xml) (g : 'b -> xml) : ('a,'b) CSig.union -> xml = function + | CSig.Inl x -> Element ("union", ["val","in_l"], [f x]) + | CSig.Inr x -> Element ("union", ["val","in_r"], [g x]) +let to_union (f : xml -> 'a) (g : xml -> 'b) : xml -> ('a,'b) CSig.union = function + | Element ("union", ["val","in_l"], [x]) -> CSig.Inl (f x) + | Element ("union", ["val","in_r"], [x]) -> CSig.Inr (g x) + | _ -> raise Marshal_error + +(** More elaborate types *) + +let of_edit_id i = Element ("edit_id",["val",string_of_int i],[]) +let to_edit_id = function + | Element ("edit_id",["val",i],[]) -> + let id = int_of_string i in + assert (id <= 0 ); + id + | _ -> raise Marshal_error + +let of_loc loc = + let start, stop = Loc.unloc loc in + Element ("loc",[("start",string_of_int start);("stop",string_of_int stop)],[]) +let to_loc xml = + match xml with + | Element ("loc", l,[]) -> + (try + let start = massoc "start" l in + let stop = massoc "stop" l in + Loc.make_loc (int_of_string start, int_of_string stop) + with Not_found | Invalid_argument _ -> raise Marshal_error) + | _ -> raise Marshal_error + +let of_xml x = Element ("xml", [], [x]) +let to_xml xml = match xml with +| Element ("xml", [], [x]) -> x +| _ -> raise Marshal_error diff --git a/ide/serialize.mli b/ide/serialize.mli new file mode 100644 index 000000000..d7c14e7e7 --- /dev/null +++ b/ide/serialize.mli @@ -0,0 +1,39 @@ +(************************************************************************) +(* v * The Coq Proof Assistant / The Coq Development Team *) +(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2016 *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(************************************************************************) + +open Xml_datatype + +exception Marshal_error + +val massoc: string -> (string * string) list -> string +val constructor: string -> string -> xml list -> xml +val do_match: string -> (string -> xml list -> 'b) -> xml -> 'b +val singleton: 'a list -> 'a +val raw_string: xml list -> string +val of_unit: unit -> xml +val to_unit: xml -> unit +val of_bool: bool -> xml +val to_bool: xml -> bool +val of_list: ('a -> xml) -> 'a list -> xml +val to_list: (xml -> 'a) -> xml -> 'a list +val of_option: ('a -> xml) -> 'a option -> xml +val to_option: (xml -> 'a) -> xml -> 'a option +val of_string: string -> xml +val to_string: xml -> string +val of_int: int -> xml +val to_int: xml -> int +val of_pair: ('a -> xml) -> ('b -> xml) -> 'a * 'b -> xml +val to_pair: (xml -> 'a) -> (xml -> 'b) -> xml -> 'a * 'b +val of_union: ('a -> xml) -> ('b -> xml) -> ('a, 'b) CSig.union -> xml +val to_union: (xml -> 'a) -> (xml -> 'b) -> xml -> ('a, 'b) CSig.union +val of_edit_id: int -> xml +val to_edit_id: xml -> int +val of_loc : Loc.t -> xml +val to_loc : xml -> Loc.t +val of_xml : xml -> xml +val to_xml : xml -> xml diff --git a/ide/xml_lexer.mli b/ide/xml_lexer.mli new file mode 100644 index 000000000..e61cb055f --- /dev/null +++ b/ide/xml_lexer.mli @@ -0,0 +1,44 @@ +(* + * Xml Light, an small Xml parser/printer with DTD support. + * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +type error = + | EUnterminatedComment + | EUnterminatedString + | EIdentExpected + | ECloseExpected + | ENodeExpected + | EAttributeNameExpected + | EAttributeValueExpected + | EUnterminatedEntity + +exception Error of error + +type token = + | Tag of string * (string * string) list * bool + | PCData of string + | Endtag of string + | Eof + +type pos = int * int * int * int + +val init : Lexing.lexbuf -> unit +val close : unit -> unit +val token : Lexing.lexbuf -> token +val pos : Lexing.lexbuf -> pos +val restore : pos -> unit diff --git a/ide/xml_lexer.mll b/ide/xml_lexer.mll new file mode 100644 index 000000000..290f2c89a --- /dev/null +++ b/ide/xml_lexer.mll @@ -0,0 +1,317 @@ +{(* + * Xml Light, an small Xml parser/printer with DTD support. + * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open Lexing + +type error = + | EUnterminatedComment + | EUnterminatedString + | EIdentExpected + | ECloseExpected + | ENodeExpected + | EAttributeNameExpected + | EAttributeValueExpected + | EUnterminatedEntity + +exception Error of error + +type pos = int * int * int * int + +type token = + | Tag of string * (string * string) list * bool + | PCData of string + | Endtag of string + | Eof + +let last_pos = ref 0 +and current_line = ref 0 +and current_line_start = ref 0 + +let tmp = Buffer.create 200 + +let idents = Hashtbl.create 0 + +let _ = begin + Hashtbl.add idents "nbsp;" " "; + Hashtbl.add idents "gt;" ">"; + Hashtbl.add idents "lt;" "<"; + Hashtbl.add idents "amp;" "&"; + Hashtbl.add idents "apos;" "'"; + Hashtbl.add idents "quot;" "\""; +end + +let init lexbuf = + current_line := 1; + current_line_start := lexeme_start lexbuf; + last_pos := !current_line_start + +let close lexbuf = + Buffer.reset tmp + +let pos lexbuf = + !current_line , !current_line_start , + !last_pos , + lexeme_start lexbuf + +let restore (cl,cls,lp,_) = + current_line := cl; + current_line_start := cls; + last_pos := lp + +let newline lexbuf = + incr current_line; + last_pos := lexeme_end lexbuf; + current_line_start := !last_pos + +let error lexbuf e = + last_pos := lexeme_start lexbuf; + raise (Error e) + +} + +let newline = ['\n'] +let break = ['\r'] +let space = [' ' '\t'] +let identchar = ['A'-'Z' 'a'-'z' '_' '0'-'9' ':' '-' '.'] +let ident = ['A'-'Z' 'a'-'z' '_' ':'] identchar* +let entitychar = ['A'-'Z' 'a'-'z'] +let pcchar = [^ '\r' '\n' '<' '>' '&'] + +rule token = parse + | newline | (newline break) | break + { + newline lexbuf; + PCData "\n" + } + | "<!--" + { + last_pos := lexeme_start lexbuf; + comment lexbuf; + token lexbuf + } + | "<?" + { + last_pos := lexeme_start lexbuf; + header lexbuf; + token lexbuf; + } + | '<' space* '/' space* + { + last_pos := lexeme_start lexbuf; + let tag = ident_name lexbuf in + ignore_spaces lexbuf; + close_tag lexbuf; + Endtag tag + } + | '<' space* + { + last_pos := lexeme_start lexbuf; + let tag = ident_name lexbuf in + ignore_spaces lexbuf; + let attribs, closed = attributes lexbuf in + Tag(tag, attribs, closed) + } + | "&#" + { + last_pos := lexeme_start lexbuf; + Buffer.reset tmp; + Buffer.add_string tmp (lexeme lexbuf); + PCData (pcdata lexbuf) + } + | '&' + { + last_pos := lexeme_start lexbuf; + Buffer.reset tmp; + Buffer.add_string tmp (entity lexbuf); + PCData (pcdata lexbuf) + } + | pcchar+ + { + last_pos := lexeme_start lexbuf; + Buffer.reset tmp; + Buffer.add_string tmp (lexeme lexbuf); + PCData (pcdata lexbuf) + } + | eof { Eof } + | _ + { error lexbuf ENodeExpected } + +and ignore_spaces = parse + | newline | (newline break) | break + { + newline lexbuf; + ignore_spaces lexbuf + } + | space + + { ignore_spaces lexbuf } + | "" + { () } + +and comment = parse + | newline | (newline break) | break + { + newline lexbuf; + comment lexbuf + } + | "-->" + { () } + | eof + { raise (Error EUnterminatedComment) } + | _ + { comment lexbuf } + +and header = parse + | newline | (newline break) | break + { + newline lexbuf; + header lexbuf + } + | "?>" + { () } + | eof + { error lexbuf ECloseExpected } + | _ + { header lexbuf } + +and pcdata = parse + | newline | (newline break) | break + { + Buffer.add_char tmp '\n'; + newline lexbuf; + pcdata lexbuf + } + | pcchar+ + { + Buffer.add_string tmp (lexeme lexbuf); + pcdata lexbuf + } + | "&#" + { + Buffer.add_string tmp (lexeme lexbuf); + pcdata lexbuf; + } + | '&' + { + Buffer.add_string tmp (entity lexbuf); + pcdata lexbuf + } + | "" + { Buffer.contents tmp } + +and entity = parse + | entitychar+ ';' + { + let ident = lexeme lexbuf in + try + Hashtbl.find idents (String.lowercase ident) + with + Not_found -> "&" ^ ident + } + | _ | eof + { raise (Error EUnterminatedEntity) } + +and ident_name = parse + | ident + { lexeme lexbuf } + | _ | eof + { error lexbuf EIdentExpected } + +and close_tag = parse + | '>' + { () } + | _ | eof + { error lexbuf ECloseExpected } + +and attributes = parse + | '>' + { [], false } + | "/>" + { [], true } + | "" (* do not read a char ! *) + { + let key = attribute lexbuf in + let data = attribute_data lexbuf in + ignore_spaces lexbuf; + let others, closed = attributes lexbuf in + (key, data) :: others, closed + } + +and attribute = parse + | ident + { lexeme lexbuf } + | _ | eof + { error lexbuf EAttributeNameExpected } + +and attribute_data = parse + | space* '=' space* '"' + { + Buffer.reset tmp; + last_pos := lexeme_end lexbuf; + dq_string lexbuf + } + | space* '=' space* '\'' + { + Buffer.reset tmp; + last_pos := lexeme_end lexbuf; + q_string lexbuf + } + | _ | eof + { error lexbuf EAttributeValueExpected } + +and dq_string = parse + | '"' + { Buffer.contents tmp } + | '\\' [ '"' '\\' ] + { + Buffer.add_char tmp (lexeme_char lexbuf 1); + dq_string lexbuf + } + | '&' + { + Buffer.add_string tmp (entity lexbuf); + dq_string lexbuf + } + | eof + { raise (Error EUnterminatedString) } + | _ + { + Buffer.add_char tmp (lexeme_char lexbuf 0); + dq_string lexbuf + } + +and q_string = parse + | '\'' + { Buffer.contents tmp } + | '\\' [ '\'' '\\' ] + { + Buffer.add_char tmp (lexeme_char lexbuf 1); + q_string lexbuf + } + | '&' + { + Buffer.add_string tmp (entity lexbuf); + q_string lexbuf + } + | eof + { raise (Error EUnterminatedString) } + | _ + { + Buffer.add_char tmp (lexeme_char lexbuf 0); + q_string lexbuf + } diff --git a/ide/xml_parser.ml b/ide/xml_parser.ml new file mode 100644 index 000000000..8db3f9e8b --- /dev/null +++ b/ide/xml_parser.ml @@ -0,0 +1,232 @@ +(* + * Xml Light, an small Xml parser/printer with DTD support. + * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) + * Copyright (C) 2003 Jacques Garrigue + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open Printf +open Xml_datatype + +type xml = Xml_datatype.xml + +type error_pos = { + eline : int; + eline_start : int; + emin : int; + emax : int; +} + +type error_msg = + | UnterminatedComment + | UnterminatedString + | UnterminatedEntity + | IdentExpected + | CloseExpected + | NodeExpected + | AttributeNameExpected + | AttributeValueExpected + | EndOfTagExpected of string + | EOFExpected + | Empty + +type error = error_msg * error_pos + +exception Error of error + +exception File_not_found of string + +type t = { + mutable check_eof : bool; + mutable concat_pcdata : bool; + source : Lexing.lexbuf; + stack : Xml_lexer.token Stack.t; +} + +type source = + | SChannel of in_channel + | SString of string + | SLexbuf of Lexing.lexbuf + +exception Internal_error of error_msg +exception NoMoreData + +let xml_error = ref (fun _ -> assert false) +let file_not_found = ref (fun _ -> assert false) + +let is_blank s = + let len = String.length s in + let break = ref true in + let i = ref 0 in + while !break && !i < len do + let c = s.[!i] in + (* no '\r' because we replaced them in the lexer *) + if c = ' ' || c = '\n' || c = '\t' then incr i + else break := false + done; + !i = len + +let _raises e f = + xml_error := e; + file_not_found := f + +let make source = + let source = match source with + | SChannel chan -> Lexing.from_channel chan + | SString s -> Lexing.from_string s + | SLexbuf lexbuf -> lexbuf + in + let () = Xml_lexer.init source in + { + check_eof = false; + concat_pcdata = true; + source = source; + stack = Stack.create (); + } + +let check_eof p v = p.check_eof <- v + +let pop s = + try + Stack.pop s.stack + with + Stack.Empty -> + Xml_lexer.token s.source + +let push t s = + Stack.push t s.stack + +let canonicalize l = + let has_elt = List.exists (function Element _ -> true | _ -> false) l in + if has_elt then List.filter (function PCData s -> not (is_blank s) | _ -> true) l + else l + +let rec read_xml do_not_canonicalize s = + let rec read_node s = + match pop s with + | Xml_lexer.PCData s -> PCData s + | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, []) + | Xml_lexer.Tag (tag, attr, false) -> + let elements = read_elems tag s in + let elements = + if do_not_canonicalize then elements else canonicalize elements + in + Element (tag, attr, elements) + | t -> + push t s; + raise NoMoreData + + and read_elems tag s = + let elems = ref [] in + (try + while true do + let node = read_node s in + match node, !elems with + | PCData c , (PCData c2) :: q -> + elems := PCData (c2 ^ c) :: q + | _, l -> + elems := node :: l + done + with + NoMoreData -> ()); + match pop s with + | Xml_lexer.Endtag s when s = tag -> List.rev !elems + | t -> raise (Internal_error (EndOfTagExpected tag)) + in + match read_node s with + | (Element _) as node -> + node + | PCData c -> + if is_blank c then + read_xml do_not_canonicalize s + else + raise (Xml_lexer.Error Xml_lexer.ENodeExpected) + +let convert = function + | Xml_lexer.EUnterminatedComment -> UnterminatedComment + | Xml_lexer.EUnterminatedString -> UnterminatedString + | Xml_lexer.EIdentExpected -> IdentExpected + | Xml_lexer.ECloseExpected -> CloseExpected + | Xml_lexer.ENodeExpected -> NodeExpected + | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected + | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected + | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity + +let error_of_exn xparser = function + | NoMoreData when pop xparser = Xml_lexer.Eof -> Empty + | NoMoreData -> NodeExpected + | Internal_error e -> e + | Xml_lexer.Error e -> convert e + | e -> + (*let e = Errors.push e in: We do not record backtrace here. *) + raise e + +let do_parse do_not_canonicalize xparser = + try + Xml_lexer.init xparser.source; + let x = read_xml do_not_canonicalize xparser in + if xparser.check_eof && pop xparser <> Xml_lexer.Eof then raise (Internal_error EOFExpected); + Xml_lexer.close (); + x + with any -> + Xml_lexer.close (); + raise (!xml_error (error_of_exn xparser any) xparser.source) + +let parse ?(do_not_canonicalize=false) p = + do_parse do_not_canonicalize p + +let error_msg = function + | UnterminatedComment -> "Unterminated comment" + | UnterminatedString -> "Unterminated string" + | UnterminatedEntity -> "Unterminated entity" + | IdentExpected -> "Ident expected" + | CloseExpected -> "Element close expected" + | NodeExpected -> "Xml node expected" + | AttributeNameExpected -> "Attribute name expected" + | AttributeValueExpected -> "Attribute value expected" + | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag + | EOFExpected -> "End of file expected" + | Empty -> "Empty" + +let error (msg,pos) = + if pos.emin = pos.emax then + sprintf "%s line %d character %d" (error_msg msg) pos.eline + (pos.emin - pos.eline_start) + else + sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline + (pos.emin - pos.eline_start) (pos.emax - pos.eline_start) + +let line e = e.eline + +let range e = + e.emin - e.eline_start , e.emax - e.eline_start + +let abs_range e = + e.emin , e.emax + +let pos source = + let line, lstart, min, max = Xml_lexer.pos source in + { + eline = line; + eline_start = lstart; + emin = min; + emax = max; + } + +let () = _raises (fun x p -> + (* local cast : Xml.error_msg -> error_msg *) + Error (x, pos p)) + (fun f -> File_not_found f) diff --git a/ide/xml_parser.mli b/ide/xml_parser.mli new file mode 100644 index 000000000..ac2eab352 --- /dev/null +++ b/ide/xml_parser.mli @@ -0,0 +1,106 @@ +(* + * Xml Light, an small Xml parser/printer with DTD support. + * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Xml Light Parser + + While basic parsing functions can be used in the {!Xml} module, this module + is providing a way to create, configure and run an Xml parser. + +*) + + +(** An Xml node is either + [Element (tag-name, attributes, children)] or [PCData text] *) +type xml = Xml_datatype.xml + +(** Abstract type for an Xml parser. *) +type t + +(** {6:exc Xml Exceptions} *) + +(** Several exceptions can be raised when parsing an Xml document : {ul + {li {!Xml.Error} is raised when an xml parsing error occurs. the + {!Xml.error_msg} tells you which error occurred during parsing + and the {!Xml.error_pos} can be used to retrieve the document + location where the error occurred at.} + {li {!Xml.File_not_found} is raised when an error occurred while + opening a file with the {!Xml.parse_file} function.} + } + *) + +type error_pos + +type error_msg = + | UnterminatedComment + | UnterminatedString + | UnterminatedEntity + | IdentExpected + | CloseExpected + | NodeExpected + | AttributeNameExpected + | AttributeValueExpected + | EndOfTagExpected of string + | EOFExpected + | Empty + +type error = error_msg * error_pos + +exception Error of error + +exception File_not_found of string + +(** Get a full error message from an Xml error. *) +val error : error -> string + +(** Get the Xml error message as a string. *) +val error_msg : error_msg -> string + +(** Get the line the error occurred at. *) +val line : error_pos -> int + +(** Get the relative character range (in current line) the error occurred at.*) +val range : error_pos -> int * int + +(** Get the absolute character range the error occurred at. *) +val abs_range : error_pos -> int * int + +val pos : Lexing.lexbuf -> error_pos + +(** Several kind of resources can contain Xml documents. *) +type source = +| SChannel of in_channel +| SString of string +| SLexbuf of Lexing.lexbuf + +(** This function returns a new parser with default options. *) +val make : source -> t + +(** When a Xml document is parsed, the parser may check that the end of the + document is reached, so for example parsing ["<A/><B/>"] will fail instead + of returning only the A element. You can turn on this check by setting + [check_eof] to [true] {i (by default, check_eof is false, unlike + in the original Xmllight)}. *) +val check_eof : t -> bool -> unit + +(** Once the parser is configured, you can run the parser on a any kind + of xml document source to parse its contents into an Xml data structure. + + When [do_not_canonicalize] is set, the XML document is given as + is, without trying to remove blank PCDATA elements. *) +val parse : ?do_not_canonicalize:bool -> t -> xml diff --git a/ide/xml_printer.ml b/ide/xml_printer.ml new file mode 100644 index 000000000..e7e4d0ceb --- /dev/null +++ b/ide/xml_printer.ml @@ -0,0 +1,145 @@ +(************************************************************************) +(* v * The Coq Proof Assistant / The Coq Development Team *) +(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2016 *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(************************************************************************) + +open Xml_datatype + +type xml = Xml_datatype.xml + +type target = TChannel of out_channel | TBuffer of Buffer.t + +type t = target + +let make x = x + +let buffer_pcdata tmp text = + let output = Buffer.add_string tmp in + let output' = Buffer.add_char tmp in + let l = String.length text in + for p = 0 to l-1 do + match text.[p] with + | ' ' -> output " "; + | '>' -> output ">" + | '<' -> output "<" + | '&' -> + if p < l - 1 && text.[p + 1] = '#' then + output' '&' + else + output "&" + | '\'' -> output "'" + | '"' -> output """ + | c -> output' c + done + +let buffer_attr tmp (n,v) = + let output = Buffer.add_string tmp in + let output' = Buffer.add_char tmp in + output' ' '; + output n; + output "=\""; + let l = String.length v in + for p = 0 to l - 1 do + match v.[p] with + | '\\' -> output "\\\\" + | '"' -> output "\\\"" + | '<' -> output "<" + | '&' -> output "&" + | c -> output' c + done; + output' '"' + +let to_buffer tmp x = + let pcdata = ref false in + let output = Buffer.add_string tmp in + let output' = Buffer.add_char tmp in + let rec loop = function + | Element (tag,alist,[]) -> + output' '<'; + output tag; + List.iter (buffer_attr tmp) alist; + output "/>"; + pcdata := false; + | Element (tag,alist,l) -> + output' '<'; + output tag; + List.iter (buffer_attr tmp) alist; + output' '>'; + pcdata := false; + List.iter loop l; + output "</"; + output tag; + output' '>'; + pcdata := false; + | PCData text -> + if !pcdata then output' ' '; + buffer_pcdata tmp text; + pcdata := true; + in + loop x + +let pcdata_to_string s = + let b = Buffer.create 13 in + buffer_pcdata b s; + Buffer.contents b + +let to_string x = + let b = Buffer.create 200 in + to_buffer b x; + Buffer.contents b + +let to_string_fmt x = + let tmp = Buffer.create 200 in + let output = Buffer.add_string tmp in + let output' = Buffer.add_char tmp in + let rec loop ?(newl=false) tab = function + | Element (tag, alist, []) -> + output tab; + output' '<'; + output tag; + List.iter (buffer_attr tmp) alist; + output "/>"; + if newl then output' '\n'; + | Element (tag, alist, [PCData text]) -> + output tab; + output' '<'; + output tag; + List.iter (buffer_attr tmp) alist; + output ">"; + buffer_pcdata tmp text; + output "</"; + output tag; + output' '>'; + if newl then output' '\n'; + | Element (tag, alist, l) -> + output tab; + output' '<'; + output tag; + List.iter (buffer_attr tmp) alist; + output ">\n"; + List.iter (loop ~newl:true (tab^" ")) l; + output tab; + output "</"; + output tag; + output' '>'; + if newl then output' '\n'; + | PCData text -> + buffer_pcdata tmp text; + if newl then output' '\n'; + in + loop "" x; + Buffer.contents tmp + +let print t xml = + let tmp, flush = match t with + | TChannel oc -> + let b = Buffer.create 200 in + b, (fun () -> Buffer.output_buffer oc b; flush oc) + | TBuffer b -> + b, (fun () -> ()) + in + to_buffer tmp xml; + flush () diff --git a/ide/xml_printer.mli b/ide/xml_printer.mli new file mode 100644 index 000000000..f24f51fff --- /dev/null +++ b/ide/xml_printer.mli @@ -0,0 +1,29 @@ +(************************************************************************) +(* v * The Coq Proof Assistant / The Coq Development Team *) +(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2016 *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(************************************************************************) + +type xml = Xml_datatype.xml + +type t +type target = TChannel of out_channel | TBuffer of Buffer.t + +val make : target -> t + +(** Print the xml data structure to a source into a compact xml string (without + any user-readable formating ). *) +val print : t -> xml -> unit + +(** Print the xml data structure into a compact xml string (without + any user-readable formating ). *) +val to_string : xml -> string + +(** Print the xml data structure into an user-readable string with + tabs and lines break between different nodes. *) +val to_string_fmt : xml -> string + +(** Print PCDATA as a string by escaping XML entities. *) +val pcdata_to_string : string -> string |