diff options
author | Regis-Gianas <yrg@pps.univ-paris-diderot.fr> | 2014-10-30 22:13:21 +0100 |
---|---|---|
committer | Regis-Gianas <yrg@pps.univ-paris-diderot.fr> | 2014-11-04 22:51:35 +0100 |
commit | 812c611bdc8532b7cd25d9368a8356be3eb1d34a (patch) | |
tree | 14c49e92d23052afcb4a802dd531d2bdf23f2118 /lib | |
parent | d21183a81fb73cd20ace5213cfe46c82250db38b (diff) |
Xml_datatype.gxml: New type for semi-structured documents.
Xml_parser, Xml_printer: Use it.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/xml_datatype.mli | 13 | ||||
-rw-r--r-- | lib/xml_parser.ml | 93 | ||||
-rw-r--r-- | lib/xml_printer.ml | 182 |
3 files changed, 147 insertions, 141 deletions
diff --git a/lib/xml_datatype.mli b/lib/xml_datatype.mli index 7b18c73bf..3d0e897ac 100644 --- a/lib/xml_datatype.mli +++ b/lib/xml_datatype.mli @@ -6,7 +6,14 @@ (* * GNU Lesser General Public License Version 2.1 *) (************************************************************************) -type xml = - | Element of (string * (string * string) list * xml list) - | PCData of string +(** ['a gxml] is the type for semi-structured documents. They generalize + XML by allowing any kind of attributes. *) +type 'a gxml = + | Element of (string * 'a * 'a gxml list) + | PCData of string + +(** [xml] is a semi-structured documents where attributes are association + lists from string to string. *) +type xml = (string * string) list gxml + diff --git a/lib/xml_parser.ml b/lib/xml_parser.ml index b9990c75a..f3283c659 100644 --- a/lib/xml_parser.ml +++ b/lib/xml_parser.ml @@ -19,10 +19,9 @@ *) open Printf +open Xml_datatype -type xml = Xml_datatype.xml = - | Element of (string * (string * string) list * xml list) - | PCData of string +type xml = Xml_datatype.xml type error_pos = { eline : int; @@ -81,8 +80,8 @@ let is_blank s = !i = len let _raises e f = - xml_error := e; - file_not_found := f + xml_error := e; + file_not_found := f let make source = let source = match source with @@ -101,14 +100,14 @@ let make source = let check_eof p v = p.check_eof <- v let pop s = - try - Stack.pop s.stack - with - Stack.Empty -> - Xml_lexer.token s.source + try + Stack.pop s.stack + with + Stack.Empty -> + Xml_lexer.token s.source let push t s = - Stack.push t s.stack + Stack.push t s.stack let canonicalize l = let has_elt = List.exists (function Element _ -> true | _ -> false) l in @@ -116,19 +115,19 @@ let canonicalize l = else l let rec read_node s = - match pop s with - | Xml_lexer.PCData s -> PCData s - | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, []) - | Xml_lexer.Tag (tag, attr, false) -> + match pop s with + | Xml_lexer.PCData s -> PCData s + | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, []) + | Xml_lexer.Tag (tag, attr, false) -> let elements = read_elems tag s in Element (tag, attr, canonicalize elements) - | t -> - push t s; - raise NoMoreData + | t -> + push t s; + raise NoMoreData and - read_elems tag s = - let elems = ref [] in - (try + read_elems tag s = + let elems = ref [] in + (try while true do let node = read_node s in match node, !elems with @@ -137,11 +136,11 @@ and | _, l -> elems := node :: l done - with - NoMoreData -> ()); - match pop s with - | Xml_lexer.Endtag s when s = tag -> List.rev !elems - | t -> raise (Internal_error (EndOfTagExpected tag)) + with + NoMoreData -> ()); + match pop s with + | Xml_lexer.Endtag s when s = tag -> List.rev !elems + | t -> raise (Internal_error (EndOfTagExpected tag)) let rec read_xml s = let node = read_node s in @@ -152,14 +151,14 @@ let rec read_xml s = else raise (Xml_lexer.Error Xml_lexer.ENodeExpected) let convert = function - | Xml_lexer.EUnterminatedComment -> UnterminatedComment - | Xml_lexer.EUnterminatedString -> UnterminatedString - | Xml_lexer.EIdentExpected -> IdentExpected - | Xml_lexer.ECloseExpected -> CloseExpected - | Xml_lexer.ENodeExpected -> NodeExpected - | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected - | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected - | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity + | Xml_lexer.EUnterminatedComment -> UnterminatedComment + | Xml_lexer.EUnterminatedString -> UnterminatedString + | Xml_lexer.EIdentExpected -> IdentExpected + | Xml_lexer.ECloseExpected -> CloseExpected + | Xml_lexer.ENodeExpected -> NodeExpected + | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected + | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected + | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity let error_of_exn xparser = function | NoMoreData when pop xparser = Xml_lexer.Eof -> Empty @@ -171,15 +170,15 @@ let error_of_exn xparser = function raise e let do_parse xparser = - try - Xml_lexer.init xparser.source; - let x = read_xml xparser in - if xparser.check_eof && pop xparser <> Xml_lexer.Eof then raise (Internal_error EOFExpected); - Xml_lexer.close (); - x - with any -> - Xml_lexer.close (); - raise (!xml_error (error_of_exn xparser any) xparser.source) + try + Xml_lexer.init xparser.source; + let x = read_xml xparser in + if xparser.check_eof && pop xparser <> Xml_lexer.Eof then raise (Internal_error EOFExpected); + Xml_lexer.close (); + x + with any -> + Xml_lexer.close (); + raise (!xml_error (error_of_exn xparser any) xparser.source) let parse p = do_parse p @@ -194,17 +193,17 @@ let error_msg = function | AttributeValueExpected -> "Attribute value expected" | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag | EOFExpected -> "End of file expected" - | Empty -> "Empty" + | Empty -> "Empty" let error (msg,pos) = if pos.emin = pos.emax then sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) else sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start) - + let line e = e.eline -let range e = +let range e = e.emin - e.eline_start , e.emax - e.eline_start let abs_range e = @@ -219,7 +218,7 @@ let pos source = emax = max; } -let () = _raises (fun x p -> +let () = _raises (fun x p -> (* local cast : Xml.error_msg -> error_msg *) Error (x, pos p)) (fun f -> File_not_found f) diff --git a/lib/xml_printer.ml b/lib/xml_printer.ml index 001b48ed9..af328acbd 100644 --- a/lib/xml_printer.ml +++ b/lib/xml_printer.ml @@ -6,9 +6,9 @@ (* * GNU Lesser General Public License Version 2.1 *) (************************************************************************) -type xml = Xml_datatype.xml = - | Element of (string * (string * string) list * xml list) - | PCData of string +open Xml_datatype + +type xml = Xml_datatype.xml type target = TChannel of out_channel | TBuffer of Buffer.t @@ -17,60 +17,60 @@ type t = target let make x = x let buffer_pcdata tmp text = - let l = String.length text in - for p = 0 to l-1 do - match text.[p] with - | '>' -> Buffer.add_string tmp ">" - | '<' -> Buffer.add_string tmp "<" - | '&' -> - if p < l-1 && text.[p+1] = '#' then - Buffer.add_char tmp '&' - else - Buffer.add_string tmp "&" - | '\'' -> Buffer.add_string tmp "'" - | '"' -> Buffer.add_string tmp """ - | c -> Buffer.add_char tmp c - done + let l = String.length text in + for p = 0 to l-1 do + match text.[p] with + | '>' -> Buffer.add_string tmp ">" + | '<' -> Buffer.add_string tmp "<" + | '&' -> + if p < l-1 && text.[p+1] = '#' then + Buffer.add_char tmp '&' + else + Buffer.add_string tmp "&" + | '\'' -> Buffer.add_string tmp "'" + | '"' -> Buffer.add_string tmp """ + | c -> Buffer.add_char tmp c + done let buffer_attr tmp (n,v) = - Buffer.add_char tmp ' '; - Buffer.add_string tmp n; - Buffer.add_string tmp "=\""; - let l = String.length v in - for p = 0 to l-1 do - match v.[p] with - | '\\' -> Buffer.add_string tmp "\\\\" - | '"' -> Buffer.add_string tmp "\\\"" - | c -> Buffer.add_char tmp c - done; - Buffer.add_char tmp '"' + Buffer.add_char tmp ' '; + Buffer.add_string tmp n; + Buffer.add_string tmp "=\""; + let l = String.length v in + for p = 0 to l-1 do + match v.[p] with + | '\\' -> Buffer.add_string tmp "\\\\" + | '"' -> Buffer.add_string tmp "\\\"" + | c -> Buffer.add_char tmp c + done; + Buffer.add_char tmp '"' -let to_buffer tmp x = - let pcdata = ref false in - let rec loop = function - | Element (tag,alist,[]) -> - Buffer.add_char tmp '<'; - Buffer.add_string tmp tag; - List.iter (buffer_attr tmp) alist; - Buffer.add_string tmp "/>"; - pcdata := false; - | Element (tag,alist,l) -> - Buffer.add_char tmp '<'; - Buffer.add_string tmp tag; - List.iter (buffer_attr tmp) alist; - Buffer.add_char tmp '>'; - pcdata := false; - List.iter loop l; - Buffer.add_string tmp "</"; - Buffer.add_string tmp tag; - Buffer.add_char tmp '>'; - pcdata := false; - | PCData text -> - if !pcdata then Buffer.add_char tmp ' '; - buffer_pcdata tmp text; - pcdata := true; - in - loop x +let to_buffer tmp x = + let pcdata = ref false in + let rec loop = function + | Element (tag,alist,[]) -> + Buffer.add_char tmp '<'; + Buffer.add_string tmp tag; + List.iter (buffer_attr tmp) alist; + Buffer.add_string tmp "/>"; + pcdata := false; + | Element (tag,alist,l) -> + Buffer.add_char tmp '<'; + Buffer.add_string tmp tag; + List.iter (buffer_attr tmp) alist; + Buffer.add_char tmp '>'; + pcdata := false; + List.iter loop l; + Buffer.add_string tmp "</"; + Buffer.add_string tmp tag; + Buffer.add_char tmp '>'; + pcdata := false; + | PCData text -> + if !pcdata then Buffer.add_char tmp ' '; + buffer_pcdata tmp text; + pcdata := true; + in + loop x let to_string x = let b = Buffer.create 200 in @@ -79,43 +79,43 @@ let to_string x = let to_string_fmt x = let tmp = Buffer.create 200 in - let rec loop ?(newl=false) tab = function - | Element (tag,alist,[]) -> - Buffer.add_string tmp tab; - Buffer.add_char tmp '<'; - Buffer.add_string tmp tag; - List.iter (buffer_attr tmp) alist; - Buffer.add_string tmp "/>"; - if newl then Buffer.add_char tmp '\n'; - | Element (tag,alist,[PCData text]) -> - Buffer.add_string tmp tab; - Buffer.add_char tmp '<'; - Buffer.add_string tmp tag; - List.iter (buffer_attr tmp) alist; - Buffer.add_string tmp ">"; - buffer_pcdata tmp text; - Buffer.add_string tmp "</"; - Buffer.add_string tmp tag; - Buffer.add_char tmp '>'; - if newl then Buffer.add_char tmp '\n'; - | Element (tag,alist,l) -> - Buffer.add_string tmp tab; - Buffer.add_char tmp '<'; - Buffer.add_string tmp tag; - List.iter (buffer_attr tmp) alist; - Buffer.add_string tmp ">\n"; - List.iter (loop ~newl:true (tab^" ")) l; - Buffer.add_string tmp tab; - Buffer.add_string tmp "</"; - Buffer.add_string tmp tag; - Buffer.add_char tmp '>'; - if newl then Buffer.add_char tmp '\n'; - | PCData text -> - buffer_pcdata tmp text; - if newl then Buffer.add_char tmp '\n'; - in - loop "" x; - Buffer.contents tmp + let rec loop ?(newl=false) tab = function + | Element (tag,alist,[]) -> + Buffer.add_string tmp tab; + Buffer.add_char tmp '<'; + Buffer.add_string tmp tag; + List.iter (buffer_attr tmp) alist; + Buffer.add_string tmp "/>"; + if newl then Buffer.add_char tmp '\n'; + | Element (tag,alist,[PCData text]) -> + Buffer.add_string tmp tab; + Buffer.add_char tmp '<'; + Buffer.add_string tmp tag; + List.iter (buffer_attr tmp) alist; + Buffer.add_string tmp ">"; + buffer_pcdata tmp text; + Buffer.add_string tmp "</"; + Buffer.add_string tmp tag; + Buffer.add_char tmp '>'; + if newl then Buffer.add_char tmp '\n'; + | Element (tag,alist,l) -> + Buffer.add_string tmp tab; + Buffer.add_char tmp '<'; + Buffer.add_string tmp tag; + List.iter (buffer_attr tmp) alist; + Buffer.add_string tmp ">\n"; + List.iter (loop ~newl:true (tab^" ")) l; + Buffer.add_string tmp tab; + Buffer.add_string tmp "</"; + Buffer.add_string tmp tag; + Buffer.add_char tmp '>'; + if newl then Buffer.add_char tmp '\n'; + | PCData text -> + buffer_pcdata tmp text; + if newl then Buffer.add_char tmp '\n'; + in + loop "" x; + Buffer.contents tmp let print t xml = let tmp, flush = match t with |