aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Regis-Gianas <yrg@pps.univ-paris-diderot.fr>2014-10-30 22:13:21 +0100
committerGravatar Regis-Gianas <yrg@pps.univ-paris-diderot.fr>2014-11-04 22:51:35 +0100
commit812c611bdc8532b7cd25d9368a8356be3eb1d34a (patch)
tree14c49e92d23052afcb4a802dd531d2bdf23f2118
parentd21183a81fb73cd20ace5213cfe46c82250db38b (diff)
Xml_datatype.gxml: New type for semi-structured documents.
Xml_parser, Xml_printer: Use it.
-rw-r--r--lib/xml_datatype.mli13
-rw-r--r--lib/xml_parser.ml93
-rw-r--r--lib/xml_printer.ml182
3 files changed, 147 insertions, 141 deletions
diff --git a/lib/xml_datatype.mli b/lib/xml_datatype.mli
index 7b18c73bf..3d0e897ac 100644
--- a/lib/xml_datatype.mli
+++ b/lib/xml_datatype.mli
@@ -6,7 +6,14 @@
(* * GNU Lesser General Public License Version 2.1 *)
(************************************************************************)
-type xml =
- | Element of (string * (string * string) list * xml list)
- | PCData of string
+(** ['a gxml] is the type for semi-structured documents. They generalize
+ XML by allowing any kind of attributes. *)
+type 'a gxml =
+ | Element of (string * 'a * 'a gxml list)
+ | PCData of string
+
+(** [xml] is a semi-structured documents where attributes are association
+ lists from string to string. *)
+type xml = (string * string) list gxml
+
diff --git a/lib/xml_parser.ml b/lib/xml_parser.ml
index b9990c75a..f3283c659 100644
--- a/lib/xml_parser.ml
+++ b/lib/xml_parser.ml
@@ -19,10 +19,9 @@
*)
open Printf
+open Xml_datatype
-type xml = Xml_datatype.xml =
- | Element of (string * (string * string) list * xml list)
- | PCData of string
+type xml = Xml_datatype.xml
type error_pos = {
eline : int;
@@ -81,8 +80,8 @@ let is_blank s =
!i = len
let _raises e f =
- xml_error := e;
- file_not_found := f
+ xml_error := e;
+ file_not_found := f
let make source =
let source = match source with
@@ -101,14 +100,14 @@ let make source =
let check_eof p v = p.check_eof <- v
let pop s =
- try
- Stack.pop s.stack
- with
- Stack.Empty ->
- Xml_lexer.token s.source
+ try
+ Stack.pop s.stack
+ with
+ Stack.Empty ->
+ Xml_lexer.token s.source
let push t s =
- Stack.push t s.stack
+ Stack.push t s.stack
let canonicalize l =
let has_elt = List.exists (function Element _ -> true | _ -> false) l in
@@ -116,19 +115,19 @@ let canonicalize l =
else l
let rec read_node s =
- match pop s with
- | Xml_lexer.PCData s -> PCData s
- | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, [])
- | Xml_lexer.Tag (tag, attr, false) ->
+ match pop s with
+ | Xml_lexer.PCData s -> PCData s
+ | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, [])
+ | Xml_lexer.Tag (tag, attr, false) ->
let elements = read_elems tag s in
Element (tag, attr, canonicalize elements)
- | t ->
- push t s;
- raise NoMoreData
+ | t ->
+ push t s;
+ raise NoMoreData
and
- read_elems tag s =
- let elems = ref [] in
- (try
+ read_elems tag s =
+ let elems = ref [] in
+ (try
while true do
let node = read_node s in
match node, !elems with
@@ -137,11 +136,11 @@ and
| _, l ->
elems := node :: l
done
- with
- NoMoreData -> ());
- match pop s with
- | Xml_lexer.Endtag s when s = tag -> List.rev !elems
- | t -> raise (Internal_error (EndOfTagExpected tag))
+ with
+ NoMoreData -> ());
+ match pop s with
+ | Xml_lexer.Endtag s when s = tag -> List.rev !elems
+ | t -> raise (Internal_error (EndOfTagExpected tag))
let rec read_xml s =
let node = read_node s in
@@ -152,14 +151,14 @@ let rec read_xml s =
else raise (Xml_lexer.Error Xml_lexer.ENodeExpected)
let convert = function
- | Xml_lexer.EUnterminatedComment -> UnterminatedComment
- | Xml_lexer.EUnterminatedString -> UnterminatedString
- | Xml_lexer.EIdentExpected -> IdentExpected
- | Xml_lexer.ECloseExpected -> CloseExpected
- | Xml_lexer.ENodeExpected -> NodeExpected
- | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected
- | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected
- | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity
+ | Xml_lexer.EUnterminatedComment -> UnterminatedComment
+ | Xml_lexer.EUnterminatedString -> UnterminatedString
+ | Xml_lexer.EIdentExpected -> IdentExpected
+ | Xml_lexer.ECloseExpected -> CloseExpected
+ | Xml_lexer.ENodeExpected -> NodeExpected
+ | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected
+ | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected
+ | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity
let error_of_exn xparser = function
| NoMoreData when pop xparser = Xml_lexer.Eof -> Empty
@@ -171,15 +170,15 @@ let error_of_exn xparser = function
raise e
let do_parse xparser =
- try
- Xml_lexer.init xparser.source;
- let x = read_xml xparser in
- if xparser.check_eof && pop xparser <> Xml_lexer.Eof then raise (Internal_error EOFExpected);
- Xml_lexer.close ();
- x
- with any ->
- Xml_lexer.close ();
- raise (!xml_error (error_of_exn xparser any) xparser.source)
+ try
+ Xml_lexer.init xparser.source;
+ let x = read_xml xparser in
+ if xparser.check_eof && pop xparser <> Xml_lexer.Eof then raise (Internal_error EOFExpected);
+ Xml_lexer.close ();
+ x
+ with any ->
+ Xml_lexer.close ();
+ raise (!xml_error (error_of_exn xparser any) xparser.source)
let parse p = do_parse p
@@ -194,17 +193,17 @@ let error_msg = function
| AttributeValueExpected -> "Attribute value expected"
| EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag
| EOFExpected -> "End of file expected"
- | Empty -> "Empty"
+ | Empty -> "Empty"
let error (msg,pos) =
if pos.emin = pos.emax then
sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start)
else
sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start)
-
+
let line e = e.eline
-let range e =
+let range e =
e.emin - e.eline_start , e.emax - e.eline_start
let abs_range e =
@@ -219,7 +218,7 @@ let pos source =
emax = max;
}
-let () = _raises (fun x p ->
+let () = _raises (fun x p ->
(* local cast : Xml.error_msg -> error_msg *)
Error (x, pos p))
(fun f -> File_not_found f)
diff --git a/lib/xml_printer.ml b/lib/xml_printer.ml
index 001b48ed9..af328acbd 100644
--- a/lib/xml_printer.ml
+++ b/lib/xml_printer.ml
@@ -6,9 +6,9 @@
(* * GNU Lesser General Public License Version 2.1 *)
(************************************************************************)
-type xml = Xml_datatype.xml =
- | Element of (string * (string * string) list * xml list)
- | PCData of string
+open Xml_datatype
+
+type xml = Xml_datatype.xml
type target = TChannel of out_channel | TBuffer of Buffer.t
@@ -17,60 +17,60 @@ type t = target
let make x = x
let buffer_pcdata tmp text =
- let l = String.length text in
- for p = 0 to l-1 do
- match text.[p] with
- | '>' -> Buffer.add_string tmp "&gt;"
- | '<' -> Buffer.add_string tmp "&lt;"
- | '&' ->
- if p < l-1 && text.[p+1] = '#' then
- Buffer.add_char tmp '&'
- else
- Buffer.add_string tmp "&amp;"
- | '\'' -> Buffer.add_string tmp "&apos;"
- | '"' -> Buffer.add_string tmp "&quot;"
- | c -> Buffer.add_char tmp c
- done
+ let l = String.length text in
+ for p = 0 to l-1 do
+ match text.[p] with
+ | '>' -> Buffer.add_string tmp "&gt;"
+ | '<' -> Buffer.add_string tmp "&lt;"
+ | '&' ->
+ if p < l-1 && text.[p+1] = '#' then
+ Buffer.add_char tmp '&'
+ else
+ Buffer.add_string tmp "&amp;"
+ | '\'' -> Buffer.add_string tmp "&apos;"
+ | '"' -> Buffer.add_string tmp "&quot;"
+ | c -> Buffer.add_char tmp c
+ done
let buffer_attr tmp (n,v) =
- Buffer.add_char tmp ' ';
- Buffer.add_string tmp n;
- Buffer.add_string tmp "=\"";
- let l = String.length v in
- for p = 0 to l-1 do
- match v.[p] with
- | '\\' -> Buffer.add_string tmp "\\\\"
- | '"' -> Buffer.add_string tmp "\\\""
- | c -> Buffer.add_char tmp c
- done;
- Buffer.add_char tmp '"'
+ Buffer.add_char tmp ' ';
+ Buffer.add_string tmp n;
+ Buffer.add_string tmp "=\"";
+ let l = String.length v in
+ for p = 0 to l-1 do
+ match v.[p] with
+ | '\\' -> Buffer.add_string tmp "\\\\"
+ | '"' -> Buffer.add_string tmp "\\\""
+ | c -> Buffer.add_char tmp c
+ done;
+ Buffer.add_char tmp '"'
-let to_buffer tmp x =
- let pcdata = ref false in
- let rec loop = function
- | Element (tag,alist,[]) ->
- Buffer.add_char tmp '<';
- Buffer.add_string tmp tag;
- List.iter (buffer_attr tmp) alist;
- Buffer.add_string tmp "/>";
- pcdata := false;
- | Element (tag,alist,l) ->
- Buffer.add_char tmp '<';
- Buffer.add_string tmp tag;
- List.iter (buffer_attr tmp) alist;
- Buffer.add_char tmp '>';
- pcdata := false;
- List.iter loop l;
- Buffer.add_string tmp "</";
- Buffer.add_string tmp tag;
- Buffer.add_char tmp '>';
- pcdata := false;
- | PCData text ->
- if !pcdata then Buffer.add_char tmp ' ';
- buffer_pcdata tmp text;
- pcdata := true;
- in
- loop x
+let to_buffer tmp x =
+ let pcdata = ref false in
+ let rec loop = function
+ | Element (tag,alist,[]) ->
+ Buffer.add_char tmp '<';
+ Buffer.add_string tmp tag;
+ List.iter (buffer_attr tmp) alist;
+ Buffer.add_string tmp "/>";
+ pcdata := false;
+ | Element (tag,alist,l) ->
+ Buffer.add_char tmp '<';
+ Buffer.add_string tmp tag;
+ List.iter (buffer_attr tmp) alist;
+ Buffer.add_char tmp '>';
+ pcdata := false;
+ List.iter loop l;
+ Buffer.add_string tmp "</";
+ Buffer.add_string tmp tag;
+ Buffer.add_char tmp '>';
+ pcdata := false;
+ | PCData text ->
+ if !pcdata then Buffer.add_char tmp ' ';
+ buffer_pcdata tmp text;
+ pcdata := true;
+ in
+ loop x
let to_string x =
let b = Buffer.create 200 in
@@ -79,43 +79,43 @@ let to_string x =
let to_string_fmt x =
let tmp = Buffer.create 200 in
- let rec loop ?(newl=false) tab = function
- | Element (tag,alist,[]) ->
- Buffer.add_string tmp tab;
- Buffer.add_char tmp '<';
- Buffer.add_string tmp tag;
- List.iter (buffer_attr tmp) alist;
- Buffer.add_string tmp "/>";
- if newl then Buffer.add_char tmp '\n';
- | Element (tag,alist,[PCData text]) ->
- Buffer.add_string tmp tab;
- Buffer.add_char tmp '<';
- Buffer.add_string tmp tag;
- List.iter (buffer_attr tmp) alist;
- Buffer.add_string tmp ">";
- buffer_pcdata tmp text;
- Buffer.add_string tmp "</";
- Buffer.add_string tmp tag;
- Buffer.add_char tmp '>';
- if newl then Buffer.add_char tmp '\n';
- | Element (tag,alist,l) ->
- Buffer.add_string tmp tab;
- Buffer.add_char tmp '<';
- Buffer.add_string tmp tag;
- List.iter (buffer_attr tmp) alist;
- Buffer.add_string tmp ">\n";
- List.iter (loop ~newl:true (tab^" ")) l;
- Buffer.add_string tmp tab;
- Buffer.add_string tmp "</";
- Buffer.add_string tmp tag;
- Buffer.add_char tmp '>';
- if newl then Buffer.add_char tmp '\n';
- | PCData text ->
- buffer_pcdata tmp text;
- if newl then Buffer.add_char tmp '\n';
- in
- loop "" x;
- Buffer.contents tmp
+ let rec loop ?(newl=false) tab = function
+ | Element (tag,alist,[]) ->
+ Buffer.add_string tmp tab;
+ Buffer.add_char tmp '<';
+ Buffer.add_string tmp tag;
+ List.iter (buffer_attr tmp) alist;
+ Buffer.add_string tmp "/>";
+ if newl then Buffer.add_char tmp '\n';
+ | Element (tag,alist,[PCData text]) ->
+ Buffer.add_string tmp tab;
+ Buffer.add_char tmp '<';
+ Buffer.add_string tmp tag;
+ List.iter (buffer_attr tmp) alist;
+ Buffer.add_string tmp ">";
+ buffer_pcdata tmp text;
+ Buffer.add_string tmp "</";
+ Buffer.add_string tmp tag;
+ Buffer.add_char tmp '>';
+ if newl then Buffer.add_char tmp '\n';
+ | Element (tag,alist,l) ->
+ Buffer.add_string tmp tab;
+ Buffer.add_char tmp '<';
+ Buffer.add_string tmp tag;
+ List.iter (buffer_attr tmp) alist;
+ Buffer.add_string tmp ">\n";
+ List.iter (loop ~newl:true (tab^" ")) l;
+ Buffer.add_string tmp tab;
+ Buffer.add_string tmp "</";
+ Buffer.add_string tmp tag;
+ Buffer.add_char tmp '>';
+ if newl then Buffer.add_char tmp '\n';
+ | PCData text ->
+ buffer_pcdata tmp text;
+ if newl then Buffer.add_char tmp '\n';
+ in
+ loop "" x;
+ Buffer.contents tmp
let print t xml =
let tmp, flush = match t with