summaryrefslogtreecommitdiff
path: root/lib/xml_parser.ml
diff options
context:
space:
mode:
Diffstat (limited to 'lib/xml_parser.ml')
-rw-r--r--lib/xml_parser.ml299
1 files changed, 147 insertions, 152 deletions
diff --git a/lib/xml_parser.ml b/lib/xml_parser.ml
index 600796f7..8db3f9e8 100644
--- a/lib/xml_parser.ml
+++ b/lib/xml_parser.ml
@@ -19,30 +19,29 @@
*)
open Printf
+open Xml_datatype
-type xml =
- | Element of (string * (string * string) list * xml list)
- | PCData of string
+type xml = Xml_datatype.xml
type error_pos = {
- eline : int;
- eline_start : int;
- emin : int;
- emax : int;
+ eline : int;
+ eline_start : int;
+ emin : int;
+ emax : int;
}
type error_msg =
- | UnterminatedComment
- | UnterminatedString
- | UnterminatedEntity
- | IdentExpected
- | CloseExpected
- | NodeExpected
- | AttributeNameExpected
- | AttributeValueExpected
- | EndOfTagExpected of string
- | EOFExpected
- | Empty
+ | UnterminatedComment
+ | UnterminatedString
+ | UnterminatedEntity
+ | IdentExpected
+ | CloseExpected
+ | NodeExpected
+ | AttributeNameExpected
+ | AttributeValueExpected
+ | EndOfTagExpected of string
+ | EOFExpected
+ | Empty
type error = error_msg * error_pos
@@ -51,21 +50,16 @@ exception Error of error
exception File_not_found of string
type t = {
- mutable check_eof : bool;
- mutable concat_pcdata : bool;
+ mutable check_eof : bool;
+ mutable concat_pcdata : bool;
+ source : Lexing.lexbuf;
+ stack : Xml_lexer.token Stack.t;
}
-type source =
- | SFile of string
- | SChannel of in_channel
- | SString of string
- | SLexbuf of Lexing.lexbuf
-
-type state = {
- source : Lexing.lexbuf;
- stack : Xml_lexer.token Stack.t;
- xparser : t;
-}
+type source =
+ | SChannel of in_channel
+ | SString of string
+ | SLexbuf of Lexing.lexbuf
exception Internal_error of error_msg
exception NoMoreData
@@ -86,152 +80,153 @@ let is_blank s =
!i = len
let _raises e f =
- xml_error := e;
- file_not_found := f
-
-let make () =
- {
- check_eof = true;
- concat_pcdata = true;
- }
+ xml_error := e;
+ file_not_found := f
+
+let make source =
+ let source = match source with
+ | SChannel chan -> Lexing.from_channel chan
+ | SString s -> Lexing.from_string s
+ | SLexbuf lexbuf -> lexbuf
+ in
+ let () = Xml_lexer.init source in
+ {
+ check_eof = false;
+ concat_pcdata = true;
+ source = source;
+ stack = Stack.create ();
+ }
let check_eof p v = p.check_eof <- v
-let concat_pcdata p v = p.concat_pcdata <- v
let pop s =
- try
- Stack.pop s.stack
- with
- Stack.Empty ->
- Xml_lexer.token s.source
+ try
+ Stack.pop s.stack
+ with
+ Stack.Empty ->
+ Xml_lexer.token s.source
let push t s =
- Stack.push t s.stack
+ Stack.push t s.stack
let canonicalize l =
let has_elt = List.exists (function Element _ -> true | _ -> false) l in
if has_elt then List.filter (function PCData s -> not (is_blank s) | _ -> true) l
else l
-let rec read_node s =
- match pop s with
- | Xml_lexer.PCData s -> PCData s
- | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, [])
- | Xml_lexer.Tag (tag, attr, false) ->
- let elements = read_elems tag s in
- Element (tag, attr, canonicalize elements)
- | t ->
- push t s;
- raise NoMoreData
-and
- read_elems tag s =
- let elems = ref [] in
- (try
- while true do
- let node = read_node s in
- match node, !elems with
- | PCData c , (PCData c2) :: q ->
- elems := PCData (c2 ^ c) :: q
- | _, l ->
- elems := node :: l
- done
- with
- NoMoreData -> ());
- match pop s with
- | Xml_lexer.Endtag s when s = tag -> List.rev !elems
- | t -> raise (Internal_error (EndOfTagExpected tag))
-
-let rec read_xml s =
- let node = read_node s in
- match node with
- | Element _ -> node
- | PCData c ->
- if is_blank c then read_xml s
- else raise (Xml_lexer.Error Xml_lexer.ENodeExpected)
+let rec read_xml do_not_canonicalize s =
+ let rec read_node s =
+ match pop s with
+ | Xml_lexer.PCData s -> PCData s
+ | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, [])
+ | Xml_lexer.Tag (tag, attr, false) ->
+ let elements = read_elems tag s in
+ let elements =
+ if do_not_canonicalize then elements else canonicalize elements
+ in
+ Element (tag, attr, elements)
+ | t ->
+ push t s;
+ raise NoMoreData
+
+ and read_elems tag s =
+ let elems = ref [] in
+ (try
+ while true do
+ let node = read_node s in
+ match node, !elems with
+ | PCData c , (PCData c2) :: q ->
+ elems := PCData (c2 ^ c) :: q
+ | _, l ->
+ elems := node :: l
+ done
+ with
+ NoMoreData -> ());
+ match pop s with
+ | Xml_lexer.Endtag s when s = tag -> List.rev !elems
+ | t -> raise (Internal_error (EndOfTagExpected tag))
+ in
+ match read_node s with
+ | (Element _) as node ->
+ node
+ | PCData c ->
+ if is_blank c then
+ read_xml do_not_canonicalize s
+ else
+ raise (Xml_lexer.Error Xml_lexer.ENodeExpected)
let convert = function
- | Xml_lexer.EUnterminatedComment -> UnterminatedComment
- | Xml_lexer.EUnterminatedString -> UnterminatedString
- | Xml_lexer.EIdentExpected -> IdentExpected
- | Xml_lexer.ECloseExpected -> CloseExpected
- | Xml_lexer.ENodeExpected -> NodeExpected
- | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected
- | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected
- | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity
-
-let error_of_exn stk = function
- | NoMoreData when Stack.pop stk = Xml_lexer.Eof -> Empty
+ | Xml_lexer.EUnterminatedComment -> UnterminatedComment
+ | Xml_lexer.EUnterminatedString -> UnterminatedString
+ | Xml_lexer.EIdentExpected -> IdentExpected
+ | Xml_lexer.ECloseExpected -> CloseExpected
+ | Xml_lexer.ENodeExpected -> NodeExpected
+ | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected
+ | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected
+ | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity
+
+let error_of_exn xparser = function
+ | NoMoreData when pop xparser = Xml_lexer.Eof -> Empty
| NoMoreData -> NodeExpected
| Internal_error e -> e
| Xml_lexer.Error e -> convert e
- | e -> raise e
-
-let do_parse xparser source =
- let stk = Stack.create() in
- try
- Xml_lexer.init source;
- let s = { source = source; xparser = xparser; stack = stk } in
- let x = read_xml s in
- if xparser.check_eof && pop s <> Xml_lexer.Eof then raise (Internal_error EOFExpected);
- Xml_lexer.close source;
- x
- with e when e <> Sys.Break ->
- Xml_lexer.close source;
- raise (!xml_error (error_of_exn stk e) source)
-
-let parse p = function
- | SChannel ch -> do_parse p (Lexing.from_channel ch)
- | SString str -> do_parse p (Lexing.from_string str)
- | SLexbuf lex -> do_parse p lex
- | SFile fname ->
- let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in
- try
- let x = do_parse p (Lexing.from_channel ch) in
- close_in ch;
- x
- with
- reraise ->
- close_in ch;
- raise reraise
-
+ | e ->
+ (*let e = Errors.push e in: We do not record backtrace here. *)
+ raise e
+
+let do_parse do_not_canonicalize xparser =
+ try
+ Xml_lexer.init xparser.source;
+ let x = read_xml do_not_canonicalize xparser in
+ if xparser.check_eof && pop xparser <> Xml_lexer.Eof then raise (Internal_error EOFExpected);
+ Xml_lexer.close ();
+ x
+ with any ->
+ Xml_lexer.close ();
+ raise (!xml_error (error_of_exn xparser any) xparser.source)
+
+let parse ?(do_not_canonicalize=false) p =
+ do_parse do_not_canonicalize p
let error_msg = function
- | UnterminatedComment -> "Unterminated comment"
- | UnterminatedString -> "Unterminated string"
- | UnterminatedEntity -> "Unterminated entity"
- | IdentExpected -> "Ident expected"
- | CloseExpected -> "Element close expected"
- | NodeExpected -> "Xml node expected"
- | AttributeNameExpected -> "Attribute name expected"
- | AttributeValueExpected -> "Attribute value expected"
- | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag
- | EOFExpected -> "End of file expected"
- | Empty -> "Empty"
+ | UnterminatedComment -> "Unterminated comment"
+ | UnterminatedString -> "Unterminated string"
+ | UnterminatedEntity -> "Unterminated entity"
+ | IdentExpected -> "Ident expected"
+ | CloseExpected -> "Element close expected"
+ | NodeExpected -> "Xml node expected"
+ | AttributeNameExpected -> "Attribute name expected"
+ | AttributeValueExpected -> "Attribute value expected"
+ | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag
+ | EOFExpected -> "End of file expected"
+ | Empty -> "Empty"
let error (msg,pos) =
- if pos.emin = pos.emax then
- sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start)
- else
- sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start)
-
+ if pos.emin = pos.emax then
+ sprintf "%s line %d character %d" (error_msg msg) pos.eline
+ (pos.emin - pos.eline_start)
+ else
+ sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline
+ (pos.emin - pos.eline_start) (pos.emax - pos.eline_start)
+
let line e = e.eline
-let range e =
- e.emin - e.eline_start , e.emax - e.eline_start
+let range e =
+ e.emin - e.eline_start , e.emax - e.eline_start
let abs_range e =
- e.emin , e.emax
+ e.emin , e.emax
let pos source =
- let line, lstart, min, max = Xml_lexer.pos source in
- {
- eline = line;
- eline_start = lstart;
- emin = min;
- emax = max;
- }
-
-let () = _raises (fun x p ->
+ let line, lstart, min, max = Xml_lexer.pos source in
+ {
+ eline = line;
+ eline_start = lstart;
+ emin = min;
+ emax = max;
+ }
+
+let () = _raises (fun x p ->
(* local cast : Xml.error_msg -> error_msg *)
- Error (x, pos p))
- (fun f -> File_not_found f)
+ Error (x, pos p))
+ (fun f -> File_not_found f)