aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--lib/xml_parser.ml79
1 files changed, 43 insertions, 36 deletions
diff --git a/lib/xml_parser.ml b/lib/xml_parser.ml
index 2809296e1..8db3f9e8b 100644
--- a/lib/xml_parser.ml
+++ b/lib/xml_parser.ml
@@ -114,40 +114,46 @@ let canonicalize l =
if has_elt then List.filter (function PCData s -> not (is_blank s) | _ -> true) l
else l
-let rec read_node s =
- match pop s with
- | Xml_lexer.PCData s -> PCData s
- | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, [])
- | Xml_lexer.Tag (tag, attr, false) ->
- let elements = read_elems tag s in
- Element (tag, attr, canonicalize elements)
- | t ->
- push t s;
- raise NoMoreData
-and read_elems tag s =
- let elems = ref [] in
- (try
- while true do
- let node = read_node s in
- match node, !elems with
- | PCData c , (PCData c2) :: q ->
- elems := PCData (c2 ^ c) :: q
- | _, l ->
- elems := node :: l
- done
- with
- NoMoreData -> ());
- match pop s with
- | Xml_lexer.Endtag s when s = tag -> List.rev !elems
- | t -> raise (Internal_error (EndOfTagExpected tag))
-
-let rec read_xml s =
- let node = read_node s in
- match node with
- | Element _ -> node
+let rec read_xml do_not_canonicalize s =
+ let rec read_node s =
+ match pop s with
+ | Xml_lexer.PCData s -> PCData s
+ | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, [])
+ | Xml_lexer.Tag (tag, attr, false) ->
+ let elements = read_elems tag s in
+ let elements =
+ if do_not_canonicalize then elements else canonicalize elements
+ in
+ Element (tag, attr, elements)
+ | t ->
+ push t s;
+ raise NoMoreData
+
+ and read_elems tag s =
+ let elems = ref [] in
+ (try
+ while true do
+ let node = read_node s in
+ match node, !elems with
+ | PCData c , (PCData c2) :: q ->
+ elems := PCData (c2 ^ c) :: q
+ | _, l ->
+ elems := node :: l
+ done
+ with
+ NoMoreData -> ());
+ match pop s with
+ | Xml_lexer.Endtag s when s = tag -> List.rev !elems
+ | t -> raise (Internal_error (EndOfTagExpected tag))
+ in
+ match read_node s with
+ | (Element _) as node ->
+ node
| PCData c ->
- if is_blank c then read_xml s
- else raise (Xml_lexer.Error Xml_lexer.ENodeExpected)
+ if is_blank c then
+ read_xml do_not_canonicalize s
+ else
+ raise (Xml_lexer.Error Xml_lexer.ENodeExpected)
let convert = function
| Xml_lexer.EUnterminatedComment -> UnterminatedComment
@@ -168,10 +174,10 @@ let error_of_exn xparser = function
(*let e = Errors.push e in: We do not record backtrace here. *)
raise e
-let do_parse xparser =
+let do_parse do_not_canonicalize xparser =
try
Xml_lexer.init xparser.source;
- let x = read_xml xparser in
+ let x = read_xml do_not_canonicalize xparser in
if xparser.check_eof && pop xparser <> Xml_lexer.Eof then raise (Internal_error EOFExpected);
Xml_lexer.close ();
x
@@ -179,7 +185,8 @@ let do_parse xparser =
Xml_lexer.close ();
raise (!xml_error (error_of_exn xparser any) xparser.source)
-let parse p = do_parse p
+let parse ?(do_not_canonicalize=false) p =
+ do_parse do_not_canonicalize p
let error_msg = function
| UnterminatedComment -> "Unterminated comment"