diff options
Diffstat (limited to 'lib/xml_parser.ml')
-rw-r--r-- | lib/xml_parser.ml | 299 |
1 files changed, 147 insertions, 152 deletions
diff --git a/lib/xml_parser.ml b/lib/xml_parser.ml index 600796f7..8db3f9e8 100644 --- a/lib/xml_parser.ml +++ b/lib/xml_parser.ml @@ -19,30 +19,29 @@ *) open Printf +open Xml_datatype -type xml = - | Element of (string * (string * string) list * xml list) - | PCData of string +type xml = Xml_datatype.xml type error_pos = { - eline : int; - eline_start : int; - emin : int; - emax : int; + eline : int; + eline_start : int; + emin : int; + emax : int; } type error_msg = - | UnterminatedComment - | UnterminatedString - | UnterminatedEntity - | IdentExpected - | CloseExpected - | NodeExpected - | AttributeNameExpected - | AttributeValueExpected - | EndOfTagExpected of string - | EOFExpected - | Empty + | UnterminatedComment + | UnterminatedString + | UnterminatedEntity + | IdentExpected + | CloseExpected + | NodeExpected + | AttributeNameExpected + | AttributeValueExpected + | EndOfTagExpected of string + | EOFExpected + | Empty type error = error_msg * error_pos @@ -51,21 +50,16 @@ exception Error of error exception File_not_found of string type t = { - mutable check_eof : bool; - mutable concat_pcdata : bool; + mutable check_eof : bool; + mutable concat_pcdata : bool; + source : Lexing.lexbuf; + stack : Xml_lexer.token Stack.t; } -type source = - | SFile of string - | SChannel of in_channel - | SString of string - | SLexbuf of Lexing.lexbuf - -type state = { - source : Lexing.lexbuf; - stack : Xml_lexer.token Stack.t; - xparser : t; -} +type source = + | SChannel of in_channel + | SString of string + | SLexbuf of Lexing.lexbuf exception Internal_error of error_msg exception NoMoreData @@ -86,152 +80,153 @@ let is_blank s = !i = len let _raises e f = - xml_error := e; - file_not_found := f - -let make () = - { - check_eof = true; - concat_pcdata = true; - } + xml_error := e; + file_not_found := f + +let make source = + let source = match source with + | SChannel chan -> Lexing.from_channel chan + | SString s -> Lexing.from_string s + | SLexbuf lexbuf -> lexbuf + in + let () = Xml_lexer.init source in + { + check_eof = false; + concat_pcdata = true; + source = source; + stack = Stack.create (); + } let check_eof p v = p.check_eof <- v -let concat_pcdata p v = p.concat_pcdata <- v let pop s = - try - Stack.pop s.stack - with - Stack.Empty -> - Xml_lexer.token s.source + try + Stack.pop s.stack + with + Stack.Empty -> + Xml_lexer.token s.source let push t s = - Stack.push t s.stack + Stack.push t s.stack let canonicalize l = let has_elt = List.exists (function Element _ -> true | _ -> false) l in if has_elt then List.filter (function PCData s -> not (is_blank s) | _ -> true) l else l -let rec read_node s = - match pop s with - | Xml_lexer.PCData s -> PCData s - | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, []) - | Xml_lexer.Tag (tag, attr, false) -> - let elements = read_elems tag s in - Element (tag, attr, canonicalize elements) - | t -> - push t s; - raise NoMoreData -and - read_elems tag s = - let elems = ref [] in - (try - while true do - let node = read_node s in - match node, !elems with - | PCData c , (PCData c2) :: q -> - elems := PCData (c2 ^ c) :: q - | _, l -> - elems := node :: l - done - with - NoMoreData -> ()); - match pop s with - | Xml_lexer.Endtag s when s = tag -> List.rev !elems - | t -> raise (Internal_error (EndOfTagExpected tag)) - -let rec read_xml s = - let node = read_node s in - match node with - | Element _ -> node - | PCData c -> - if is_blank c then read_xml s - else raise (Xml_lexer.Error Xml_lexer.ENodeExpected) +let rec read_xml do_not_canonicalize s = + let rec read_node s = + match pop s with + | Xml_lexer.PCData s -> PCData s + | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, []) + | Xml_lexer.Tag (tag, attr, false) -> + let elements = read_elems tag s in + let elements = + if do_not_canonicalize then elements else canonicalize elements + in + Element (tag, attr, elements) + | t -> + push t s; + raise NoMoreData + + and read_elems tag s = + let elems = ref [] in + (try + while true do + let node = read_node s in + match node, !elems with + | PCData c , (PCData c2) :: q -> + elems := PCData (c2 ^ c) :: q + | _, l -> + elems := node :: l + done + with + NoMoreData -> ()); + match pop s with + | Xml_lexer.Endtag s when s = tag -> List.rev !elems + | t -> raise (Internal_error (EndOfTagExpected tag)) + in + match read_node s with + | (Element _) as node -> + node + | PCData c -> + if is_blank c then + read_xml do_not_canonicalize s + else + raise (Xml_lexer.Error Xml_lexer.ENodeExpected) let convert = function - | Xml_lexer.EUnterminatedComment -> UnterminatedComment - | Xml_lexer.EUnterminatedString -> UnterminatedString - | Xml_lexer.EIdentExpected -> IdentExpected - | Xml_lexer.ECloseExpected -> CloseExpected - | Xml_lexer.ENodeExpected -> NodeExpected - | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected - | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected - | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity - -let error_of_exn stk = function - | NoMoreData when Stack.pop stk = Xml_lexer.Eof -> Empty + | Xml_lexer.EUnterminatedComment -> UnterminatedComment + | Xml_lexer.EUnterminatedString -> UnterminatedString + | Xml_lexer.EIdentExpected -> IdentExpected + | Xml_lexer.ECloseExpected -> CloseExpected + | Xml_lexer.ENodeExpected -> NodeExpected + | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected + | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected + | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity + +let error_of_exn xparser = function + | NoMoreData when pop xparser = Xml_lexer.Eof -> Empty | NoMoreData -> NodeExpected | Internal_error e -> e | Xml_lexer.Error e -> convert e - | e -> raise e - -let do_parse xparser source = - let stk = Stack.create() in - try - Xml_lexer.init source; - let s = { source = source; xparser = xparser; stack = stk } in - let x = read_xml s in - if xparser.check_eof && pop s <> Xml_lexer.Eof then raise (Internal_error EOFExpected); - Xml_lexer.close source; - x - with e when e <> Sys.Break -> - Xml_lexer.close source; - raise (!xml_error (error_of_exn stk e) source) - -let parse p = function - | SChannel ch -> do_parse p (Lexing.from_channel ch) - | SString str -> do_parse p (Lexing.from_string str) - | SLexbuf lex -> do_parse p lex - | SFile fname -> - let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in - try - let x = do_parse p (Lexing.from_channel ch) in - close_in ch; - x - with - reraise -> - close_in ch; - raise reraise - + | e -> + (*let e = Errors.push e in: We do not record backtrace here. *) + raise e + +let do_parse do_not_canonicalize xparser = + try + Xml_lexer.init xparser.source; + let x = read_xml do_not_canonicalize xparser in + if xparser.check_eof && pop xparser <> Xml_lexer.Eof then raise (Internal_error EOFExpected); + Xml_lexer.close (); + x + with any -> + Xml_lexer.close (); + raise (!xml_error (error_of_exn xparser any) xparser.source) + +let parse ?(do_not_canonicalize=false) p = + do_parse do_not_canonicalize p let error_msg = function - | UnterminatedComment -> "Unterminated comment" - | UnterminatedString -> "Unterminated string" - | UnterminatedEntity -> "Unterminated entity" - | IdentExpected -> "Ident expected" - | CloseExpected -> "Element close expected" - | NodeExpected -> "Xml node expected" - | AttributeNameExpected -> "Attribute name expected" - | AttributeValueExpected -> "Attribute value expected" - | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag - | EOFExpected -> "End of file expected" - | Empty -> "Empty" + | UnterminatedComment -> "Unterminated comment" + | UnterminatedString -> "Unterminated string" + | UnterminatedEntity -> "Unterminated entity" + | IdentExpected -> "Ident expected" + | CloseExpected -> "Element close expected" + | NodeExpected -> "Xml node expected" + | AttributeNameExpected -> "Attribute name expected" + | AttributeValueExpected -> "Attribute value expected" + | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag + | EOFExpected -> "End of file expected" + | Empty -> "Empty" let error (msg,pos) = - if pos.emin = pos.emax then - sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) - else - sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start) - + if pos.emin = pos.emax then + sprintf "%s line %d character %d" (error_msg msg) pos.eline + (pos.emin - pos.eline_start) + else + sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline + (pos.emin - pos.eline_start) (pos.emax - pos.eline_start) + let line e = e.eline -let range e = - e.emin - e.eline_start , e.emax - e.eline_start +let range e = + e.emin - e.eline_start , e.emax - e.eline_start let abs_range e = - e.emin , e.emax + e.emin , e.emax let pos source = - let line, lstart, min, max = Xml_lexer.pos source in - { - eline = line; - eline_start = lstart; - emin = min; - emax = max; - } - -let () = _raises (fun x p -> + let line, lstart, min, max = Xml_lexer.pos source in + { + eline = line; + eline_start = lstart; + emin = min; + emax = max; + } + +let () = _raises (fun x p -> (* local cast : Xml.error_msg -> error_msg *) - Error (x, pos p)) - (fun f -> File_not_found f) + Error (x, pos p)) + (fun f -> File_not_found f) |