From 2ed4b1e88e3e304c5146d74124d7057ac62c59a2 Mon Sep 17 00:00:00 2001 From: ppedrot Date: Thu, 24 Nov 2011 17:12:59 +0000 Subject: Fixed the XML parser CDATA handling (and changed the EOL convention of these files which where Windows-like, whoever knows why). git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@14726 85f007b7-540e-0410-9357-904b9bb8a0f7 --- lib/xml_lexer.mll | 603 +++++++++++++++++++++++++++--------------------------- 1 file changed, 299 insertions(+), 304 deletions(-) (limited to 'lib/xml_lexer.mll') diff --git a/lib/xml_lexer.mll b/lib/xml_lexer.mll index 3e5a47ed9..2be4bf98b 100644 --- a/lib/xml_lexer.mll +++ b/lib/xml_lexer.mll @@ -1,304 +1,299 @@ -{(* - * Xml Light, an small Xml parser/printer with DTD support. - * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - *) - -open Lexing - -type error = - | EUnterminatedComment - | EUnterminatedString - | EIdentExpected - | ECloseExpected - | ENodeExpected - | EAttributeNameExpected - | EAttributeValueExpected - | EUnterminatedEntity - -exception Error of error - -type pos = int * int * int * int - -type token = - | Tag of string * (string * string) list * bool - | PCData of string - | Endtag of string - | Eof - -let last_pos = ref 0 -and current_line = ref 0 -and current_line_start = ref 0 - -let tmp = Buffer.create 200 - -let idents = Hashtbl.create 0 - -let _ = begin - Hashtbl.add idents "gt;" ">"; - Hashtbl.add idents "lt;" "<"; - Hashtbl.add idents "amp;" "&"; - Hashtbl.add idents "apos;" "'"; - Hashtbl.add idents "quot;" "\""; -end - -let init lexbuf = - current_line := 1; - current_line_start := lexeme_start lexbuf; - last_pos := !current_line_start - -let close lexbuf = - Buffer.reset tmp - -let pos lexbuf = - !current_line , !current_line_start , - !last_pos , - lexeme_start lexbuf - -let restore (cl,cls,lp,_) = - current_line := cl; - current_line_start := cls; - last_pos := lp - -let newline lexbuf = - incr current_line; - last_pos := lexeme_end lexbuf; - current_line_start := !last_pos - -let error lexbuf e = - last_pos := lexeme_start lexbuf; - raise (Error e) - -} - -let newline = ['\n'] -let break = ['\r'] -let space = [' ' '\t'] -let identchar = ['A'-'Z' 'a'-'z' '_' '0'-'9' ':' '-'] -let entitychar = ['A'-'Z' 'a'-'z'] -let pcchar = [^ '\r' '\n' '<' '>' '&'] - -rule token = parse - | newline - { - newline lexbuf; - token lexbuf - } - | (space | break) + - { - last_pos := lexeme_end lexbuf; - token lexbuf - } - | "" - { () } - | eof - { raise (Error EUnterminatedComment) } - | _ - { comment lexbuf } - -and header = parse - | newline - { - newline lexbuf; - header lexbuf - } - | "?>" - { () } - | eof - { error lexbuf ECloseExpected } - | _ - { header lexbuf } - -and pcdata = parse - | pcchar+ - { - Buffer.add_string tmp (lexeme lexbuf); - pcdata lexbuf - } - | "&#" - { - Buffer.add_string tmp (lexeme lexbuf); - pcdata lexbuf; - } - | '&' - { - Buffer.add_string tmp (entity lexbuf); - pcdata lexbuf - } - | "" - { Buffer.contents tmp } - -and entity = parse - | entitychar+ ';' - { - let ident = lexeme lexbuf in - try - Hashtbl.find idents (String.lowercase ident) - with - Not_found -> "&" ^ ident - } - | _ | eof - { raise (Error EUnterminatedEntity) } - -and ident_name = parse - | identchar+ - { lexeme lexbuf } - | _ | eof - { error lexbuf EIdentExpected } - -and close_tag = parse - | '>' - { () } - | _ | eof - { error lexbuf ECloseExpected } - -and attributes = parse - | '>' - { [], false } - | "/>" - { [], true } - | "" (* do not read a char ! *) - { - let key = attribute lexbuf in - let data = attribute_data lexbuf in - ignore_spaces lexbuf; - let others, closed = attributes lexbuf in - (key, data) :: others, closed - } - -and attribute = parse - | identchar+ - { lexeme lexbuf } - | _ | eof - { error lexbuf EAttributeNameExpected } - -and attribute_data = parse - | space* '=' space* '"' - { - Buffer.reset tmp; - last_pos := lexeme_end lexbuf; - dq_string lexbuf - } - | space* '=' space* '\'' - { - Buffer.reset tmp; - last_pos := lexeme_end lexbuf; - q_string lexbuf - } - | _ | eof - { error lexbuf EAttributeValueExpected } - -and dq_string = parse - | '"' - { Buffer.contents tmp } - | '\\' [ '"' '\\' ] - { - Buffer.add_char tmp (lexeme_char lexbuf 1); - dq_string lexbuf - } - | eof - { raise (Error EUnterminatedString) } - | _ - { - Buffer.add_char tmp (lexeme_char lexbuf 0); - dq_string lexbuf - } - -and q_string = parse - | '\'' - { Buffer.contents tmp } - | '\\' [ '\'' '\\' ] - { - Buffer.add_char tmp (lexeme_char lexbuf 1); - q_string lexbuf - } - | eof - { raise (Error EUnterminatedString) } - | _ - { - Buffer.add_char tmp (lexeme_char lexbuf 0); - q_string lexbuf - } +{(* + * Xml Light, an small Xml parser/printer with DTD support. + * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open Lexing + +type error = + | EUnterminatedComment + | EUnterminatedString + | EIdentExpected + | ECloseExpected + | ENodeExpected + | EAttributeNameExpected + | EAttributeValueExpected + | EUnterminatedEntity + +exception Error of error + +type pos = int * int * int * int + +type token = + | Tag of string * (string * string) list * bool + | PCData of string + | Endtag of string + | Eof + +let last_pos = ref 0 +and current_line = ref 0 +and current_line_start = ref 0 + +let tmp = Buffer.create 200 + +let idents = Hashtbl.create 0 + +let _ = begin + Hashtbl.add idents "gt;" ">"; + Hashtbl.add idents "lt;" "<"; + Hashtbl.add idents "amp;" "&"; + Hashtbl.add idents "apos;" "'"; + Hashtbl.add idents "quot;" "\""; +end + +let init lexbuf = + current_line := 1; + current_line_start := lexeme_start lexbuf; + last_pos := !current_line_start + +let close lexbuf = + Buffer.reset tmp + +let pos lexbuf = + !current_line , !current_line_start , + !last_pos , + lexeme_start lexbuf + +let restore (cl,cls,lp,_) = + current_line := cl; + current_line_start := cls; + last_pos := lp + +let newline lexbuf = + incr current_line; + last_pos := lexeme_end lexbuf; + current_line_start := !last_pos + +let error lexbuf e = + last_pos := lexeme_start lexbuf; + raise (Error e) + +} + +let newline = ['\n'] +let break = ['\r'] +let space = [' ' '\t'] +let identchar = ['A'-'Z' 'a'-'z' '_' '0'-'9' ':' '-'] +let entitychar = ['A'-'Z' 'a'-'z'] +let pcchar = [^ '\r' '\n' '<' '>' '&'] + +rule token = parse + | newline | (newline break) | break + { + newline lexbuf; + PCData "\n" + } + | "" + { () } + | eof + { raise (Error EUnterminatedComment) } + | _ + { comment lexbuf } + +and header = parse + | newline | (newline break) | break + { + newline lexbuf; + header lexbuf + } + | "?>" + { () } + | eof + { error lexbuf ECloseExpected } + | _ + { header lexbuf } + +and pcdata = parse + | pcchar+ + { + Buffer.add_string tmp (lexeme lexbuf); + pcdata lexbuf + } + | "&#" + { + Buffer.add_string tmp (lexeme lexbuf); + pcdata lexbuf; + } + | '&' + { + Buffer.add_string tmp (entity lexbuf); + pcdata lexbuf + } + | "" + { Buffer.contents tmp } + +and entity = parse + | entitychar+ ';' + { + let ident = lexeme lexbuf in + try + Hashtbl.find idents (String.lowercase ident) + with + Not_found -> "&" ^ ident + } + | _ | eof + { raise (Error EUnterminatedEntity) } + +and ident_name = parse + | identchar+ + { lexeme lexbuf } + | _ | eof + { error lexbuf EIdentExpected } + +and close_tag = parse + | '>' + { () } + | _ | eof + { error lexbuf ECloseExpected } + +and attributes = parse + | '>' + { [], false } + | "/>" + { [], true } + | "" (* do not read a char ! *) + { + let key = attribute lexbuf in + let data = attribute_data lexbuf in + ignore_spaces lexbuf; + let others, closed = attributes lexbuf in + (key, data) :: others, closed + } + +and attribute = parse + | identchar+ + { lexeme lexbuf } + | _ | eof + { error lexbuf EAttributeNameExpected } + +and attribute_data = parse + | space* '=' space* '"' + { + Buffer.reset tmp; + last_pos := lexeme_end lexbuf; + dq_string lexbuf + } + | space* '=' space* '\'' + { + Buffer.reset tmp; + last_pos := lexeme_end lexbuf; + q_string lexbuf + } + | _ | eof + { error lexbuf EAttributeValueExpected } + +and dq_string = parse + | '"' + { Buffer.contents tmp } + | '\\' [ '"' '\\' ] + { + Buffer.add_char tmp (lexeme_char lexbuf 1); + dq_string lexbuf + } + | eof + { raise (Error EUnterminatedString) } + | _ + { + Buffer.add_char tmp (lexeme_char lexbuf 0); + dq_string lexbuf + } + +and q_string = parse + | '\'' + { Buffer.contents tmp } + | '\\' [ '\'' '\\' ] + { + Buffer.add_char tmp (lexeme_char lexbuf 1); + q_string lexbuf + } + | eof + { raise (Error EUnterminatedString) } + | _ + { + Buffer.add_char tmp (lexeme_char lexbuf 0); + q_string lexbuf + } -- cgit v1.2.3