Diffstat (limited to 'parsing')
-rw-r--r-- | parsing/cLexer.ml4 | 121
-rw-r--r-- | parsing/cLexer.mli |  17
-rw-r--r-- | parsing/compat.ml4 |  79
3 files changed, 102 insertions, 115 deletions
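The patch below folds the lexer's comment bookkeeping and the current file name into a single lexer_state value that callers save and restore around parsing, drops the UnsupportedUnicode error path in favour of a Unicode.Unknown classification plus a warning, and exposes the keyword table through a new CLexer.keywords accessor built on ttree_elements. As a rough usage sketch of that accessor (hypothetical caller code written for illustration, not part of the patch):

(* Illustration only: register a keyword, then observe it in the set
   returned by the new CLexer.keywords accessor. CString.Set is the
   string-set module from Coq's clib (a Set.Make instance). *)
let () =
  CLexer.add_keyword "my_kw";
  let kws = CLexer.keywords () in
  assert (CString.Set.mem "my_kw" kws);
  (* dump all registered keywords, one per line *)
  CString.Set.iter print_endline kws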
diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4
index 181c4b7fd..79771f3f6 100644
--- a/parsing/cLexer.ml4
+++ b/parsing/cLexer.ml4
@@ -69,6 +69,15 @@ let ttree_remove ttree str =
   in
   remove ttree 0
 
+let ttree_elements ttree =
+  let rec elts tt accu =
+    let accu = match tt.node with
+    | None -> accu
+    | Some s -> CString.Set.add s accu
+    in
+    CharMap.fold (fun _ tt accu -> elts tt accu) tt.branch accu
+  in
+  elts ttree CString.Set.empty
 
 (* Errors occurring while lexing (explained as "Lexer error: ...") *)
 
@@ -101,14 +110,6 @@ module Error = struct
 end
 open Error
 
-let current_file = ref ""
-
-let get_current_file () =
-  !current_file
-
-let set_current_file ~fname =
-  current_file := fname
-
 let err loc str = Loc.raise (Compat.to_coqloc loc) (Error.E str)
 
 let bad_token str = raise (Error.E (Bad_token str))
@@ -120,11 +121,6 @@ type token_kind =
   | AsciiChar
   | EmptyStream
 
-let error_unsupported_unicode_character loc n unicode cs =
-  let bp = Stream.count cs in
-  let loc = set_loc_pos loc bp (bp+n) in
-  err loc (UnsupportedUnicode unicode)
-
 let error_utf8 loc cs =
   let bp = Stream.count cs in
   Stream.junk cs; (* consume the char to avoid read it and fail again *)
@@ -174,14 +170,12 @@ let lookup_utf8_tail loc c cs =
           (Char.code c3 land 0x3F) lsl 6 + (Char.code c4 land 0x3F)
       | _ -> error_utf8 loc cs
   in
-  try Unicode.classify unicode, n
-  with Unicode.Unsupported ->
-    njunk n cs; error_unsupported_unicode_character loc n unicode cs
+  Utf8Token (Unicode.classify unicode, n)
 
 let lookup_utf8 loc cs =
   match Stream.peek cs with
     | Some ('\x00'..'\x7F') -> AsciiChar
-    | Some ('\x80'..'\xFF' as c) -> Utf8Token (lookup_utf8_tail loc c cs)
+    | Some ('\x80'..'\xFF' as c) -> lookup_utf8_tail loc c cs
     | None -> EmptyStream
 
 let unlocated f x = f x
@@ -199,17 +193,6 @@ let check_keyword str =
   in
   loop_symb (Stream.of_string str)
 
-let warn_unparsable_keyword =
-  CWarnings.create ~name:"unparsable-keyword" ~category:"parsing"
-    (fun (s,unicode) ->
-      strbrk (Printf.sprintf "Token '%s' contains unicode character 0x%x \
-              which will not be parsable." s unicode))
-
-let check_keyword_to_add s =
-  try check_keyword s
-  with Error.E (UnsupportedUnicode unicode) ->
-    warn_unparsable_keyword (s,unicode)
-
 let check_ident str =
   let rec loop_id intail = parser
     | [< ' ('a'..'z' | 'A'..'Z' | '_'); s >] ->
@@ -240,13 +223,15 @@ let is_keyword s =
 let add_keyword str =
   if not (is_keyword str) then
     begin
-      check_keyword_to_add str;
+      check_keyword str;
       token_tree := ttree_add !token_tree str
     end
 
 let remove_keyword str =
   token_tree := ttree_remove !token_tree str
 
+let keywords () = ttree_elements !token_tree
+
 (* Freeze and unfreeze the state of the lexer *)
 
 type frozen_t = ttree
@@ -270,6 +255,12 @@ let get_buff len = String.sub !buff 0 len
 
 (* The classical lexer: idents, numbers, quoted strings, comments *)
 
+let warn_unrecognized_unicode =
+  CWarnings.create ~name:"unrecognized-unicode" ~category:"parsing"
+    (fun (u,id) ->
+      strbrk (Printf.sprintf "Not considering unicode character \"%s\" of unknown \
+              lexical status as part of identifier \"%s\"." u id))
+
 let rec ident_tail loc len = parser
   | [< ' ('a'..'z' | 'A'..'Z' | '0'..'9' | ''' | '_' as c); s >] ->
       ident_tail loc (store len c) s
@@ -277,6 +268,10 @@ let rec ident_tail loc len = parser
       match lookup_utf8 loc s with
       | Utf8Token ((Unicode.IdentPart | Unicode.Letter), n) ->
           ident_tail loc (nstore n len s) s
+      | Utf8Token (Unicode.Unknown, n) ->
+          let id = get_buff len in
+          let u = String.concat "" (List.map (String.make 1) (Stream.npeek n s)) in
+          warn_unrecognized_unicode ~loc:!@loc (u,id); len
       | _ -> len
 
 let rec number len = parser
@@ -334,6 +329,9 @@ let rec string loc ~comm_level bp len = parser
 (* Hook for exporting comment into xml theory files *)
 let (f_xml_output_comment, xml_output_comment) = Hook.make ~default:ignore ()
 
+(* To associate locations to a file name *)
+let current_file = ref None
+
 (* Utilities for comments in beautify *)
 let comment_begin = ref None
 let comm_loc bp = match !comment_begin with
@@ -354,16 +352,20 @@ let rec split_comments comacc acc pos = function
 let extract_comments pos = split_comments [] [] pos !comments
 
-type comments_state = int option * string * bool * ((int * int) * string) list
-let restore_comments_state (o,s,b,c) =
+(* The state of the lexer visible from outside *)
+type lexer_state = int option * string * bool * ((int * int) * string) list * string option
+
+let init_lexer_state f = (None,"",true,[],f)
+let set_lexer_state (o,s,b,c,f) =
   comment_begin := o;
   Buffer.clear current_comment;
   Buffer.add_string current_comment s;
   between_commands := b;
-  comments := c
-let default_comments_state = (None,"",true,[])
-let comments_state () =
-  let s = (!comment_begin, Buffer.contents current_comment, !between_commands, !comments) in
-  restore_comments_state default_comments_state; s
+  comments := c;
+  current_file := f
+let release_lexer_state () =
+  (!comment_begin, Buffer.contents current_comment, !between_commands, !comments, !current_file)
+let drop_lexer_state () =
+  set_lexer_state (init_lexer_state None)
 
 let real_push_char c = Buffer.add_char current_comment c
@@ -391,7 +393,7 @@ let comment_stop ep =
   if !Flags.xml_export && Buffer.length current_comment > 0 &&
     (!between_commands || not(null_comment current_s)) then
       Hook.get f_xml_output_comment current_s;
-  (if Flags.do_beautify() && Buffer.length current_comment > 0 &&
+  (if !Flags.beautify && Buffer.length current_comment > 0 &&
     (!between_commands || not(null_comment current_s)) then
     let bp = match !comment_begin with
         Some bp -> bp
@@ -408,7 +410,7 @@ let comment_stop ep =
 
 (* Does not unescape!!! *)
 let rec comm_string loc bp = parser
-  | [< ''"' >] ep -> push_string "\""; loc
+  | [< ''"' >] -> push_string "\""; loc
   | [< ''\\'; loc =
            (parser [< ' ('"' | '\\' as c) >] ->
              let () = match c with
@@ -438,7 +440,7 @@ let rec comment loc bp = parser bp2
       let loc =
         (* In beautify mode, the lexing differs between strings in comments and
            regular strings (e.g. escaping). It seems wrong. *)
-        if Flags.do_beautify() then (push_string"\""; comm_string loc bp2 s)
+        if !Flags.beautify then (push_string"\""; comm_string loc bp2 s)
         else fst (string loc ~comm_level:(Some 0) bp2 0 s)
       in
       comment loc bp s
@@ -511,20 +513,19 @@ let process_chars loc bp c cs =
       let loc = set_loc_pos loc bp ep' in
       err loc Undefined_token
 
-let token_of_special c s = match c with
-  | '.' -> FIELD s
-  | _ -> assert false
+(* Parse what follows a dot *)
 
-(* Parse what follows a dot / a dollar *)
-
-let parse_after_special loc c bp =
+let parse_after_dot loc c bp =
   parser
-  | [< ' ('a'..'z' | 'A'..'Z' | '_' as d); len = ident_tail loc (store 0 d) >] ->
-      token_of_special c (get_buff len)
+  | [< ' ('a'..'z' | 'A'..'Z' | '_' as d); len = ident_tail loc (store 0 d); s >] ->
+      let field = get_buff len in
+      (try find_keyword loc ("."^field) s with Not_found -> FIELD field)
   | [< s >] ->
       match lookup_utf8 loc s with
      | Utf8Token (Unicode.Letter, n) ->
-          token_of_special c (get_buff (ident_tail loc (nstore n 0 s) s))
+          let len = ident_tail loc (nstore n 0 s) s in
+          let field = get_buff len in
+          (try find_keyword loc ("."^field) s with Not_found -> FIELD field)
      | AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars loc bp c s)
 
 (* Parse what follows a question mark *)
@@ -552,7 +553,7 @@ let rec next_token loc = parser bp
       comm_loc bp; push_char c; next_token (bump_loc_line loc ep) s
   | [< '' ' | '\t' | '\r' as c; s >] ->
       comm_loc bp; push_char c; next_token loc s
-  | [< ''.' as c; t = parse_after_special loc c bp; s >] ep ->
+  | [< ''.' as c; t = parse_after_dot loc c bp; s >] ep ->
      comment_stop bp;
      (* We enforce that "." should either be part of a larger keyword,
         for instance ".(", or followed by a blank or eof. *)
@@ -572,7 +573,7 @@ let rec next_token loc = parser bp
      comment_stop bp; between_commands := new_between_commands; t
  | [< ''?'; s >] ep ->
      let t = parse_after_qmark loc bp s in
-      comment_stop bp; (t, set_loc_pos loc ep bp)
+      comment_stop bp; (t, set_loc_pos loc bp ep)
  | [< ' ('a'..'z' | 'A'..'Z' | '_' as c);
       len = ident_tail loc (store 0 c); s >] ep ->
      let id = get_buff len in
@@ -592,6 +593,12 @@ let rec next_token loc = parser bp
            let loc = comment loc bp s in next_token loc s
        | [< t = process_chars loc bp c >] -> comment_stop bp; t >] ->
      t
+  | [< ' ('{' | '}' as c); s >] ep ->
+      let t,new_between_commands =
+        if !between_commands then (KEYWORD (String.make 1 c), set_loc_pos loc bp ep), true
+        else process_chars loc bp c s, false
+      in
+      comment_stop bp; between_commands := new_between_commands; t
  | [< s >] ->
      match lookup_utf8 loc s with
      | Utf8Token (Unicode.Letter, n) ->
@@ -600,11 +607,9 @@ let rec next_token loc = parser bp
          let ep = Stream.count s in
          comment_stop bp;
          (try find_keyword loc id s with Not_found -> IDENT id), set_loc_pos loc bp ep
-      | AsciiChar | Utf8Token ((Unicode.Symbol | Unicode.IdentPart), _) ->
+      | AsciiChar | Utf8Token ((Unicode.Symbol | Unicode.IdentPart | Unicode.Unknown), _) ->
          let t = process_chars loc bp (Stream.next s) s in
-          let new_between_commands = match t with
-            (KEYWORD ("{"|"}"),_) -> !between_commands | _ -> false in
-          comment_stop bp; between_commands := new_between_commands; t
+          comment_stop bp; t
      | EmptyStream ->
          comment_stop bp; (EOI, set_loc_pos loc bp (bp+1))
 
@@ -626,12 +631,6 @@ let loct_func loct i =
 
 let loct_add loct i loc = Hashtbl.add loct i loc
 
-let current_location_table = ref (loct_create ())
-
-type location_table = (int, Compat.CompatLoc.t) Hashtbl.t
-let location_table () = !current_location_table
-let restore_location_table t = current_location_table := t
-
 (** {6 The lexer of Coq} *)
 
 (** Note: removing a token.
@@ -669,7 +668,6 @@ let func cs =
         cur_loc := Compat.after loc;
         loct_add loct i loc; Some tok)
   in
-  current_location_table := loct;
   (ts, loct_func loct)
 
 let lexer = {
@@ -706,7 +704,6 @@ end
 let mk () =
   let loct = loct_create () in
   let cur_loc = ref (Compat.make_loc !current_file 1 0 0 0) in
-  current_location_table := loct;
   let rec self init_loc (* FIXME *) =
     parser i
      [< (tok, loc) = next_token !cur_loc; s >] ->
diff --git a/parsing/cLexer.mli b/parsing/cLexer.mli
index 3ad49eb74..e0fdf8cb5 100644
--- a/parsing/cLexer.mli
+++ b/parsing/cLexer.mli
@@ -9,22 +9,7 @@
 val add_keyword : string -> unit
 val remove_keyword : string -> unit
 val is_keyword : string -> bool
-
-(* val location_function : int -> Loc.t *)
-
-(** for coqdoc *)
-type location_table
-val location_table : unit -> location_table
-val restore_location_table : location_table -> unit
-
-
-(** [get_current_file fname] returns the filename used in locations emitted by
-    the lexer *)
-val get_current_file : unit -> string
-
-(** [set_current_file fname] sets the filename used in locations emitted by the
-    lexer *)
-val set_current_file : fname:string -> unit
+val keywords : unit -> CString.Set.t
 
 val check_ident : string -> unit
 val is_ident : string -> bool
diff --git a/parsing/compat.ml4 b/parsing/compat.ml4
index ecf515111..befa0d01b 100644
--- a/parsing/compat.ml4
+++ b/parsing/compat.ml4
@@ -10,6 +10,10 @@
 
 (** Locations *)
 
+let file_loc_of_file = function
+| None -> ""
+| Some f -> f
+
 IFDEF CAMLP5 THEN
 
 module CompatLoc = struct
@@ -29,7 +33,7 @@ let to_coqloc loc =
    Loc.line_nb_last = Ploc.line_nb_last loc;
    Loc.bol_pos_last = Ploc.bol_pos_last loc; }
 
-let make_loc fname line_nb bol_pos bp ep = Ploc.make_loc fname line_nb bol_pos (bp, ep) ""
+let make_loc fname line_nb bol_pos bp ep = Ploc.make_loc (file_loc_of_file fname) line_nb bol_pos (bp, ep) ""
 
 (* Update a loc without allocating an intermediate pair *)
 let set_loc_pos loc bp ep =
@@ -80,7 +84,7 @@ let to_coqloc loc =
    Loc.bol_pos_last = CompatLoc.stop_bol loc; }
 
 let make_loc fname line_nb bol_pos start stop =
-  CompatLoc.of_tuple (fname, line_nb, bol_pos, start, line_nb, bol_pos, stop, false)
+  CompatLoc.of_tuple (file_loc_of_file fname, line_nb, bol_pos, start, line_nb, bol_pos, stop, false)
 
 open CompatLoc
 
@@ -97,7 +101,7 @@ let bump_loc_line_last loc bol_pos =
            stop_line loc + 1, bol_pos, stop_off loc, is_ghost loc)
 
 let set_loc_file loc fname =
-  of_tuple (fname, start_line loc, start_bol loc, start_off loc,
+  of_tuple (file_loc_of_file fname, start_line loc, start_bol loc, start_off loc,
            stop_line loc, stop_bol loc, stop_off loc, is_ghost loc)
 
 let after loc =
@@ -138,20 +142,22 @@ module type LexerSig = sig
    exception E of t
    val to_string : t -> string
  end
-  type comments_state
-  val default_comments_state : comments_state
-  val comments_state : unit -> comments_state
-  val restore_comments_state : comments_state -> unit
+  type lexer_state
+  val init_lexer_state : string option -> lexer_state
+  val set_lexer_state : lexer_state -> unit
+  val release_lexer_state : unit -> lexer_state
+  val drop_lexer_state : unit -> unit
 end
 
 ELSE
 
 module type LexerSig = sig
  include Camlp4.Sig.Lexer with module Loc = CompatLoc and type Token.t = Tok.t
-  type comments_state
-  val default_comments_state : comments_state
-  val comments_state : unit -> comments_state
-  val restore_comments_state : comments_state -> unit
+  type lexer_state
+  val init_lexer_state : string option -> lexer_state
+  val set_lexer_state : lexer_state -> unit
+  val release_lexer_state : unit -> lexer_state
+  val drop_lexer_state : unit -> unit
 end
 
 END
@@ -172,7 +178,7 @@ module type GrammarSig = sig
  type extend_statment =
    Gramext.position option * single_extend_statment list
  type coq_parsable
-  val parsable : char Stream.t -> coq_parsable
+  val parsable : ?file:string -> char Stream.t -> coq_parsable
  val action : 'a -> action
  val entry_create : string -> 'a entry
  val entry_parse : 'a entry -> coq_parsable -> 'a
@@ -193,32 +199,34 @@ module GrammarMake (L:LexerSig) : GrammarSig = struct
    string option * Gramext.g_assoc option * production_rule list
  type extend_statment =
    Gramext.position option * single_extend_statment list
-  type coq_parsable = parsable * L.comments_state ref
-  let parsable c =
-    let state = ref L.default_comments_state in (parsable c, state)
+  type coq_parsable = parsable * L.lexer_state ref
+  let parsable ?file c =
+    let state = ref (L.init_lexer_state file) in
+    L.set_lexer_state !state;
+    let a = parsable c in
+    state := L.release_lexer_state ();
+    (a,state)
  let action = Gramext.action
  let entry_create = Entry.create
  let entry_parse e (p,state) =
-    L.restore_comments_state !state;
+    L.set_lexer_state !state;
    try
      let c = Entry.parse e p in
-      state := L.comments_state ();
-      L.restore_comments_state L.default_comments_state;
+      state := L.release_lexer_state ();
      c
    with Exc_located (loc,e) ->
-      L.restore_comments_state L.default_comments_state;
+      L.drop_lexer_state ();
      let loc' = Loc.get_loc (Exninfo.info e) in
      let loc = match loc' with None -> to_coqloc loc | Some loc -> loc in
      Loc.raise loc e
  let with_parsable (p,state) f x =
-    L.restore_comments_state !state;
+    L.set_lexer_state !state;
    try
      let a = f x in
-      state := L.comments_state ();
-      L.restore_comments_state L.default_comments_state;
+      state := L.release_lexer_state ();
      a
    with e ->
-      L.restore_comments_state L.default_comments_state;
+      L.drop_lexer_state ();
      raise e
 
  let entry_print ft x = Entry.print ft x
@@ -234,7 +242,7 @@ module type GrammarSig = sig
  type 'a entry = 'a Entry.t
  type action = Action.t
  type coq_parsable
-  val parsable : char Stream.t -> coq_parsable
+  val parsable : ?file:string -> char Stream.t -> coq_parsable
  val action : 'a -> action
  val entry_create : string -> 'a entry
  val entry_parse : 'a entry -> coq_parsable -> 'a
@@ -249,31 +257,28 @@ module GrammarMake (L:LexerSig) : GrammarSig = struct
  include Camlp4.Struct.Grammar.Static.Make (L)
  type 'a entry = 'a Entry.t
  type action = Action.t
-  type comments_state = int option * string * bool * ((int * int) * string) list
-  type coq_parsable = char Stream.t * L.comments_state ref
-  let parsable s = let state = ref L.default_comments_state in (s, state)
+  type coq_parsable = char Stream.t * L.lexer_state ref
+  let parsable ?file s = let state = ref (L.init_lexer_state file) in (s, state)
  let action = Action.mk
  let entry_create = Entry.mk
  let entry_parse e (s,state) =
-    L.restore_comments_state !state;
+    L.set_lexer_state !state;
    try
      let c = parse e (*FIXME*)CompatLoc.ghost s in
-      state := L.comments_state ();
-      L.restore_comments_state L.default_comments_state;
+      state := L.release_lexer_state ();
      c
    with Exc_located (loc,e) ->
-      L.restore_comments_state L.default_comments_state;
-      raise_coq_loc loc e
+      L.drop_lexer_state ();
+      raise_coq_loc loc e;;
  let with_parsable (p,state) f x =
-    L.restore_comments_state !state;
+    L.set_lexer_state !state;
    try
      let a = f x in
-      state := L.comments_state ();
-      L.restore_comments_state L.default_comments_state;
+      state := L.release_lexer_state ();
      a
    with e ->
-      L.restore_comments_state L.default_comments_state;
-      raise e
+      L.drop_lexer_state ();
+      Pervasives.raise e;;
  let entry_print ft x = Entry.print ft x
  let srules' = srules (entry_create "dummy")
 end
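For reference, the save/restore discipline that GrammarMake follows above amounts to: install a state for a given file before lexing, keep the released state on success, and discard it on failure. A minimal sketch of a hypothetical caller, assuming the init/set/release/drop_lexer_state functions are reachable through CLexer (e.g. via an include of Compat.LexerSig in cLexer.mli, which this diff does not show):

(* Sketch of the lexer-state lifecycle used by GrammarMake in compat.ml4. *)
let with_lexer_state (file : string option) (parse_fun : unit -> 'a) : 'a =
  let state = ref (CLexer.init_lexer_state file) in
  CLexer.set_lexer_state !state;
  try
    let result = parse_fun () in
    (* keep the comments and file name gathered during parsing; a real
       caller would stash !state for a later parse, as coq_parsable does *)
    state := CLexer.release_lexer_state ();
    result
  with e ->
    CLexer.drop_lexer_state ();  (* reset the global lexer state *)
    raise e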