Diffstat (limited to 'parsing')
 parsing/cLexer.ml4 | 121
 parsing/cLexer.mli |  17
 parsing/compat.ml4 |  79
3 files changed, 102 insertions, 115 deletions
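
The bulk of this commit replaces the lexer's scattered mutable globals (comment buffer, between-commands flag, current file name, location table) with a single lexer_state value that callers save and restore around each parse. Below is a minimal standalone OCaml sketch of that save/set/release/drop discipline; the record fields and the with_lexer_state helper are illustrative stand-ins, not Coq's actual definitions.

(* Stand-in for the lexer's hidden mutable state; the real code keeps
   these in separate refs and buffers rather than one record. *)
type lexer_state = {
  comment_begin : int option;   (* start position of a pending comment *)
  current_comment : string;     (* comment text accumulated so far *)
  between_commands : bool;      (* are we between two sentences? *)
  file : string option;         (* file name used in emitted locations *)
}

let state = ref { comment_begin = None; current_comment = "";
                  between_commands = true; file = None }

let init_lexer_state file =
  { comment_begin = None; current_comment = ""; between_commands = true; file }

let set_lexer_state s = state := s
let release_lexer_state () = !state
let drop_lexer_state () = set_lexer_state (init_lexer_state None)

(* Run [f] with its own saved lexer state, capturing the state back on
   success and resetting to a clean state on error, in the spirit of
   GrammarMake.entry_parse below. *)
let with_lexer_state saved f =
  set_lexer_state !saved;
  try
    let result = f () in
    saved := release_lexer_state ();
    result
  with e -> drop_lexer_state (); raise e
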
diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4
index 181c4b7fd..79771f3f6 100644
--- a/parsing/cLexer.ml4
+++ b/parsing/cLexer.ml4
@@ -69,6 +69,15 @@ let ttree_remove ttree str =
in
remove ttree 0
+let ttree_elements ttree =
+ let rec elts tt accu =
+ let accu = match tt.node with
+ | None -> accu
+ | Some s -> CString.Set.add s accu
+ in
+ CharMap.fold (fun _ tt accu -> elts tt accu) tt.branch accu
+ in
+ elts ttree CString.Set.empty
(* Errors occurring while lexing (explained as "Lexer error: ...") *)
@@ -101,14 +110,6 @@ module Error = struct
end
open Error
-let current_file = ref ""
-
-let get_current_file () =
- !current_file
-
-let set_current_file ~fname =
- current_file := fname
-
let err loc str = Loc.raise (Compat.to_coqloc loc) (Error.E str)
let bad_token str = raise (Error.E (Bad_token str))
@@ -120,11 +121,6 @@ type token_kind =
| AsciiChar
| EmptyStream
-let error_unsupported_unicode_character loc n unicode cs =
- let bp = Stream.count cs in
- let loc = set_loc_pos loc bp (bp+n) in
- err loc (UnsupportedUnicode unicode)
-
let error_utf8 loc cs =
let bp = Stream.count cs in
Stream.junk cs; (* consume the char to avoid read it and fail again *)
@@ -174,14 +170,12 @@ let lookup_utf8_tail loc c cs =
(Char.code c3 land 0x3F) lsl 6 + (Char.code c4 land 0x3F)
| _ -> error_utf8 loc cs
in
- try Unicode.classify unicode, n
- with Unicode.Unsupported ->
- njunk n cs; error_unsupported_unicode_character loc n unicode cs
+ Utf8Token (Unicode.classify unicode, n)
let lookup_utf8 loc cs =
match Stream.peek cs with
| Some ('\x00'..'\x7F') -> AsciiChar
- | Some ('\x80'..'\xFF' as c) -> Utf8Token (lookup_utf8_tail loc c cs)
+ | Some ('\x80'..'\xFF' as c) -> lookup_utf8_tail loc c cs
| None -> EmptyStream
let unlocated f x = f x
@@ -199,17 +193,6 @@ let check_keyword str =
in
loop_symb (Stream.of_string str)
-let warn_unparsable_keyword =
- CWarnings.create ~name:"unparsable-keyword" ~category:"parsing"
- (fun (s,unicode) ->
- strbrk (Printf.sprintf "Token '%s' contains unicode character 0x%x \
- which will not be parsable." s unicode))
-
-let check_keyword_to_add s =
- try check_keyword s
- with Error.E (UnsupportedUnicode unicode) ->
- warn_unparsable_keyword (s,unicode)
-
let check_ident str =
let rec loop_id intail = parser
| [< ' ('a'..'z' | 'A'..'Z' | '_'); s >] ->
@@ -240,13 +223,15 @@ let is_keyword s =
let add_keyword str =
if not (is_keyword str) then
begin
- check_keyword_to_add str;
+ check_keyword str;
token_tree := ttree_add !token_tree str
end
let remove_keyword str =
token_tree := ttree_remove !token_tree str
+let keywords () = ttree_elements !token_tree
+
(* Freeze and unfreeze the state of the lexer *)
type frozen_t = ttree
@@ -270,6 +255,12 @@ let get_buff len = String.sub !buff 0 len
(* The classical lexer: idents, numbers, quoted strings, comments *)
+let warn_unrecognized_unicode =
+ CWarnings.create ~name:"unrecognized-unicode" ~category:"parsing"
+ (fun (u,id) ->
+ strbrk (Printf.sprintf "Not considering unicode character \"%s\" of unknown \
+ lexical status as part of identifier \"%s\"." u id))
+
let rec ident_tail loc len = parser
| [< ' ('a'..'z' | 'A'..'Z' | '0'..'9' | ''' | '_' as c); s >] ->
ident_tail loc (store len c) s
@@ -277,6 +268,10 @@ let rec ident_tail loc len = parser
match lookup_utf8 loc s with
| Utf8Token ((Unicode.IdentPart | Unicode.Letter), n) ->
ident_tail loc (nstore n len s) s
+ | Utf8Token (Unicode.Unknown, n) ->
+ let id = get_buff len in
+ let u = String.concat "" (List.map (String.make 1) (Stream.npeek n s)) in
+ warn_unrecognized_unicode ~loc:!@loc (u,id); len
| _ -> len
let rec number len = parser
@@ -334,6 +329,9 @@ let rec string loc ~comm_level bp len = parser
(* Hook for exporting comment into xml theory files *)
let (f_xml_output_comment, xml_output_comment) = Hook.make ~default:ignore ()
+(* To associate locations to a file name *)
+let current_file = ref None
+
(* Utilities for comments in beautify *)
let comment_begin = ref None
let comm_loc bp = match !comment_begin with
@@ -354,16 +352,20 @@ let rec split_comments comacc acc pos = function
let extract_comments pos = split_comments [] [] pos !comments
-type comments_state = int option * string * bool * ((int * int) * string) list
-let restore_comments_state (o,s,b,c) =
+(* The state of the lexer visible from outside *)
+type lexer_state = int option * string * bool * ((int * int) * string) list * string option
+
+let init_lexer_state f = (None,"",true,[],f)
+let set_lexer_state (o,s,b,c,f) =
comment_begin := o;
Buffer.clear current_comment; Buffer.add_string current_comment s;
between_commands := b;
- comments := c
-let default_comments_state = (None,"",true,[])
-let comments_state () =
- let s = (!comment_begin, Buffer.contents current_comment, !between_commands, !comments) in
- restore_comments_state default_comments_state; s
+ comments := c;
+ current_file := f
+let release_lexer_state () =
+ (!comment_begin, Buffer.contents current_comment, !between_commands, !comments, !current_file)
+let drop_lexer_state () =
+ set_lexer_state (init_lexer_state None)
let real_push_char c = Buffer.add_char current_comment c
@@ -391,7 +393,7 @@ let comment_stop ep =
if !Flags.xml_export && Buffer.length current_comment > 0 &&
(!between_commands || not(null_comment current_s)) then
Hook.get f_xml_output_comment current_s;
- (if Flags.do_beautify() && Buffer.length current_comment > 0 &&
+ (if !Flags.beautify && Buffer.length current_comment > 0 &&
(!between_commands || not(null_comment current_s)) then
let bp = match !comment_begin with
Some bp -> bp
@@ -408,7 +410,7 @@ let comment_stop ep =
(* Does not unescape!!! *)
let rec comm_string loc bp = parser
- | [< ''"' >] ep -> push_string "\""; loc
+ | [< ''"' >] -> push_string "\""; loc
| [< ''\\'; loc =
(parser [< ' ('"' | '\\' as c) >] ->
let () = match c with
@@ -438,7 +440,7 @@ let rec comment loc bp = parser bp2
let loc =
(* In beautify mode, the lexing differs between strings in comments and
regular strings (e.g. escaping). It seems wrong. *)
- if Flags.do_beautify() then (push_string"\""; comm_string loc bp2 s)
+ if !Flags.beautify then (push_string"\""; comm_string loc bp2 s)
else fst (string loc ~comm_level:(Some 0) bp2 0 s)
in
comment loc bp s
@@ -511,20 +513,19 @@ let process_chars loc bp c cs =
let loc = set_loc_pos loc bp ep' in
err loc Undefined_token
-let token_of_special c s = match c with
- | '.' -> FIELD s
- | _ -> assert false
+(* Parse what follows a dot *)
-(* Parse what follows a dot / a dollar *)
-
-let parse_after_special loc c bp =
+let parse_after_dot loc c bp =
parser
- | [< ' ('a'..'z' | 'A'..'Z' | '_' as d); len = ident_tail loc (store 0 d) >] ->
- token_of_special c (get_buff len)
+ | [< ' ('a'..'z' | 'A'..'Z' | '_' as d); len = ident_tail loc (store 0 d); s >] ->
+ let field = get_buff len in
+ (try find_keyword loc ("."^field) s with Not_found -> FIELD field)
| [< s >] ->
match lookup_utf8 loc s with
| Utf8Token (Unicode.Letter, n) ->
- token_of_special c (get_buff (ident_tail loc (nstore n 0 s) s))
+ let len = ident_tail loc (nstore n 0 s) s in
+ let field = get_buff len in
+ (try find_keyword loc ("."^field) s with Not_found -> FIELD field)
| AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars loc bp c s)
(* Parse what follows a question mark *)
@@ -552,7 +553,7 @@ let rec next_token loc = parser bp
comm_loc bp; push_char c; next_token (bump_loc_line loc ep) s
| [< '' ' | '\t' | '\r' as c; s >] ->
comm_loc bp; push_char c; next_token loc s
- | [< ''.' as c; t = parse_after_special loc c bp; s >] ep ->
+ | [< ''.' as c; t = parse_after_dot loc c bp; s >] ep ->
comment_stop bp;
(* We enforce that "." should either be part of a larger keyword,
for instance ".(", or followed by a blank or eof. *)
@@ -572,7 +573,7 @@ let rec next_token loc = parser bp
comment_stop bp; between_commands := new_between_commands; t
| [< ''?'; s >] ep ->
let t = parse_after_qmark loc bp s in
- comment_stop bp; (t, set_loc_pos loc ep bp)
+ comment_stop bp; (t, set_loc_pos loc bp ep)
| [< ' ('a'..'z' | 'A'..'Z' | '_' as c);
len = ident_tail loc (store 0 c); s >] ep ->
let id = get_buff len in
@@ -592,6 +593,12 @@ let rec next_token loc = parser bp
let loc = comment loc bp s in next_token loc s
| [< t = process_chars loc bp c >] -> comment_stop bp; t >] ->
t
+ | [< ' ('{' | '}' as c); s >] ep ->
+ let t,new_between_commands =
+ if !between_commands then (KEYWORD (String.make 1 c), set_loc_pos loc bp ep), true
+ else process_chars loc bp c s, false
+ in
+ comment_stop bp; between_commands := new_between_commands; t
| [< s >] ->
match lookup_utf8 loc s with
| Utf8Token (Unicode.Letter, n) ->
@@ -600,11 +607,9 @@ let rec next_token loc = parser bp
let ep = Stream.count s in
comment_stop bp;
(try find_keyword loc id s with Not_found -> IDENT id), set_loc_pos loc bp ep
- | AsciiChar | Utf8Token ((Unicode.Symbol | Unicode.IdentPart), _) ->
+ | AsciiChar | Utf8Token ((Unicode.Symbol | Unicode.IdentPart | Unicode.Unknown), _) ->
let t = process_chars loc bp (Stream.next s) s in
- let new_between_commands = match t with
- (KEYWORD ("{"|"}"),_) -> !between_commands | _ -> false in
- comment_stop bp; between_commands := new_between_commands; t
+ comment_stop bp; t
| EmptyStream ->
comment_stop bp; (EOI, set_loc_pos loc bp (bp+1))
@@ -626,12 +631,6 @@ let loct_func loct i =
let loct_add loct i loc = Hashtbl.add loct i loc
-let current_location_table = ref (loct_create ())
-
-type location_table = (int, Compat.CompatLoc.t) Hashtbl.t
-let location_table () = !current_location_table
-let restore_location_table t = current_location_table := t
-
(** {6 The lexer of Coq} *)
(** Note: removing a token.
@@ -669,7 +668,6 @@ let func cs =
cur_loc := Compat.after loc;
loct_add loct i loc; Some tok)
in
- current_location_table := loct;
(ts, loct_func loct)
let lexer = {
@@ -706,7 +704,6 @@ end
let mk () =
let loct = loct_create () in
let cur_loc = ref (Compat.make_loc !current_file 1 0 0 0) in
- current_location_table := loct;
let rec self init_loc (* FIXME *) =
parser i
[< (tok, loc) = next_token !cur_loc; s >] ->
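
The new ttree_elements above folds over the keyword trie so that keywords () can expose the full keyword set. A small self-contained OCaml sketch of the same idea follows; CharMap, ttree, and ttree_add are local stand-ins shaped like CLexer's internals rather than the real definitions.

module CharMap = Map.Make (Char)
module StrSet = Set.Make (String)

(* A character trie: [node] holds the keyword ending here, [branch]
   maps the next character to a subtree. *)
type ttree = { node : string option; branch : ttree CharMap.t }

let empty = { node = None; branch = CharMap.empty }

(* Insert a keyword, creating intermediate branches as needed. *)
let ttree_add ttree str =
  let n = String.length str in
  let rec add tt i =
    if i = n then { tt with node = Some str }
    else
      let c = str.[i] in
      let sub = try CharMap.find c tt.branch with Not_found -> empty in
      { tt with branch = CharMap.add c (add sub (i + 1)) tt.branch }
  in
  add ttree 0

(* The fold added by this commit: walk every branch and gather the
   stored keywords into a set. *)
let ttree_elements ttree =
  let rec elts tt accu =
    let accu = match tt.node with
      | None -> accu
      | Some s -> StrSet.add s accu
    in
    CharMap.fold (fun _ tt accu -> elts tt accu) tt.branch accu
  in
  elts ttree StrSet.empty

let () =
  let t = List.fold_left ttree_add empty ["."; ".("; "{"; "}"] in
  assert (StrSet.cardinal (ttree_elements t) = 4)
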
diff --git a/parsing/cLexer.mli b/parsing/cLexer.mli
index 3ad49eb74..e0fdf8cb5 100644
--- a/parsing/cLexer.mli
+++ b/parsing/cLexer.mli
@@ -9,22 +9,7 @@
val add_keyword : string -> unit
val remove_keyword : string -> unit
val is_keyword : string -> bool
-
-(* val location_function : int -> Loc.t *)
-
-(** for coqdoc *)
-type location_table
-val location_table : unit -> location_table
-val restore_location_table : location_table -> unit
-
-
-(** [get_current_file fname] returns the filename used in locations emitted by
- the lexer *)
-val get_current_file : unit -> string
-
-(** [set_current_file fname] sets the filename used in locations emitted by the
- lexer *)
-val set_current_file : fname:string -> unit
+val keywords : unit -> CString.Set.t
val check_ident : string -> unit
val is_ident : string -> bool
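
With location_table and the current-file accessors gone, the only addition to the interface is keywords. A hypothetical client could use it, for instance, to decide whether a string needs to be quoted when printed; this one-liner assumes Coq's CString and CLexer modules are in scope and is an untested sketch.

(* True when [s] is currently registered as a keyword in the lexer. *)
let must_quote s = CString.Set.mem s (CLexer.keywords ())
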
diff --git a/parsing/compat.ml4 b/parsing/compat.ml4
index ecf515111..befa0d01b 100644
--- a/parsing/compat.ml4
+++ b/parsing/compat.ml4
@@ -10,6 +10,10 @@
(** Locations *)
+let file_loc_of_file = function
+| None -> ""
+| Some f -> f
+
IFDEF CAMLP5 THEN
module CompatLoc = struct
@@ -29,7 +33,7 @@ let to_coqloc loc =
Loc.line_nb_last = Ploc.line_nb_last loc;
Loc.bol_pos_last = Ploc.bol_pos_last loc; }
-let make_loc fname line_nb bol_pos bp ep = Ploc.make_loc fname line_nb bol_pos (bp, ep) ""
+let make_loc fname line_nb bol_pos bp ep = Ploc.make_loc (file_loc_of_file fname) line_nb bol_pos (bp, ep) ""
(* Update a loc without allocating an intermediate pair *)
let set_loc_pos loc bp ep =
@@ -80,7 +84,7 @@ let to_coqloc loc =
Loc.bol_pos_last = CompatLoc.stop_bol loc; }
let make_loc fname line_nb bol_pos start stop =
- CompatLoc.of_tuple (fname, line_nb, bol_pos, start, line_nb, bol_pos, stop, false)
+ CompatLoc.of_tuple (file_loc_of_file fname, line_nb, bol_pos, start, line_nb, bol_pos, stop, false)
open CompatLoc
@@ -97,7 +101,7 @@ let bump_loc_line_last loc bol_pos =
stop_line loc + 1, bol_pos, stop_off loc, is_ghost loc)
let set_loc_file loc fname =
- of_tuple (fname, start_line loc, start_bol loc, start_off loc,
+ of_tuple (file_loc_of_file fname, start_line loc, start_bol loc, start_off loc,
stop_line loc, stop_bol loc, stop_off loc, is_ghost loc)
let after loc =
@@ -138,20 +142,22 @@ module type LexerSig = sig
exception E of t
val to_string : t -> string
end
- type comments_state
- val default_comments_state : comments_state
- val comments_state : unit -> comments_state
- val restore_comments_state : comments_state -> unit
+ type lexer_state
+ val init_lexer_state : string option -> lexer_state
+ val set_lexer_state : lexer_state -> unit
+ val release_lexer_state : unit -> lexer_state
+ val drop_lexer_state : unit -> unit
end
ELSE
module type LexerSig = sig
include Camlp4.Sig.Lexer with module Loc = CompatLoc and type Token.t = Tok.t
- type comments_state
- val default_comments_state : comments_state
- val comments_state : unit -> comments_state
- val restore_comments_state : comments_state -> unit
+ type lexer_state
+ val init_lexer_state : string option -> lexer_state
+ val set_lexer_state : lexer_state -> unit
+ val release_lexer_state : unit -> lexer_state
+ val drop_lexer_state : unit -> unit
end
END
@@ -172,7 +178,7 @@ module type GrammarSig = sig
type extend_statment =
Gramext.position option * single_extend_statment list
type coq_parsable
- val parsable : char Stream.t -> coq_parsable
+ val parsable : ?file:string -> char Stream.t -> coq_parsable
val action : 'a -> action
val entry_create : string -> 'a entry
val entry_parse : 'a entry -> coq_parsable -> 'a
@@ -193,32 +199,34 @@ module GrammarMake (L:LexerSig) : GrammarSig = struct
string option * Gramext.g_assoc option * production_rule list
type extend_statment =
Gramext.position option * single_extend_statment list
- type coq_parsable = parsable * L.comments_state ref
- let parsable c =
- let state = ref L.default_comments_state in (parsable c, state)
+ type coq_parsable = parsable * L.lexer_state ref
+ let parsable ?file c =
+ let state = ref (L.init_lexer_state file) in
+ L.set_lexer_state !state;
+ let a = parsable c in
+ state := L.release_lexer_state ();
+ (a,state)
let action = Gramext.action
let entry_create = Entry.create
let entry_parse e (p,state) =
- L.restore_comments_state !state;
+ L.set_lexer_state !state;
try
let c = Entry.parse e p in
- state := L.comments_state ();
- L.restore_comments_state L.default_comments_state;
+ state := L.release_lexer_state ();
c
with Exc_located (loc,e) ->
- L.restore_comments_state L.default_comments_state;
+ L.drop_lexer_state ();
let loc' = Loc.get_loc (Exninfo.info e) in
let loc = match loc' with None -> to_coqloc loc | Some loc -> loc in
Loc.raise loc e
let with_parsable (p,state) f x =
- L.restore_comments_state !state;
+ L.set_lexer_state !state;
try
let a = f x in
- state := L.comments_state ();
- L.restore_comments_state L.default_comments_state;
+ state := L.release_lexer_state ();
a
with e ->
- L.restore_comments_state L.default_comments_state;
+ L.drop_lexer_state ();
raise e
let entry_print ft x = Entry.print ft x
@@ -234,7 +242,7 @@ module type GrammarSig = sig
type 'a entry = 'a Entry.t
type action = Action.t
type coq_parsable
- val parsable : char Stream.t -> coq_parsable
+ val parsable : ?file:string -> char Stream.t -> coq_parsable
val action : 'a -> action
val entry_create : string -> 'a entry
val entry_parse : 'a entry -> coq_parsable -> 'a
@@ -249,31 +257,28 @@ module GrammarMake (L:LexerSig) : GrammarSig = struct
include Camlp4.Struct.Grammar.Static.Make (L)
type 'a entry = 'a Entry.t
type action = Action.t
- type comments_state = int option * string * bool * ((int * int) * string) list
- type coq_parsable = char Stream.t * L.comments_state ref
- let parsable s = let state = ref L.default_comments_state in (s, state)
+ type coq_parsable = char Stream.t * L.lexer_state ref
+ let parsable ?file s = let state = ref (L.init_lexer_state file) in (s, state)
let action = Action.mk
let entry_create = Entry.mk
let entry_parse e (s,state) =
- L.restore_comments_state !state;
+ L.set_lexer_state !state;
try
let c = parse e (*FIXME*)CompatLoc.ghost s in
- state := L.comments_state ();
- L.restore_comments_state L.default_comments_state;
+ state := L.release_lexer_state ();
c
with Exc_located (loc,e) ->
- L.restore_comments_state L.default_comments_state;
- raise_coq_loc loc e
+ L.drop_lexer_state ();
+ raise_coq_loc loc e;;
let with_parsable (p,state) f x =
- L.restore_comments_state !state;
+ L.set_lexer_state !state;
try
let a = f x in
- state := L.comments_state ();
- L.restore_comments_state L.default_comments_state;
+ state := L.release_lexer_state ();
a
with e ->
- L.restore_comments_state L.default_comments_state;
- raise e
+ L.drop_lexer_state ();
+ Pervasives.raise e;;
let entry_print ft x = Entry.print ft x
let srules' = srules (entry_create "dummy")
end
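
Since parsable now carries the optional file name that ends up in locations, callers no longer need set_current_file. A hedged usage sketch follows, assuming a Gram module produced by GrammarMake and some grammar entry as in Coq's Pcoq; it has not been checked against the rest of the tree.

(* Parse one entry from [fname]; the file name travels with the parsable,
   so locations reported during parsing point at the right file. *)
let parse_file entry fname =
  let chan = open_in fname in
  let strm = Stream.of_channel chan in
  let p = Gram.parsable ~file:fname strm in
  let ast = Gram.entry_parse entry p in
  close_in chan;
  ast
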