From 4e31561f7e0d5e647e86978806cae82ffb35f90b Mon Sep 17 00:00:00 2001
From: Hugo Herbelin <Hugo.Herbelin@inria.fr>
Date: Tue, 11 Oct 2016 12:58:42 +0200
Subject: Removing export of location_table outside of cLexer.

It has not been used by coqdoc since b8194b22 (Dec 2010).

The table is now only part of the lexer function closure
(and only in the camlp5 case).
---
 parsing/cLexer.ml4 | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'parsing/cLexer.ml4')

diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4
index 181c4b7fd..0090117f6 100644
--- a/parsing/cLexer.ml4
+++ b/parsing/cLexer.ml4
@@ -626,12 +626,6 @@ let loct_func loct i =
 
 let loct_add loct i loc = Hashtbl.add loct i loc
 
-let current_location_table = ref (loct_create ())
-
-type location_table = (int, Compat.CompatLoc.t) Hashtbl.t
-let location_table () = !current_location_table
-let restore_location_table t = current_location_table := t
-
 (** {6 The lexer of Coq} *)
 
 (** Note: removing a token.
@@ -669,7 +663,6 @@ let func cs =
 	 cur_loc := Compat.after loc;
          loct_add loct i loc; Some tok)
   in
-  current_location_table := loct;
   (ts, loct_func loct)
 
 let lexer = {
@@ -706,7 +699,6 @@ end
 let mk () =
   let loct = loct_create () in
   let cur_loc = ref (Compat.make_loc !current_file 1 0 0 0) in
-  current_location_table := loct; 
   let rec self init_loc (* FIXME *) =
     parser i
        [< (tok, loc) = next_token !cur_loc; s >] ->
-- 
cgit v1.2.3


From f3d1eff69850d379bad5ab8f1cdadb7f5d5c7eca Mon Sep 17 00:00:00 2001
From: Hugo Herbelin <Hugo.Herbelin@inria.fr>
Date: Tue, 11 Oct 2016 13:23:26 +0200
Subject: More on making the lexer more functional (continuing b8ae2de5 and
 8a8caba3).

- Adding cLexer.current_file to the lexer state, i.e. making it a
  component of the type "coq_parsable" of lexer state (it was
  forgotten in b8ae2de5 and 8a8caba3).
- Inlining save_translator/restore_translator which have now lost most
  of their substance.
---
 dev/doc/changes.txt       |  5 +++
 parsing/cLexer.ml4        | 29 +++++++++--------
 parsing/cLexer.mli        |  8 -----
 parsing/compat.ml4        | 79 +++++++++++++++++++++++++----------------------
 toplevel/vernac.ml        | 30 +++++-------------
 toplevel/vernacentries.ml |  2 +-
 6 files changed, 69 insertions(+), 84 deletions(-)

(limited to 'parsing/cLexer.ml4')

diff --git a/dev/doc/changes.txt b/dev/doc/changes.txt
index fcee79e07..6dd7cb970 100644
--- a/dev/doc/changes.txt
+++ b/dev/doc/changes.txt
@@ -2,6 +2,11 @@
 = CHANGES BETWEEN COQ V8.5 AND COQ V8.6 =
 =========================================
 
+** Parsing **
+
+Pcoq.parsable now takes an extra optional filename argument so as to
+bind locations to a file name when relevant.
+
 ** Files **
 
 To avoid clashes with OCaml's compiler libs, the following files were renamed:
diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4
index 0090117f6..cfee77ab2 100644
--- a/parsing/cLexer.ml4
+++ b/parsing/cLexer.ml4
@@ -101,14 +101,6 @@ module Error = struct
 end
 open Error
 
-let current_file = ref ""
-
-let get_current_file () =
-  !current_file
-
-let set_current_file ~fname =
-  current_file := fname
-
 let err loc str = Loc.raise (Compat.to_coqloc loc) (Error.E str)
 
 let bad_token str = raise (Error.E (Bad_token str))
@@ -334,6 +326,9 @@ let rec string loc ~comm_level bp len = parser
 (* Hook for exporting comment into xml theory files *)
 let (f_xml_output_comment, xml_output_comment) = Hook.make ~default:ignore ()
 
+(* To associate locations to a file name *)
+let current_file = ref None
+
 (* Utilities for comments in beautify *)
 let comment_begin = ref None
 let comm_loc bp = match !comment_begin with
@@ -354,16 +349,20 @@ let rec split_comments comacc acc pos = function
 
 let extract_comments pos = split_comments [] [] pos !comments
 
-type comments_state = int option * string * bool * ((int * int) * string) list
-let restore_comments_state (o,s,b,c) =
+(* The state of the lexer visible from outside *)
+type lexer_state = int option * string * bool * ((int * int) * string) list * string option
+
+let init_lexer_state f = (None,"",true,[],f)
+let set_lexer_state (o,s,b,c,f) =
   comment_begin := o;
   Buffer.clear current_comment; Buffer.add_string current_comment s;
   between_commands := b;
-  comments := c
-let default_comments_state = (None,"",true,[])
-let comments_state () =
-  let s = (!comment_begin, Buffer.contents current_comment, !between_commands, !comments) in
-  restore_comments_state default_comments_state; s
+  comments := c;
+  current_file := f
+let release_lexer_state () =
+  (!comment_begin, Buffer.contents current_comment, !between_commands, !comments, !current_file)
+let drop_lexer_state () =
+    set_lexer_state (init_lexer_state None)
 
 let real_push_char c = Buffer.add_char current_comment c
 
diff --git a/parsing/cLexer.mli b/parsing/cLexer.mli
index 71edda760..f69d95335 100644
--- a/parsing/cLexer.mli
+++ b/parsing/cLexer.mli
@@ -10,14 +10,6 @@ val add_keyword : string -> unit
 val remove_keyword : string -> unit
 val is_keyword : string -> bool
 
-(** [get_current_file fname] returns the filename used in locations emitted by
-    the lexer *)
-val get_current_file : unit -> string
-
-(** [set_current_file fname] sets the filename used in locations emitted by the
-    lexer *)
-val set_current_file : fname:string -> unit
-
 val check_ident : string -> unit
 val is_ident : string -> bool
 val check_keyword : string -> unit
diff --git a/parsing/compat.ml4 b/parsing/compat.ml4
index 26e07c2f2..befa0d01b 100644
--- a/parsing/compat.ml4
+++ b/parsing/compat.ml4
@@ -10,6 +10,10 @@
 
 (** Locations *)
 
+let file_loc_of_file = function
+| None -> ""
+| Some f -> f
+
 IFDEF CAMLP5 THEN
 
 module CompatLoc = struct
@@ -29,7 +33,7 @@ let to_coqloc loc =
     Loc.line_nb_last = Ploc.line_nb_last loc;
     Loc.bol_pos_last = Ploc.bol_pos_last loc; }
 
-let make_loc fname line_nb bol_pos bp ep = Ploc.make_loc fname line_nb bol_pos (bp, ep) ""
+let make_loc fname line_nb bol_pos bp ep = Ploc.make_loc (file_loc_of_file fname) line_nb bol_pos (bp, ep) ""
 
 (* Update a loc without allocating an intermediate pair *)
 let set_loc_pos loc bp ep =
@@ -80,7 +84,7 @@ let to_coqloc loc =
     Loc.bol_pos_last = CompatLoc.stop_bol loc; }
 
 let make_loc fname line_nb bol_pos start stop =
-  CompatLoc.of_tuple (fname, line_nb, bol_pos, start, line_nb, bol_pos, stop, false)
+  CompatLoc.of_tuple (file_loc_of_file fname, line_nb, bol_pos, start, line_nb, bol_pos, stop, false)
 
 open CompatLoc
 
@@ -97,7 +101,7 @@ let bump_loc_line_last loc bol_pos =
 	    stop_line loc + 1, bol_pos, stop_off loc, is_ghost loc)
 
 let set_loc_file loc fname =
-  of_tuple (fname, start_line loc, start_bol loc, start_off loc,
+  of_tuple (file_loc_of_file fname, start_line loc, start_bol loc, start_off loc,
 	    stop_line loc, stop_bol loc, stop_off loc, is_ghost loc)
 
 let after loc =
@@ -138,20 +142,22 @@ module type LexerSig = sig
     exception E of t
     val to_string : t -> string
   end
-  type comments_state
-  val default_comments_state : comments_state
-  val comments_state : unit -> comments_state
-  val restore_comments_state : comments_state -> unit
+  type lexer_state
+  val init_lexer_state : string option -> lexer_state
+  val set_lexer_state : lexer_state -> unit
+  val release_lexer_state : unit -> lexer_state
+  val drop_lexer_state : unit -> unit
 end
 
 ELSE
 
 module type LexerSig = sig
   include Camlp4.Sig.Lexer with module Loc = CompatLoc and type Token.t = Tok.t
-  type comments_state
-  val default_comments_state : comments_state
-  val comments_state : unit -> comments_state
-  val restore_comments_state : comments_state -> unit
+  type lexer_state
+  val init_lexer_state : string option -> lexer_state
+  val set_lexer_state : lexer_state -> unit
+  val release_lexer_state : unit -> lexer_state
+  val drop_lexer_state : unit -> unit
 end
 
 END
@@ -172,7 +178,7 @@ module type GrammarSig = sig
   type extend_statment =
       Gramext.position option * single_extend_statment list
   type coq_parsable
-  val parsable : char Stream.t -> coq_parsable
+  val parsable : ?file:string -> char Stream.t -> coq_parsable
   val action : 'a -> action
   val entry_create : string -> 'a entry
   val entry_parse : 'a entry -> coq_parsable -> 'a
@@ -193,32 +199,34 @@ module GrammarMake (L:LexerSig) : GrammarSig = struct
       string option * Gramext.g_assoc option * production_rule list
   type extend_statment =
       Gramext.position option * single_extend_statment list
-  type coq_parsable = parsable * L.comments_state ref
-  let parsable c =
-    let state = ref L.default_comments_state in (parsable c, state)
+  type coq_parsable = parsable * L.lexer_state ref
+  let parsable ?file c =
+    let state = ref (L.init_lexer_state file) in
+    L.set_lexer_state !state;
+    let a = parsable c in
+    state := L.release_lexer_state ();
+    (a,state)
   let action = Gramext.action
   let entry_create = Entry.create
   let entry_parse e (p,state) =
-    L.restore_comments_state !state;
+    L.set_lexer_state !state;
     try
       let c = Entry.parse e p in
-      state := L.comments_state ();
-      L.restore_comments_state L.default_comments_state;
+      state := L.release_lexer_state ();
       c
     with Exc_located (loc,e) ->
-      L.restore_comments_state L.default_comments_state;
+      L.drop_lexer_state ();
       let loc' = Loc.get_loc (Exninfo.info e) in
       let loc = match loc' with None -> to_coqloc loc | Some loc -> loc in
       Loc.raise loc e
   let with_parsable (p,state) f x =
-    L.restore_comments_state !state;
+    L.set_lexer_state !state;
     try
       let a = f x in
-      state := L.comments_state ();
-      L.restore_comments_state L.default_comments_state;
+      state := L.release_lexer_state ();
       a
     with e ->
-      L.restore_comments_state L.default_comments_state;
+      L.drop_lexer_state ();
       raise e
 
   let entry_print ft x = Entry.print ft x
@@ -234,7 +242,7 @@ module type GrammarSig = sig
   type 'a entry = 'a Entry.t
   type action = Action.t
   type coq_parsable
-  val parsable : char Stream.t -> coq_parsable
+  val parsable : ?file:string -> char Stream.t -> coq_parsable
   val action : 'a -> action
   val entry_create : string -> 'a entry
   val entry_parse : 'a entry -> coq_parsable -> 'a
@@ -249,31 +257,28 @@ module GrammarMake (L:LexerSig) : GrammarSig = struct
   include Camlp4.Struct.Grammar.Static.Make (L)
   type 'a entry = 'a Entry.t
   type action = Action.t
-  type comments_state = int option * string * bool * ((int * int) * string) list
-  type coq_parsable = char Stream.t * L.comments_state ref
-  let parsable s = let state = ref L.default_comments_state in (s, state)
+  type coq_parsable = char Stream.t * L.lexer_state ref
+  let parsable ?file s = let state = ref (L.init_lexer_state file) in (s, state)
   let action = Action.mk
   let entry_create = Entry.mk
   let entry_parse e (s,state) =
-    L.restore_comments_state !state;
+    L.set_lexer_state !state;
     try
       let c = parse e (*FIXME*)CompatLoc.ghost s in
-      state := L.comments_state ();
-      L.restore_comments_state L.default_comments_state;
+      state := L.release_lexer_state ();
       c
     with Exc_located (loc,e) ->
-      L.restore_comments_state L.default_comments_state;
-      raise_coq_loc loc e
+      L.drop_lexer_state ();
+      raise_coq_loc loc e;;
   let with_parsable (p,state) f x =
-    L.restore_comments_state !state;
+    L.set_lexer_state !state;
     try
       let a = f x in
-      state := L.comments_state ();
-      L.restore_comments_state L.default_comments_state;
+      state := L.release_lexer_state ();
       a
     with e ->
-      L.restore_comments_state L.default_comments_state;
-      Pervasives.raise e
+      L.drop_lexer_state ();
+      Pervasives.raise e;;
   let entry_print ft x = Entry.print ft x
   let srules' = srules (entry_create "dummy")
 end
diff --git a/toplevel/vernac.ml b/toplevel/vernac.ml
index 661a597ae..f03f31178 100644
--- a/toplevel/vernac.ml
+++ b/toplevel/vernac.ml
@@ -81,7 +81,7 @@ let open_file_twice_if verbosely longfname =
   let in_chan = open_utf8_file_in longfname in
   let verb_ch =
     if verbosely then Some (open_utf8_file_in longfname) else None in
-  let po = Pcoq.Gram.parsable (Stream.of_channel in_chan) in
+  let po = Pcoq.Gram.parsable ~file:longfname (Stream.of_channel in_chan) in
   (in_chan, longfname, (po, verb_ch))
 
 let close_input in_chan (_,verb) =
@@ -145,13 +145,6 @@ let pr_new_syntax (po,_) loc ocom =
   (* Reinstall the context of parsing which includes the bindings of comments to locations *)
   Pcoq.Gram.with_parsable po (pr_new_syntax_in_context loc) ocom
 
-let save_translator () =
-  !chan_beautify
-
-let restore_translator ch =
-  if !Flags.beautify_file then close_out !chan_beautify;
-  chan_beautify := ch
-
 (* For coqtop -time, we display the position in the file,
    and a glimpse of the executed command *)
 
@@ -186,22 +179,17 @@ let rec vernac_com input checknav (loc,com) =
 	let fname = Envars.expand_path_macros ~warn:(fun x -> Feedback.msg_warning (str x)) fname in
         let fname = CUnix.make_suffix fname ".v" in
         let f = Loadpath.locate_file fname in
-	let st = save_translator () in
-        let old_lexer_file = CLexer.get_current_file () in
-        CLexer.set_current_file f;
-	if !Flags.beautify_file then
-	  begin
-	    chan_beautify := open_out (f^beautify_suffix);
-          end;
+	let ch = !chan_beautify in
+	if !Flags.beautify_file then chan_beautify := open_out (f^beautify_suffix);
 	begin
 	  try
             Flags.silently (read_vernac_file verbosely) f;
-	    restore_translator st;
-            CLexer.set_current_file old_lexer_file;
+            if !Flags.beautify_file then close_out !chan_beautify;
+            chan_beautify := ch;
 	  with reraise ->
             let reraise = CErrors.push reraise in
-	    restore_translator st;
-            CLexer.set_current_file old_lexer_file;
+            if !Flags.beautify_file then close_out !chan_beautify;
+            chan_beautify := ch;
 	    iraise reraise
 	end
 
@@ -269,16 +257,12 @@ let (f_xml_end_library, xml_end_library) = Hook.make ~default:ignore ()
 let load_vernac verb file =
   chan_beautify :=
     if !Flags.beautify_file then open_out (file^beautify_suffix) else stdout;
-    let old_lexer_file = CLexer.get_current_file () in
   try
-    CLexer.set_current_file file;
     Flags.silently (read_vernac_file verb) file;
     if !Flags.beautify_file then close_out !chan_beautify;
-    CLexer.set_current_file old_lexer_file;
   with any ->
     let (e, info) = CErrors.push any in
     if !Flags.beautify_file then close_out !chan_beautify;
-    CLexer.set_current_file old_lexer_file;
     iraise (disable_drop e, info)
 
 let warn_file_no_extension =
diff --git a/toplevel/vernacentries.ml b/toplevel/vernacentries.ml
index feec23b50..df83f7685 100644
--- a/toplevel/vernacentries.ml
+++ b/toplevel/vernacentries.ml
@@ -1813,7 +1813,7 @@ let vernac_load interp fname =
   let input =
     let longfname = Loadpath.locate_file fname in
     let in_chan = open_utf8_file_in longfname in
-    Pcoq.Gram.parsable (Stream.of_channel in_chan) in
+    Pcoq.Gram.parsable ~file:longfname (Stream.of_channel in_chan) in
   try while true do interp (snd (parse_sentence input)) done
   with End_of_input -> ()
 
-- 
cgit v1.2.3


From b51eac830d2be726db06ae6d2539a81b41e90677 Mon Sep 17 00:00:00 2001
From: Emilio Jesus Gallego Arias <e+git@x80.org>
Date: Wed, 12 Oct 2016 16:58:04 +0200
Subject: [toplevel] Remove duplicate beautify flags.

Given the current style in flags.mli, there is no reason to have
functions (`make_beautify`, `do_beautify`) wrapping the `beautify` flag.

A deeper question is why a global flag is needed at all. In particular,
the use in `interp/constrextern.ml` seems strange, the condition in the
lexer should be looked at, and I'm not sure about `printing/`.
---
 lib/flags.ml         | 2 --
 lib/flags.mli        | 2 --
 parsing/cLexer.ml4   | 4 ++--
 printing/ppconstr.ml | 2 +-
 printing/pputils.ml  | 2 +-
 toplevel/coqtop.ml   | 6 +++---
 toplevel/vernac.ml   | 4 ++--
 7 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'parsing/cLexer.ml4')

diff --git a/lib/flags.ml b/lib/flags.ml
index 65873e521..35681804f 100644
--- a/lib/flags.ml
+++ b/lib/flags.ml
@@ -139,8 +139,6 @@ let pr_version = function
 
 (* Translate *)
 let beautify = ref false
-let make_beautify f = beautify := f
-let do_beautify () = !beautify
 let beautify_file = ref false
 
 (* Silent / Verbose *)
diff --git a/lib/flags.mli b/lib/flags.mli
index 9dc0c9c04..897602641 100644
--- a/lib/flags.mli
+++ b/lib/flags.mli
@@ -70,8 +70,6 @@ val version_less_or_equal : compat_version -> bool
 val pr_version : compat_version -> string
 
 val beautify : bool ref
-val make_beautify : bool -> unit
-val do_beautify : unit -> bool
 val beautify_file : bool ref
 
 val make_silent : bool -> unit
diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4
index cfee77ab2..e59b9630f 100644
--- a/parsing/cLexer.ml4
+++ b/parsing/cLexer.ml4
@@ -390,7 +390,7 @@ let comment_stop ep =
   if !Flags.xml_export && Buffer.length current_comment > 0 &&
     (!between_commands || not(null_comment current_s)) then
       Hook.get f_xml_output_comment current_s;
-  (if Flags.do_beautify() && Buffer.length current_comment > 0 &&
+  (if !Flags.beautify && Buffer.length current_comment > 0 &&
     (!between_commands || not(null_comment current_s)) then
     let bp = match !comment_begin with
         Some bp -> bp
@@ -437,7 +437,7 @@ let rec comment loc bp = parser bp2
       let loc =
 	(* In beautify mode, the lexing differs between strings in comments and
 	   regular strings (e.g. escaping). It seems wrong. *)
-	if Flags.do_beautify() then (push_string"\""; comm_string loc bp2 s)
+	if !Flags.beautify then (push_string"\""; comm_string loc bp2 s)
 	else fst (string loc ~comm_level:(Some 0) bp2 0 s)
       in
       comment loc bp s
diff --git a/printing/ppconstr.ml b/printing/ppconstr.ml
index c94650f1e..aa94fb7be 100644
--- a/printing/ppconstr.ml
+++ b/printing/ppconstr.ml
@@ -129,7 +129,7 @@ end) = struct
     str "`" ++ str hd ++ c ++ str tl
 
   let pr_com_at n =
-    if Flags.do_beautify() && not (Int.equal n 0) then comment (CLexer.extract_comments n)
+    if !Flags.beautify && not (Int.equal n 0) then comment (CLexer.extract_comments n)
     else mt()
 
   let pr_with_comments loc pp = pr_located (fun x -> x) (loc,pp)
diff --git a/printing/pputils.ml b/printing/pputils.ml
index 57a1d957e..50ce56fb0 100644
--- a/printing/pputils.ml
+++ b/printing/pputils.ml
@@ -9,7 +9,7 @@
 open Pp
 
 let pr_located pr (loc, x) =
-  if Flags.do_beautify () && loc <> Loc.ghost then
+  if !Flags.beautify && loc <> Loc.ghost then
     let (b, e) = Loc.unloc loc in
     (* Side-effect: order matters *)
     let before = Pp.comment (CLexer.extract_comments b) in
diff --git a/toplevel/coqtop.ml b/toplevel/coqtop.ml
index 26ef68262..4fd2b0e92 100644
--- a/toplevel/coqtop.ml
+++ b/toplevel/coqtop.ml
@@ -168,7 +168,7 @@ let load_vernacular () =
   List.iter
     (fun (s,b) ->
       let s = Loadpath.locate_file s in
-      if Flags.do_beautify () then
+      if !Flags.beautify then
 	with_option beautify_file (Vernac.load_vernac b) s
       else
 	Vernac.load_vernac b s)
@@ -219,7 +219,7 @@ let add_compile verbose s =
   compile_list := (verbose,s) :: !compile_list
 
 let compile_file (v,f) =
-  if Flags.do_beautify () then
+  if !Flags.beautify then
     with_option beautify_file (Vernac.compile v) f
   else
     Vernac.compile v f
@@ -536,7 +536,7 @@ let parse_args arglist =
         Flags.async_proofs_never_reopen_branch := true;
     |"-batch" -> set_batch_mode ()
     |"-test-mode" -> test_mode := true
-    |"-beautify" -> make_beautify true
+    |"-beautify" -> beautify := true
     |"-boot" -> boot := true; no_load_rc ()
     |"-bt" -> Backtrace.record_backtrace true
     |"-color" -> set_color (next ())
diff --git a/toplevel/vernac.ml b/toplevel/vernac.ml
index 54b2ac3c1..8230f92a6 100644
--- a/toplevel/vernac.ml
+++ b/toplevel/vernac.ml
@@ -199,7 +199,7 @@ let rec interp_vernac po chan_beautify checknav (loc,com) =
   in
     try
       checknav loc com;
-      if do_beautify () then pr_new_syntax po chan_beautify loc (Some com);
+      if !beautify then pr_new_syntax po chan_beautify loc (Some com);
       (* XXX: This is not 100% correct if called from an IDE context *)
       if !Flags.time then print_cmd_header loc com;
       let com = if !Flags.time then VernacTime (loc,com) else com in
@@ -228,7 +228,7 @@ and load_vernac verbosely file =
     close_input in_chan input;    (* we must close the file first *)
     match e with
       | End_of_input ->
-          if do_beautify () then
+          if !beautify then
             pr_new_syntax (fst input) chan_beautify (Loc.make_loc (max_int,max_int)) None;
           if !Flags.beautify_file then close_out chan_beautify;
       | reraise ->
-- 
cgit v1.2.3


From 57c6ffd23836364168ffd1c66dbddbecf830c7c6 Mon Sep 17 00:00:00 2001
From: Hugo Herbelin <Hugo.Herbelin@inria.fr>
Date: Thu, 13 Oct 2016 15:57:14 +0200
Subject: Stopping warning on unrecognized unicode character in notation
 (fixing #5136).
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The warning was pointless since the notation was accepted and parsed
anyway.

We now treat unrecognized unicode characters like ordinary
undefined tokens (e.g. "#" in a bare Coq).

For instance, "aₚ", or ".ₚ", or "?ₚ" now fail with "Undefined token"
rather than "Unsupported Unicode character".
---
 lib/unicode.ml     |  8 +++-----
 lib/unicode.mli    | 12 +++---------
 parsing/cLexer.ml4 | 26 ++++----------------------
 3 files changed, 10 insertions(+), 36 deletions(-)

(limited to 'parsing/cLexer.ml4')

diff --git a/lib/unicode.ml b/lib/unicode.ml
index dc852d981..ced5e258c 100644
--- a/lib/unicode.ml
+++ b/lib/unicode.ml
@@ -8,9 +8,7 @@
 
 (** Unicode utilities *)
 
-type status = Letter | IdentPart | Symbol
-
-exception Unsupported
+type status = Letter | IdentPart | Symbol | Unknown
 
 (* The following table stores classes of Unicode characters that
    are used by the lexer. There are 3 different classes so 2 bits are
@@ -29,6 +27,7 @@ let mask i = function
   | Letter    -> 1 lsl ((i land 7) lsl 1) (* 01 *)
   | IdentPart -> 2 lsl ((i land 7) lsl 1) (* 10 *)
   | Symbol    -> 3 lsl ((i land 7) lsl 1) (* 11 *)
+  | Unknown   -> 0 lsl ((i land 7) lsl 1) (* 00 *)
 
 (* Helper to reset 2 bits in a word. *)
 let reset_mask i =
@@ -55,7 +54,7 @@ let lookup x =
     if      v = 1 then Letter
     else if v = 2 then IdentPart
     else if v = 3 then Symbol
-    else raise Unsupported
+    else Unknown
 
 (* [classify] discriminates between 3 different kinds of
    symbols based on the standard unicode classification (extracted from
@@ -215,7 +214,6 @@ let ident_refutation s =
         |x -> x
   with
   | End_of_input -> Some (true,"The empty string is not an identifier.")
-  | Unsupported -> Some (true,s^": unsupported character in utf8 sequence.")
   | Invalid_argument _ -> Some (true,s^": invalid utf8 sequence.")
 
 let lowercase_unicode =
diff --git a/lib/unicode.mli b/lib/unicode.mli
index 1f8bd44ee..2609e1968 100644
--- a/lib/unicode.mli
+++ b/lib/unicode.mli
@@ -8,22 +8,16 @@
 
 (** Unicode utilities *)
 
-type status = Letter | IdentPart | Symbol
+type status = Letter | IdentPart | Symbol | Unknown
 
-(** This exception is raised when UTF-8 the input string contains unsupported UTF-8 characters. *)
-exception Unsupported
-
-(** Classify a unicode char into 3 classes.
-    @raise Unsupported if the input string contains unsupported UTF-8 characters. *)
+(** Classify a unicode char into 3 classes or unknown. *)
 val classify : int -> status
 
 (** Return [None] if a given string can be used as a (Coq) identifier.
-    Return [Some (b,s)] otherwise, where [s] is an explanation and [b] is severity.
-    @raise Unsupported if the input string contains unsupported UTF-8 characters. *)
+    Return [Some (b,s)] otherwise, where [s] is an explanation and [b] is severity. *)
 val ident_refutation : string -> (bool * string) option
 
 (** First char of a string, converted to lowercase
-    @raise Unsupported if the input string contains unsupported UTF-8 characters.
     @raise Assert_failure if the input string is empty. *)
 val lowercase_first_char : string -> string
 
diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4
index e59b9630f..6a343f50e 100644
--- a/parsing/cLexer.ml4
+++ b/parsing/cLexer.ml4
@@ -112,11 +112,6 @@ type token_kind =
   | AsciiChar
   | EmptyStream
 
-let error_unsupported_unicode_character loc n unicode cs =
-  let bp = Stream.count cs in
-  let loc = set_loc_pos loc bp (bp+n) in
-  err loc (UnsupportedUnicode unicode)
-
 let error_utf8 loc cs =
   let bp = Stream.count cs in
   Stream.junk cs; (* consume the char to avoid read it and fail again *)
@@ -166,14 +161,12 @@ let lookup_utf8_tail loc c cs =
           (Char.code c3 land 0x3F) lsl 6 + (Char.code c4 land 0x3F)
       | _ -> error_utf8 loc cs
     in
-    try Unicode.classify unicode, n
-    with Unicode.Unsupported ->
-      njunk n cs; error_unsupported_unicode_character loc n unicode cs
+    Utf8Token (Unicode.classify unicode, n)
 
 let lookup_utf8 loc cs =
   match Stream.peek cs with
     | Some ('\x00'..'\x7F') -> AsciiChar
-    | Some ('\x80'..'\xFF' as c) -> Utf8Token (lookup_utf8_tail loc c cs)
+    | Some ('\x80'..'\xFF' as c) -> lookup_utf8_tail loc c cs
     | None -> EmptyStream
 
 let unlocated f x = f x
@@ -191,17 +184,6 @@ let check_keyword str =
   in
   loop_symb (Stream.of_string str)
 
-let warn_unparsable_keyword =
-  CWarnings.create ~name:"unparsable-keyword" ~category:"parsing"
-         (fun (s,unicode) ->
-          strbrk (Printf.sprintf "Token '%s' contains unicode character 0x%x \
-                                  which will not be parsable." s unicode))
-            
-let check_keyword_to_add s =
-  try check_keyword s
-  with Error.E (UnsupportedUnicode unicode) ->
-    warn_unparsable_keyword (s,unicode)
-
 let check_ident str =
   let rec loop_id intail = parser
     | [< ' ('a'..'z' | 'A'..'Z' | '_'); s >] ->
@@ -232,7 +214,7 @@ let is_keyword s =
 let add_keyword str =
   if not (is_keyword str) then
     begin
-      check_keyword_to_add str;
+      check_keyword str;
       token_tree := ttree_add !token_tree str
     end
 
@@ -599,7 +581,7 @@ let rec next_token loc = parser bp
             let ep = Stream.count s in
             comment_stop bp;
             (try find_keyword loc id s with Not_found -> IDENT id), set_loc_pos loc bp ep
-        | AsciiChar | Utf8Token ((Unicode.Symbol | Unicode.IdentPart), _) ->
+        | AsciiChar | Utf8Token ((Unicode.Symbol | Unicode.IdentPart | Unicode.Unknown), _) ->
             let t = process_chars loc bp (Stream.next s) s in
             let new_between_commands = match t with
               (KEYWORD ("{"|"}"),_) -> !between_commands | _ -> false in
-- 
cgit v1.2.3


From 81ee9f1cb152a82cc4c116dd47294f2ae6eee0ed Mon Sep 17 00:00:00 2001
From: Hugo Herbelin <Hugo.Herbelin@inria.fr>
Date: Thu, 13 Oct 2016 16:21:40 +0200
Subject: Fixing a few other inconsistencies with notations.

`Notation ".a" := nat.' was accepted and used for printing but was not
recognized in parsing. Now it is. Other examples are in the test-suite.
---
 parsing/cLexer.ml4               | 21 ++++++++++-----------
 test-suite/output/Notations2.out | 24 ++++++++++++++++++++++++
 test-suite/output/Notations2.v   | 29 +++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 11 deletions(-)

(limited to 'parsing/cLexer.ml4')

diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4
index 6a343f50e..740578aad 100644
--- a/parsing/cLexer.ml4
+++ b/parsing/cLexer.ml4
@@ -389,7 +389,7 @@ let comment_stop ep =
 
 (* Does not unescape!!! *)
 let rec comm_string loc bp = parser
-  | [< ''"' >] ep -> push_string "\""; loc
+  | [< ''"' >] -> push_string "\""; loc
   | [< ''\\'; loc =
            (parser [< ' ('"' | '\\' as c) >] ->
               let () = match c with
@@ -492,20 +492,19 @@ let process_chars loc bp c cs =
 	let loc = set_loc_pos loc bp ep' in
         err loc Undefined_token
 
-let token_of_special c s = match c with
-  | '.' -> FIELD s
-  | _ -> assert false
+(* Parse what follows a dot *)
 
-(* Parse what follows a dot / a dollar *)
-
-let parse_after_special loc c bp =
+let parse_after_dot loc c bp =
   parser
-  | [< ' ('a'..'z' | 'A'..'Z' | '_' as d); len = ident_tail loc (store 0 d) >] ->
-      token_of_special c (get_buff len)
+  | [< ' ('a'..'z' | 'A'..'Z' | '_' as d); len = ident_tail loc (store 0 d); s >] ->
+      let field = get_buff len in
+      (try find_keyword loc ("."^field) s with Not_found -> FIELD field)
   | [< s >] ->
       match lookup_utf8 loc s with
       | Utf8Token (Unicode.Letter, n) ->
-          token_of_special c (get_buff (ident_tail loc (nstore n 0 s) s))
+          let len = ident_tail loc (nstore n 0 s) s in
+          let field = get_buff len in
+          (try find_keyword loc ("."^field) s with Not_found -> FIELD field)
       | AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars loc bp c s)
 
 (* Parse what follows a question mark *)
@@ -533,7 +532,7 @@ let rec next_token loc = parser bp
       comm_loc bp; push_char c; next_token (bump_loc_line loc ep) s
   | [< '' ' | '\t' | '\r' as c; s >] ->
       comm_loc bp; push_char c; next_token loc s
-  | [< ''.' as c; t = parse_after_special loc c bp; s >] ep ->
+  | [< ''.' as c; t = parse_after_dot loc c bp; s >] ep ->
       comment_stop bp;
       (* We enforce that "." should either be part of a larger keyword,
          for instance ".(", or followed by a blank or eof. *)
diff --git a/test-suite/output/Notations2.out b/test-suite/output/Notations2.out
index 5541ccf57..ad60aeccc 100644
--- a/test-suite/output/Notations2.out
+++ b/test-suite/output/Notations2.out
@@ -60,3 +60,27 @@ exist (Q x) y conj
      : nat -> nat
 {1, 2}
      : nat -> Prop
+a#
+     : Set
+a#
+     : Set
+a≡
+     : Set
+a≡
+     : Set
+.≡
+     : Set
+.≡
+     : Set
+.a#
+     : Set
+.a#
+     : Set
+.a≡
+     : Set
+.a≡
+     : Set
+.α
+     : Set
+.α
+     : Set
diff --git a/test-suite/output/Notations2.v b/test-suite/output/Notations2.v
index 1d8278c08..ceb29d1b9 100644
--- a/test-suite/output/Notations2.v
+++ b/test-suite/output/Notations2.v
@@ -116,3 +116,32 @@ Check %j.
 
 Notation "{ x , y , .. , v }" := (fun a => (or .. (or (a = x) (a = y)) .. (a = v))).
 Check ({1, 2}).
+
+(**********************************************************************)
+(* Check notations of the form ".a", ".a≡", "a≡"                      *)
+(* Only "a#", "a≡" and ".≡" were working properly for parsing. The    *)
+(* other ones were working only for printing.                         *)
+
+Notation "a#" := nat.
+Check nat.
+Check a#.
+
+Notation "a≡" := nat.
+Check nat.
+Check a≡.
+
+Notation ".≡" := nat.
+Check nat.
+Check .≡.
+
+Notation ".a#" := nat.
+Check nat.
+Check .a#.
+
+Notation ".a≡" := nat.
+Check nat.
+Check .a≡.
+
+Notation ".α" := nat.
+Check nat.
+Check .α.
-- 
cgit v1.2.3


From 561349466556f02b8d2e1cb8f2b846c188243bf9 Mon Sep 17 00:00:00 2001
From: Hugo Herbelin <Hugo.Herbelin@inria.fr>
Date: Thu, 13 Oct 2016 18:05:12 +0200
Subject: Extra warning about unicode character of unknown status following an
 ident. This covers the case e.g. of "xₚ" (until the table of unicode
 characters is upgraded!).
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 parsing/cLexer.ml4 | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'parsing/cLexer.ml4')

diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4
index 740578aad..31025075c 100644
--- a/parsing/cLexer.ml4
+++ b/parsing/cLexer.ml4
@@ -244,6 +244,12 @@ let get_buff len = String.sub !buff 0 len
 
 (* The classical lexer: idents, numbers, quoted strings, comments *)
 
+let warn_unrecognized_unicode =
+  CWarnings.create ~name:"unrecognized-unicode" ~category:"parsing"
+         (fun (u,id) ->
+          strbrk (Printf.sprintf "Not considering unicode character \"%s\" of unknown \
+                                  lexical status as part of identifier \"%s\"." u id))
+
 let rec ident_tail loc len = parser
   | [< ' ('a'..'z' | 'A'..'Z' | '0'..'9' | ''' | '_' as c); s >] ->
       ident_tail loc (store len c) s
@@ -251,6 +257,10 @@ let rec ident_tail loc len = parser
       match lookup_utf8 loc s with
       | Utf8Token ((Unicode.IdentPart | Unicode.Letter), n) ->
           ident_tail loc (nstore n len s) s
+      | Utf8Token (Unicode.Unknown, n) ->
+          let id = get_buff len in
+          let u = String.concat "" (List.map (String.make 1) (Stream.npeek n s)) in
+          warn_unrecognized_unicode ~loc:!@loc (u,id); len
       | _ -> len
 
 let rec number len = parser
-- 
cgit v1.2.3