[camlpX] Remove camlp4 compat layer.

We remove the camlp4 compatibility layer, and try to clean up most structures. `parsing/compat` is gone. We added some documentation to the lexer/parser interfaces that are often obscured by module includes.
author: Emilio Jesus Gallego Arias <e+git@x80.org> 2017-03-08 03:22:22 +0100
committer: Emilio Jesus Gallego Arias <e+git@x80.org> 2017-04-07 02:55:41 +0200
commit: 1d0eb5d4d6fea88abc29798ee2004b2e27e952c6 (patch)
tree: 24b4e369c4acbe2bb9c9ca79b84fc7ddff34e2d8 /parsing/cLexer.ml4
parent: fee2365f13900b4d4f4b88c986cbbf94403eeefa (diff)
1 files changed, 61 insertions, 47 deletions
diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4
index 3b84eaa81..6d259e1b1 100644
--- a/parsing/cLexer.ml4
+++ b/parsing/cLexer.ml4
@@ -9,7 +9,17 @@
 open Pp
 open Util
 open Tok
-open Compat
+
+let to_coqloc loc =
+  { Loc.fname = Ploc.file_name loc;
+    Loc.line_nb = Ploc.line_nb loc;
+    Loc.bol_pos = Ploc.bol_pos loc;
+    Loc.bp = Ploc.first_pos loc;
+    Loc.ep = Ploc.last_pos loc;
+    Loc.line_nb_last = Ploc.line_nb_last loc;
+    Loc.bol_pos_last = Ploc.bol_pos_last loc; }
+
+let (!@) = to_coqloc
 
 (* Dictionaries: trees annotated with string options, each node being a map
    from chars to dictionaries (the subtrees). A trie, in other words. *)
@@ -110,11 +120,52 @@ module Error = struct
 end
 open Error
 
-let err loc str = Loc.raise ~loc:(Compat.to_coqloc loc) (Error.E str)
+let err loc str = Loc.raise ~loc:(to_coqloc loc) (Error.E str)
 
 let bad_token str = raise (Error.E (Bad_token str))
 
-(* Lexer conventions on tokens *)
+(** Location utilities  *)
+let file_loc_of_file = function
+| None -> ""
+| Some f -> f
+
+let make_loc fname line_nb bol_pos bp ep =
+  Ploc.make_loc (file_loc_of_file fname) line_nb bol_pos (bp, ep) ""
+
+(* Update a loc without allocating an intermediate pair *)
+let set_loc_pos loc bp ep =
+  Ploc.sub loc (bp - Ploc.first_pos loc) (ep - bp)
+
+(* Increase line number by 1 and update position of beginning of line *)
+let bump_loc_line loc bol_pos =
+  Ploc.make_loc (Ploc.file_name loc) (Ploc.line_nb loc + 1) bol_pos
+		(Ploc.first_pos loc, Ploc.last_pos loc) (Ploc.comment loc)
+
+(* Same as [bump_loc_line], but for the last line in location *)
+(* For an obscure reason, camlp5 does not give an easy way to set line_nb_stop,
+   so we have to resort to a hack merging two locations. *)
+(* Warning: [bump_loc_line_last] changes the end position. You may need to call
+   [set_loc_pos] to fix it. *)
+let bump_loc_line_last loc bol_pos =
+  let loc' =
+    Ploc.make_loc (Ploc.file_name loc) (Ploc.line_nb_last loc + 1) bol_pos
+		  (Ploc.first_pos loc + 1, Ploc.last_pos loc + 1) (Ploc.comment loc)
+  in
+  Ploc.encl loc loc'
+
+let set_loc_file loc fname =
+  Ploc.make_loc fname (Ploc.line_nb loc) (Ploc.bol_pos loc)
+		(Ploc.first_pos loc, Ploc.last_pos loc) (Ploc.comment loc)
+
+(* For some reason, the [Ploc.after] function of Camlp5 does not update line
+   numbers, so we define our own function that does it. *)
+let after loc =
+  let line_nb = Ploc.line_nb_last loc in
+  let bol_pos = Ploc.bol_pos_last loc in
+  Ploc.make_loc (Ploc.file_name loc) line_nb bol_pos
+		(Ploc.last_pos loc, Ploc.last_pos loc) (Ploc.comment loc)
+
+(** Lexer conventions on tokens *)
 
 type token_kind =
   | Utf8Token of (Unicode.status * int)
@@ -186,7 +237,7 @@ let check_keyword str =
   let rec loop_symb = parser
     | [< ' (' ' | '\n' | '\r' | '\t' | '"') >] -> bad_token str
     | [< s >] ->
-        match unlocated lookup_utf8 Compat.CompatLoc.ghost s with
+        match unlocated lookup_utf8 Ploc.dummy s with
         | Utf8Token (_,n) -> njunk n s; loop_symb s
         | AsciiChar -> Stream.junk s; loop_symb s
         | EmptyStream -> ()
@@ -200,7 +251,7 @@ let check_ident str =
     | [< ' ('0'..'9' | ''') when intail; s >] ->
         loop_id true s
     | [< s >] ->
-        match unlocated lookup_utf8 Compat.CompatLoc.ghost s with
+        match unlocated lookup_utf8 Ploc.dummy s with
         | Utf8Token (Unicode.Letter, n) -> njunk n s; loop_id true s
         | Utf8Token (Unicode.IdentPart, n) when intail ->
           njunk n s;
@@ -233,10 +284,10 @@ let remove_keyword str =
 let keywords () = ttree_elements !token_tree
 
 (* Freeze and unfreeze the state of the lexer *)
-type frozen_t = ttree
+type keyword_state = ttree
 
-let freeze () = !token_tree
-let unfreeze tt = (token_tree := tt)
+let get_keyword_state () = !token_tree
+let set_keyword_state tt = (token_tree := tt)
 
 (* The string buffering machinery *)
 
@@ -621,8 +672,6 @@ let loct_add loct i loc = Hashtbl.add loct i loc
    we unfreeze the state of the lexer. This restores the behaviour of the
    lexer. B.B. *)
 
-IFDEF CAMLP5 THEN
-
 type te = Tok.t
 
 (** Names of tokens, for this lexer, used in Grammar error messages *)
@@ -640,12 +689,12 @@ let token_text = function
 
 let func cs =
   let loct = loct_create () in
-  let cur_loc = ref (Compat.make_loc !current_file 1 0 0 0) in
+  let cur_loc = ref (make_loc !current_file 1 0 0 0) in
   let ts =
     Stream.from
       (fun i ->
          let (tok, loc) = next_token !cur_loc cs in
-	 cur_loc := Compat.after loc;
+	 cur_loc := after loc;
          loct_add loct i loc; Some tok)
   in
   (ts, loct_func loct)
@@ -661,41 +710,6 @@ let lexer = {
   Token.tok_comm = None;
   Token.tok_text = token_text }
 
-ELSE (* official camlp4 for ocaml >= 3.10 *)
-
-module M_ = Camlp4.ErrorHandler.Register (Error)
-
-module Loc = Compat.CompatLoc
-module Token = struct
-  include Tok (* Cf. tok.ml *)
-  module Loc = Compat.CompatLoc
-  module Error = Camlp4.Struct.EmptyError
-  module Filter = struct
-    type token_filter = (Tok.t * Loc.t) Stream.t -> (Tok.t * Loc.t) Stream.t
-    type t = unit
-    let mk _is_kwd = ()
-    let keyword_added () kwd _ = add_keyword kwd
-    let keyword_removed () _ = ()
-    let filter () x = x
-    let define_filter () _ = ()
-  end
-end
-
-let mk () =
-  let loct = loct_create () in
-  let cur_loc = ref (Compat.make_loc !current_file 1 0 0 0) in
-  let rec self init_loc (* FIXME *) =
-    parser i
-       [< (tok, loc) = next_token !cur_loc; s >] ->
-            cur_loc := Compat.set_loc_file loc !current_file;
-	    loct_add loct i loc;
-            [< '(tok, loc); self init_loc s >]
-      | [< >] -> [< >]
-  in
-  self
-
-END
-
 (** Terminal symbols interpretation *)
 
 let is_ident_not_keyword s =
author	Emilio Jesus Gallego Arias <e+git@x80.org>	2017-03-08 03:22:22 +0100
committer	Emilio Jesus Gallego Arias <e+git@x80.org>	2017-04-07 02:55:41 +0200
commit	1d0eb5d4d6fea88abc29798ee2004b2e27e952c6 (patch)
tree	24b4e369c4acbe2bb9c9ca79b84fc7ddff34e2d8 /parsing/cLexer.ml4
parent	fee2365f13900b4d4f4b88c986cbbf94403eeefa (diff)