aboutsummaryrefslogtreecommitdiffhomepage
path: root/parsing/cLexer.ml4
diff options
context:
space:
mode:
authorGravatar Emilio Jesus Gallego Arias <e+git@x80.org>2017-03-08 03:22:22 +0100
committerGravatar Emilio Jesus Gallego Arias <e+git@x80.org>2017-04-07 02:55:41 +0200
commit1d0eb5d4d6fea88abc29798ee2004b2e27e952c6 (patch)
tree24b4e369c4acbe2bb9c9ca79b84fc7ddff34e2d8 /parsing/cLexer.ml4
parentfee2365f13900b4d4f4b88c986cbbf94403eeefa (diff)
[camlpX] Remove camlp4 compat layer.
We remove the camlp4 compatibility layer, and try to clean up most structures. `parsing/compat` is gone. We added some documentation to the lexer/parser interfaces that are often obscured by module includes.
Diffstat (limited to 'parsing/cLexer.ml4')
-rw-r--r--parsing/cLexer.ml4108
1 files changed, 61 insertions, 47 deletions
diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4
index 3b84eaa81..6d259e1b1 100644
--- a/parsing/cLexer.ml4
+++ b/parsing/cLexer.ml4
@@ -9,7 +9,17 @@
open Pp
open Util
open Tok
-open Compat
+
+let to_coqloc loc =
+ { Loc.fname = Ploc.file_name loc;
+ Loc.line_nb = Ploc.line_nb loc;
+ Loc.bol_pos = Ploc.bol_pos loc;
+ Loc.bp = Ploc.first_pos loc;
+ Loc.ep = Ploc.last_pos loc;
+ Loc.line_nb_last = Ploc.line_nb_last loc;
+ Loc.bol_pos_last = Ploc.bol_pos_last loc; }
+
+let (!@) = to_coqloc
(* Dictionaries: trees annotated with string options, each node being a map
from chars to dictionaries (the subtrees). A trie, in other words. *)
@@ -110,11 +120,52 @@ module Error = struct
end
open Error
-let err loc str = Loc.raise ~loc:(Compat.to_coqloc loc) (Error.E str)
+let err loc str = Loc.raise ~loc:(to_coqloc loc) (Error.E str)
let bad_token str = raise (Error.E (Bad_token str))
-(* Lexer conventions on tokens *)
+(** Location utilities *)
+let file_loc_of_file = function
+| None -> ""
+| Some f -> f
+
+let make_loc fname line_nb bol_pos bp ep =
+ Ploc.make_loc (file_loc_of_file fname) line_nb bol_pos (bp, ep) ""
+
+(* Update a loc without allocating an intermediate pair *)
+let set_loc_pos loc bp ep =
+ Ploc.sub loc (bp - Ploc.first_pos loc) (ep - bp)
+
+(* Increase line number by 1 and update position of beginning of line *)
+let bump_loc_line loc bol_pos =
+ Ploc.make_loc (Ploc.file_name loc) (Ploc.line_nb loc + 1) bol_pos
+ (Ploc.first_pos loc, Ploc.last_pos loc) (Ploc.comment loc)
+
+(* Same as [bump_loc_line], but for the last line in location *)
+(* For an obscure reason, camlp5 does not give an easy way to set line_nb_stop,
+ so we have to resort to a hack merging two locations. *)
+(* Warning: [bump_loc_line_last] changes the end position. You may need to call
+ [set_loc_pos] to fix it. *)
+let bump_loc_line_last loc bol_pos =
+ let loc' =
+ Ploc.make_loc (Ploc.file_name loc) (Ploc.line_nb_last loc + 1) bol_pos
+ (Ploc.first_pos loc + 1, Ploc.last_pos loc + 1) (Ploc.comment loc)
+ in
+ Ploc.encl loc loc'
+
+let set_loc_file loc fname =
+ Ploc.make_loc fname (Ploc.line_nb loc) (Ploc.bol_pos loc)
+ (Ploc.first_pos loc, Ploc.last_pos loc) (Ploc.comment loc)
+
+(* For some reason, the [Ploc.after] function of Camlp5 does not update line
+ numbers, so we define our own function that does it. *)
+let after loc =
+ let line_nb = Ploc.line_nb_last loc in
+ let bol_pos = Ploc.bol_pos_last loc in
+ Ploc.make_loc (Ploc.file_name loc) line_nb bol_pos
+ (Ploc.last_pos loc, Ploc.last_pos loc) (Ploc.comment loc)
+
+(** Lexer conventions on tokens *)
type token_kind =
| Utf8Token of (Unicode.status * int)
@@ -186,7 +237,7 @@ let check_keyword str =
let rec loop_symb = parser
| [< ' (' ' | '\n' | '\r' | '\t' | '"') >] -> bad_token str
| [< s >] ->
- match unlocated lookup_utf8 Compat.CompatLoc.ghost s with
+ match unlocated lookup_utf8 Ploc.dummy s with
| Utf8Token (_,n) -> njunk n s; loop_symb s
| AsciiChar -> Stream.junk s; loop_symb s
| EmptyStream -> ()
@@ -200,7 +251,7 @@ let check_ident str =
| [< ' ('0'..'9' | ''') when intail; s >] ->
loop_id true s
| [< s >] ->
- match unlocated lookup_utf8 Compat.CompatLoc.ghost s with
+ match unlocated lookup_utf8 Ploc.dummy s with
| Utf8Token (Unicode.Letter, n) -> njunk n s; loop_id true s
| Utf8Token (Unicode.IdentPart, n) when intail ->
njunk n s;
@@ -233,10 +284,10 @@ let remove_keyword str =
let keywords () = ttree_elements !token_tree
(* Freeze and unfreeze the state of the lexer *)
-type frozen_t = ttree
+type keyword_state = ttree
-let freeze () = !token_tree
-let unfreeze tt = (token_tree := tt)
+let get_keyword_state () = !token_tree
+let set_keyword_state tt = (token_tree := tt)
(* The string buffering machinery *)
@@ -621,8 +672,6 @@ let loct_add loct i loc = Hashtbl.add loct i loc
we unfreeze the state of the lexer. This restores the behaviour of the
lexer. B.B. *)
-IFDEF CAMLP5 THEN
-
type te = Tok.t
(** Names of tokens, for this lexer, used in Grammar error messages *)
@@ -640,12 +689,12 @@ let token_text = function
let func cs =
let loct = loct_create () in
- let cur_loc = ref (Compat.make_loc !current_file 1 0 0 0) in
+ let cur_loc = ref (make_loc !current_file 1 0 0 0) in
let ts =
Stream.from
(fun i ->
let (tok, loc) = next_token !cur_loc cs in
- cur_loc := Compat.after loc;
+ cur_loc := after loc;
loct_add loct i loc; Some tok)
in
(ts, loct_func loct)
@@ -661,41 +710,6 @@ let lexer = {
Token.tok_comm = None;
Token.tok_text = token_text }
-ELSE (* official camlp4 for ocaml >= 3.10 *)
-
-module M_ = Camlp4.ErrorHandler.Register (Error)
-
-module Loc = Compat.CompatLoc
-module Token = struct
- include Tok (* Cf. tok.ml *)
- module Loc = Compat.CompatLoc
- module Error = Camlp4.Struct.EmptyError
- module Filter = struct
- type token_filter = (Tok.t * Loc.t) Stream.t -> (Tok.t * Loc.t) Stream.t
- type t = unit
- let mk _is_kwd = ()
- let keyword_added () kwd _ = add_keyword kwd
- let keyword_removed () _ = ()
- let filter () x = x
- let define_filter () _ = ()
- end
-end
-
-let mk () =
- let loct = loct_create () in
- let cur_loc = ref (Compat.make_loc !current_file 1 0 0 0) in
- let rec self init_loc (* FIXME *) =
- parser i
- [< (tok, loc) = next_token !cur_loc; s >] ->
- cur_loc := Compat.set_loc_file loc !current_file;
- loct_add loct i loc;
- [< '(tok, loc); self init_loc s >]
- | [< >] -> [< >]
- in
- self
-
-END
-
(** Terminal symbols interpretation *)
let is_ident_not_keyword s =