diff options
author | Emilio Jesus Gallego Arias <e+git@x80.org> | 2017-03-08 03:22:22 +0100 |
---|---|---|
committer | Emilio Jesus Gallego Arias <e+git@x80.org> | 2017-04-07 02:55:41 +0200 |
commit | 1d0eb5d4d6fea88abc29798ee2004b2e27e952c6 (patch) | |
tree | 24b4e369c4acbe2bb9c9ca79b84fc7ddff34e2d8 /parsing/cLexer.ml4 | |
parent | fee2365f13900b4d4f4b88c986cbbf94403eeefa (diff) |
[camlpX] Remove camlp4 compat layer.
We remove the camlp4 compatibility layer, and try to clean up
most structures. `parsing/compat` is gone.
We added some documentation to the lexer/parser interfaces that are
often obscured by module includes.
Diffstat (limited to 'parsing/cLexer.ml4')
-rw-r--r-- | parsing/cLexer.ml4 | 108 |
1 files changed, 61 insertions, 47 deletions
diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4 index 3b84eaa81..6d259e1b1 100644 --- a/parsing/cLexer.ml4 +++ b/parsing/cLexer.ml4 @@ -9,7 +9,17 @@ open Pp open Util open Tok -open Compat + +let to_coqloc loc = + { Loc.fname = Ploc.file_name loc; + Loc.line_nb = Ploc.line_nb loc; + Loc.bol_pos = Ploc.bol_pos loc; + Loc.bp = Ploc.first_pos loc; + Loc.ep = Ploc.last_pos loc; + Loc.line_nb_last = Ploc.line_nb_last loc; + Loc.bol_pos_last = Ploc.bol_pos_last loc; } + +let (!@) = to_coqloc (* Dictionaries: trees annotated with string options, each node being a map from chars to dictionaries (the subtrees). A trie, in other words. *) @@ -110,11 +120,52 @@ module Error = struct end open Error -let err loc str = Loc.raise ~loc:(Compat.to_coqloc loc) (Error.E str) +let err loc str = Loc.raise ~loc:(to_coqloc loc) (Error.E str) let bad_token str = raise (Error.E (Bad_token str)) -(* Lexer conventions on tokens *) +(** Location utilities *) +let file_loc_of_file = function +| None -> "" +| Some f -> f + +let make_loc fname line_nb bol_pos bp ep = + Ploc.make_loc (file_loc_of_file fname) line_nb bol_pos (bp, ep) "" + +(* Update a loc without allocating an intermediate pair *) +let set_loc_pos loc bp ep = + Ploc.sub loc (bp - Ploc.first_pos loc) (ep - bp) + +(* Increase line number by 1 and update position of beginning of line *) +let bump_loc_line loc bol_pos = + Ploc.make_loc (Ploc.file_name loc) (Ploc.line_nb loc + 1) bol_pos + (Ploc.first_pos loc, Ploc.last_pos loc) (Ploc.comment loc) + +(* Same as [bump_loc_line], but for the last line in location *) +(* For an obscure reason, camlp5 does not give an easy way to set line_nb_stop, + so we have to resort to a hack merging two locations. *) +(* Warning: [bump_loc_line_last] changes the end position. You may need to call + [set_loc_pos] to fix it. *) +let bump_loc_line_last loc bol_pos = + let loc' = + Ploc.make_loc (Ploc.file_name loc) (Ploc.line_nb_last loc + 1) bol_pos + (Ploc.first_pos loc + 1, Ploc.last_pos loc + 1) (Ploc.comment loc) + in + Ploc.encl loc loc' + +let set_loc_file loc fname = + Ploc.make_loc fname (Ploc.line_nb loc) (Ploc.bol_pos loc) + (Ploc.first_pos loc, Ploc.last_pos loc) (Ploc.comment loc) + +(* For some reason, the [Ploc.after] function of Camlp5 does not update line + numbers, so we define our own function that does it. *) +let after loc = + let line_nb = Ploc.line_nb_last loc in + let bol_pos = Ploc.bol_pos_last loc in + Ploc.make_loc (Ploc.file_name loc) line_nb bol_pos + (Ploc.last_pos loc, Ploc.last_pos loc) (Ploc.comment loc) + +(** Lexer conventions on tokens *) type token_kind = | Utf8Token of (Unicode.status * int) @@ -186,7 +237,7 @@ let check_keyword str = let rec loop_symb = parser | [< ' (' ' | '\n' | '\r' | '\t' | '"') >] -> bad_token str | [< s >] -> - match unlocated lookup_utf8 Compat.CompatLoc.ghost s with + match unlocated lookup_utf8 Ploc.dummy s with | Utf8Token (_,n) -> njunk n s; loop_symb s | AsciiChar -> Stream.junk s; loop_symb s | EmptyStream -> () @@ -200,7 +251,7 @@ let check_ident str = | [< ' ('0'..'9' | ''') when intail; s >] -> loop_id true s | [< s >] -> - match unlocated lookup_utf8 Compat.CompatLoc.ghost s with + match unlocated lookup_utf8 Ploc.dummy s with | Utf8Token (Unicode.Letter, n) -> njunk n s; loop_id true s | Utf8Token (Unicode.IdentPart, n) when intail -> njunk n s; @@ -233,10 +284,10 @@ let remove_keyword str = let keywords () = ttree_elements !token_tree (* Freeze and unfreeze the state of the lexer *) -type frozen_t = ttree +type keyword_state = ttree -let freeze () = !token_tree -let unfreeze tt = (token_tree := tt) +let get_keyword_state () = !token_tree +let set_keyword_state tt = (token_tree := tt) (* The string buffering machinery *) @@ -621,8 +672,6 @@ let loct_add loct i loc = Hashtbl.add loct i loc we unfreeze the state of the lexer. This restores the behaviour of the lexer. B.B. *) -IFDEF CAMLP5 THEN - type te = Tok.t (** Names of tokens, for this lexer, used in Grammar error messages *) @@ -640,12 +689,12 @@ let token_text = function let func cs = let loct = loct_create () in - let cur_loc = ref (Compat.make_loc !current_file 1 0 0 0) in + let cur_loc = ref (make_loc !current_file 1 0 0 0) in let ts = Stream.from (fun i -> let (tok, loc) = next_token !cur_loc cs in - cur_loc := Compat.after loc; + cur_loc := after loc; loct_add loct i loc; Some tok) in (ts, loct_func loct) @@ -661,41 +710,6 @@ let lexer = { Token.tok_comm = None; Token.tok_text = token_text } -ELSE (* official camlp4 for ocaml >= 3.10 *) - -module M_ = Camlp4.ErrorHandler.Register (Error) - -module Loc = Compat.CompatLoc -module Token = struct - include Tok (* Cf. tok.ml *) - module Loc = Compat.CompatLoc - module Error = Camlp4.Struct.EmptyError - module Filter = struct - type token_filter = (Tok.t * Loc.t) Stream.t -> (Tok.t * Loc.t) Stream.t - type t = unit - let mk _is_kwd = () - let keyword_added () kwd _ = add_keyword kwd - let keyword_removed () _ = () - let filter () x = x - let define_filter () _ = () - end -end - -let mk () = - let loct = loct_create () in - let cur_loc = ref (Compat.make_loc !current_file 1 0 0 0) in - let rec self init_loc (* FIXME *) = - parser i - [< (tok, loc) = next_token !cur_loc; s >] -> - cur_loc := Compat.set_loc_file loc !current_file; - loct_add loct i loc; - [< '(tok, loc); self init_loc s >] - | [< >] -> [< >] - in - self - -END - (** Terminal symbols interpretation *) let is_ident_not_keyword s = |