aboutsummaryrefslogtreecommitdiffhomepage
path: root/parsing/lexer.ml4
diff options
context:
space:
mode:
Diffstat (limited to 'parsing/lexer.ml4')
-rw-r--r--parsing/lexer.ml4147
1 files changed, 85 insertions, 62 deletions
diff --git a/parsing/lexer.ml4 b/parsing/lexer.ml4
index 175ce16a4..ba5a8c425 100644
--- a/parsing/lexer.ml4
+++ b/parsing/lexer.ml4
@@ -8,7 +8,7 @@
open Pp
open Util
-open Token
+open Compat
open Tok
(* Dictionaries: trees annotated with string options, each node being a map
@@ -71,18 +71,34 @@ let ttree_remove ttree str =
(* Errors occuring while lexing (explained as "Lexer error: ...") *)
-type error =
- | Illegal_character
- | Unterminated_comment
- | Unterminated_string
- | Undefined_token
- | Bad_token of string
+module Error = struct
-exception Error of error
+ type t =
+ | Illegal_character
+ | Unterminated_comment
+ | Unterminated_string
+ | Undefined_token
+ | Bad_token of string
-let err loc str = Stdpp.raise_with_loc (Util.make_loc loc) (Error str)
+ exception E of t
-let bad_token str = raise (Error (Bad_token str))
+ let to_string x =
+ "Syntax Error: Lexer: " ^
+ (match x with
+ | Illegal_character -> "Illegal character"
+ | Unterminated_comment -> "Unterminated comment"
+ | Unterminated_string -> "Unterminated string"
+ | Undefined_token -> "Undefined token"
+ | Bad_token tok -> Format.sprintf "Bad token %S" tok)
+
+ let print ppf x = Format.fprintf ppf "%s@." (to_string x)
+
+end
+open Error
+
+let err loc str = Loc.raise (make_loc loc) (Error.E str)
+
+let bad_token str = raise (Error.E (Bad_token str))
(* Lexer conventions on tokens *)
@@ -175,7 +191,7 @@ let check_ident str =
let check_keyword str =
try check_special_token str
- with Error _ -> check_ident str
+ with Error.E _ -> check_ident str
(* Keyword and symbol dictionary *)
let token_tree = ref empty_ttree
@@ -379,7 +395,7 @@ let find_keyword id s =
let tt = ttree_find !token_tree id in
match progress_further tt.node 0 tt s with
| None -> raise Not_found
- | Some c -> c
+ | Some c -> KEYWORD c
(* Must be a special token *)
let process_chars bp c cs =
@@ -438,7 +454,7 @@ let rec next_token = parser bp
len = ident_tail (store 0 c); s >] ep ->
let id = get_buff len in
comment_stop bp;
- (try KEYWORD (find_keyword id s) with Not_found -> IDENT id), (bp, ep)
+ (try find_keyword id s with Not_found -> IDENT id), (bp, ep)
| [< ' ('0'..'9' as c); len = number (store 0 c) >] ep ->
comment_stop bp;
(INT (get_buff len), (bp, ep))
@@ -461,7 +477,7 @@ let rec next_token = parser bp
let id = get_buff len in
let ep = Stream.count s in
comment_stop bp;
- (try KEYWORD (find_keyword id s) with Not_found -> IDENT id), (bp, ep)
+ (try find_keyword id s with Not_found -> IDENT id), (bp, ep)
| AsciiChar | Utf8Token ((UnicodeSymbol | UnicodeIdentPart), _) ->
let t = process_chars bp (Stream.next s) s in
comment_stop bp; t
@@ -473,53 +489,23 @@ let rec next_token = parser bp
let locerr () = invalid_arg "Lexer: location function"
-let tsz = 256 (* up to 2^29 entries on a 32-bit machine, 2^61 on 64-bit *)
-
-let loct_create () = ref [| [| |] |]
+let loct_create () = Hashtbl.create 207
let loct_func loct i =
- match
- if i < 0 || i/tsz >= Array.length !loct then None
- else if !loct.(i/tsz) = [| |] then None
- else !loct.(i/tsz).(i mod tsz)
- with
- | Some loc -> Util.make_loc loc
- | _ -> locerr ()
-
-let loct_add loct i loc =
- while i/tsz >= Array.length !loct do
- let new_tmax = Array.length !loct * 2 in
- let new_loct = Array.make new_tmax [| |] in
- Array.blit !loct 0 new_loct 0 (Array.length !loct);
- loct := new_loct;
- done;
- if !loct.(i/tsz) = [| |] then !loct.(i/tsz) <- Array.make tsz None;
- !loct.(i/tsz).(i mod tsz) <- Some loc
-
-let current_location_table = ref (ref [| [| |] |])
-
-let location_function n =
- loct_func !current_location_table n
+ try Hashtbl.find loct i with Not_found -> locerr ()
-let func cs =
- let loct = loct_create () in
- let ts =
- Stream.from
- (fun i ->
- let (tok, loc) = next_token cs in
- loct_add loct i loc; Some tok)
- in
- current_location_table := loct;
- (ts, loct_func loct)
+let loct_add loct i loc = Hashtbl.add loct i loc
-type location_table = (int * int) option array array ref
+let current_location_table = ref (loct_create ())
+
+type location_table = (int, loc) Hashtbl.t
let location_table () = !current_location_table
let restore_location_table t = current_location_table := t
+let location_function n = loct_func !current_location_table n
+(** {6 The lexer of Coq} *)
-(* The lexer of Coq *)
-
-(* Note: removing a token.
+(** Note: removing a token.
We do nothing because [remove_token] is called only when removing a grammar
rule with [Grammar.delete_rule]. The latter command is called only when
unfreezing the state of the grammar entries (see GRAMMAR summary, file
@@ -529,7 +515,9 @@ let restore_location_table t = current_location_table := t
IFDEF CAMLP5 THEN
-(* Names of tokens, for this lexer, used in Grammar error messages *)
+type te = Tok.t
+
+(** Names of tokens, for this lexer, used in Grammar error messages *)
let token_text = function
| ("", t) -> "'" ^ t ^ "'"
@@ -542,15 +530,23 @@ let token_text = function
| (con, "") -> con
| (con, prm) -> con ^ " \"" ^ prm ^ "\""
-(* Adding a new token (keyword or special token). *)
-
-let add_token pat = match Tok.of_pattern pat with
- | KEYWORD s -> add_keyword s
- | _ -> ()
+let func cs =
+ let loct = loct_create () in
+ let ts =
+ Stream.from
+ (fun i ->
+ let (tok, loc) = next_token cs in
+ loct_add loct i (make_loc loc); Some tok)
+ in
+ current_location_table := loct;
+ (ts, loct_func loct)
let lexer = {
Token.tok_func = func;
- Token.tok_using = add_token;
+ Token.tok_using =
+ (fun pat -> match Tok.of_pattern pat with
+ | KEYWORD s -> add_keyword s
+ | _ -> ());
Token.tok_removing = (fun _ -> ());
Token.tok_match = Tok.match_pattern;
Token.tok_comm = None;
@@ -558,11 +554,38 @@ let lexer = {
ELSE (* official camlp4 for ocaml >= 3.10 *)
-TODO
+module M_ = Camlp4.ErrorHandler.Register (Error)
+
+module Loc = Loc
+module Token = struct
+ include Tok (* Cf. tok.ml *)
+ module Loc = Loc
+ module Error = Camlp4.Struct.EmptyError
+ module Filter = struct
+ type token_filter = (Tok.t * Loc.t) Stream.t -> (Tok.t * Loc.t) Stream.t
+ type t = unit
+ let mk _is_kwd = ()
+ let keyword_added () kwd _ = add_keyword kwd
+ let keyword_removed () _ = ()
+ let filter () x = x
+ let define_filter () _ = ()
+ end
+end
+
+let mk () _init_loc(*FIXME*) cs =
+ let loct = loct_create () in
+ let rec self =
+ parser i
+ [< (tok, loc) = next_token; s >] ->
+ let loc = make_loc loc in
+ loct_add loct i loc;
+ [< '(tok, loc); self s >]
+ | [< >] -> [< >]
+ in current_location_table := loct; self cs
END
-(* Terminal symbols interpretation *)
+(** Terminal symbols interpretation *)
let is_ident_not_keyword s =
match s.[0] with