aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar letouzey <letouzey@85f007b7-540e-0410-9357-904b9bb8a0f7>2010-05-19 15:29:44 +0000
committerGravatar letouzey <letouzey@85f007b7-540e-0410-9357-904b9bb8a0f7>2010-05-19 15:29:44 +0000
commit259dde7928696593c2d3c6de474f5cf50fa4417d (patch)
tree7fe225a0731c13b30cb10ae7098e096f38903366
parente1feff1215562d8f99fedf73c87011e6d7edca19 (diff)
Nicer representation of tokens, more independant of camlp*
Cf tok.ml, token isn't anymore string*string where first string encodes the kind of the token, but rather a nice sum type. Unfortunately, string*string (a.k.a Plexing.pattern) is still used in some places of Camlp5, so there's a few conversions back and forth. But the penalty should be quite low, and having nicer tokens helps in the forthcoming integration of support for camlp4 post 3.10 git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@13018 85f007b7-540e-0410-9357-904b9bb8a0f7
-rw-r--r--dev/printers.mllib1
-rw-r--r--interp/interp.mllib1
-rw-r--r--lib/compat.ml43
-rw-r--r--lib/util.ml9
-rw-r--r--lib/util.mli7
-rw-r--r--parsing/argextend.ml44
-rw-r--r--parsing/egrammar.ml6
-rw-r--r--parsing/egrammar.mli2
-rw-r--r--parsing/extend.ml2
-rw-r--r--parsing/extend.mli2
-rw-r--r--parsing/g_constr.ml473
-rw-r--r--parsing/g_prim.ml42
-rw-r--r--parsing/g_tactic.ml4109
-rw-r--r--parsing/g_vernac.ml42
-rw-r--r--parsing/grammar.mllib1
-rw-r--r--parsing/lexer.ml4108
-rw-r--r--parsing/lexer.mli4
-rw-r--r--parsing/pcoq.ml415
-rw-r--r--parsing/pcoq.mli27
-rw-r--r--parsing/tok.ml90
-rw-r--r--parsing/tok.mli29
-rw-r--r--toplevel/metasyntax.ml15
-rw-r--r--toplevel/toplevel.ml4
23 files changed, 324 insertions, 192 deletions
diff --git a/dev/printers.mllib b/dev/printers.mllib
index 8ada6769c..d060440b3 100644
--- a/dev/printers.mllib
+++ b/dev/printers.mllib
@@ -93,6 +93,7 @@ Unification
Cases
Pretyping
+Tok
Lexer
Ppextend
Genarg
diff --git a/interp/interp.mllib b/interp/interp.mllib
index 3825f3d87..08a935a44 100644
--- a/interp/interp.mllib
+++ b/interp/interp.mllib
@@ -1,3 +1,4 @@
+Tok
Lexer
Topconstr
Ppextend
diff --git a/lib/compat.ml4 b/lib/compat.ml4
index 5ed8d7a54..c37758152 100644
--- a/lib/compat.ml4
+++ b/lib/compat.ml4
@@ -17,8 +17,7 @@ let unloc loc = Stdpp.first_pos loc, Stdpp.last_pos loc
let join_loc loc1 loc2 =
if loc1 = dummy_loc or loc2 = dummy_loc then dummy_loc
else Stdpp.encl_loc loc1 loc2
-type token = string*string
-type lexer = token Token.glexer
+type lexer = Tok.t Token.glexer
ELSE (* official camlp4 of ocaml >= 3.10 *)
diff --git a/lib/util.ml b/lib/util.ml
index a70278d57..d08727d27 100644
--- a/lib/util.ml
+++ b/lib/util.ml
@@ -1115,6 +1115,15 @@ let array_rev_to_list a =
if i >= Array.length a then res else tolist (i+1) (a.(i) :: res) in
tolist 0 []
+(* Stream *)
+
+let stream_nth n st =
+ try List.nth (Stream.npeek (n+1) st) n
+ with Failure _ -> raise Stream.Failure
+
+let stream_njunk n st =
+ for i = 1 to n do Stream.junk st done
+
(* Matrices *)
let matrix_transpose mat =
diff --git a/lib/util.mli b/lib/util.mli
index f7f40b805..9cef982fb 100644
--- a/lib/util.mli
+++ b/lib/util.mli
@@ -269,7 +269,12 @@ val array_distinct : 'a array -> bool
val array_union_map : ('a -> 'b -> 'b) -> 'a array -> 'b -> 'b
val array_rev_to_list : 'a array -> 'a list
-(** {6 Matrices } *)
+(** {6 Streams. } *)
+
+val stream_nth : int -> 'a Stream.t -> 'a
+val stream_njunk : int -> 'a Stream.t -> unit
+
+(** {6 Matrices. } *)
val matrix_transpose : 'a list list -> 'a list list
diff --git a/parsing/argextend.ml4 b/parsing/argextend.ml4
index 72b9d0a50..fd4952a52 100644
--- a/parsing/argextend.ml4
+++ b/parsing/argextend.ml4
@@ -105,7 +105,7 @@ let make_act loc act pil =
make (List.rev pil)
let make_prod_item = function
- | GramTerminal s -> <:expr< (Gramext.Stoken (Lexer.terminal $str:s$)) >>
+ | GramTerminal s -> <:expr< gram_token_of_string $str:s$ >>
| GramNonTerminal (_,_,g,_) ->
<:expr< Pcoq.symbol_of_prod_entry_key $mlexpr_of_prod_entry_key g$ >>
@@ -249,7 +249,7 @@ EXTEND
GramNonTerminal (loc, t, g, Some (Names.id_of_string s))
| s = STRING ->
if String.length s > 0 && Util.is_letter s.[0] then
- Lexer.add_token ("", s);
+ Lexer.add_keyword s;
GramTerminal s
] ]
;
diff --git a/parsing/egrammar.ml b/parsing/egrammar.ml
index 337ddd7ed..a07c52e84 100644
--- a/parsing/egrammar.ml
+++ b/parsing/egrammar.ml
@@ -55,7 +55,7 @@ let cases_pattern_expr_of_name (loc,na) = match na with
| Name id -> CPatAtom (loc,Some (Ident (loc,id)))
type grammar_constr_prod_item =
- | GramConstrTerminal of Token.pattern
+ | GramConstrTerminal of Tok.t
| GramConstrNonTerminal of constr_prod_entry_key * identifier option
| GramConstrListMark of int * bool
(* tells action rule to make a list of the n previous parsed items;
@@ -133,7 +133,7 @@ let make_cases_pattern_action
let rec make_constr_prod_item assoc from forpat = function
| GramConstrTerminal tok :: l ->
- Gramext.Stoken tok :: make_constr_prod_item assoc from forpat l
+ gram_token_of_token tok :: make_constr_prod_item assoc from forpat l
| GramConstrNonTerminal (nt, ovar) :: l ->
symbol_of_constr_prod_entry_key assoc from forpat nt
:: make_constr_prod_item assoc from forpat l
@@ -203,7 +203,7 @@ type grammar_prod_item =
loc * argument_type * Gram.te prod_entry_key * identifier option
let make_prod_item = function
- | GramTerminal s -> (Gramext.Stoken (Lexer.terminal s), None)
+ | GramTerminal s -> (gram_token_of_string s, None)
| GramNonTerminal (_,t,e,po) ->
(symbol_of_prod_entry_key e, Option.map (fun p -> (p,t)) po)
diff --git a/parsing/egrammar.mli b/parsing/egrammar.mli
index 1e9159933..874aed570 100644
--- a/parsing/egrammar.mli
+++ b/parsing/egrammar.mli
@@ -25,7 +25,7 @@ open Mod_subst
(** For constr notations *)
type grammar_constr_prod_item =
- | GramConstrTerminal of Token.pattern
+ | GramConstrTerminal of Tok.t
| GramConstrNonTerminal of constr_prod_entry_key * identifier option
| GramConstrListMark of int * bool
(* tells action rule to make a list of the n previous parsed items;
diff --git a/parsing/extend.ml b/parsing/extend.ml
index a3ef4893a..cc3551d32 100644
--- a/parsing/extend.ml
+++ b/parsing/extend.ml
@@ -46,7 +46,7 @@ type ('lev,'pos) constr_entry_key_gen =
| ETConstr of ('lev * 'pos)
| ETPattern
| ETOther of string * string
- | ETConstrList of ('lev * 'pos) * Token.pattern list
+ | ETConstrList of ('lev * 'pos) * Tok.t list
(* Entries level (left-hand-side of grammar rules) *)
type constr_entry_key =
diff --git a/parsing/extend.mli b/parsing/extend.mli
index 5ff475f79..99b83785c 100644
--- a/parsing/extend.mli
+++ b/parsing/extend.mli
@@ -46,7 +46,7 @@ type ('lev,'pos) constr_entry_key_gen =
| ETConstr of ('lev * 'pos)
| ETPattern
| ETOther of string * string
- | ETConstrList of ('lev * 'pos) * Token.pattern list
+ | ETConstrList of ('lev * 'pos) * Tok.t list
(* Entries level (left-hand-side of grammar rules) *)
type constr_entry_key =
diff --git a/parsing/g_constr.ml4 b/parsing/g_constr.ml4
index 12ed0c4b6..9ce65226d 100644
--- a/parsing/g_constr.ml4
+++ b/parsing/g_constr.ml4
@@ -15,8 +15,8 @@ open Term
open Names
open Libnames
open Topconstr
-
open Util
+open Tok
let constr_kw =
[ "forall"; "fun"; "match"; "fix"; "cofix"; "with"; "in"; "for";
@@ -24,7 +24,7 @@ let constr_kw =
"Prop"; "Set"; "Type"; ".("; "_"; "..";
"`{"; "`("; "{|"; "|}" ]
-let _ = List.iter (fun s -> Lexer.add_token("",s)) constr_kw
+let _ = List.iter Lexer.add_keyword constr_kw
let mk_cast = function
(c,(_,None)) -> c
@@ -67,60 +67,61 @@ let mk_fix(loc,kw,id,dcls) =
let mk_single_fix (loc,kw,dcl) =
let (id,_,_,_,_) = dcl in mk_fix(loc,kw,id,[dcl])
+let err () = raise Stream.Failure
+
(* Hack to parse "(x:=t)" as an explicit argument without conflicts with the *)
(* admissible notation "(x t)" *)
let lpar_id_coloneq =
Gram.Entry.of_parser "test_lpar_id_coloneq"
(fun strm ->
- match Stream.npeek 1 strm with
- | [("","(")] ->
- (match Stream.npeek 2 strm with
- | [_; ("IDENT",s)] ->
- (match Stream.npeek 3 strm with
- | [_; _; ("", ":=")] ->
- Stream.junk strm; Stream.junk strm; Stream.junk strm;
+ match stream_nth 0 strm with
+ | KEYWORD "(" ->
+ (match stream_nth 1 strm with
+ | IDENT s ->
+ (match stream_nth 2 strm with
+ | KEYWORD ":=" ->
+ stream_njunk 3 strm;
Names.id_of_string s
- | _ -> raise Stream.Failure)
- | _ -> raise Stream.Failure)
- | _ -> raise Stream.Failure)
+ | _ -> err ())
+ | _ -> err ())
+ | _ -> err ())
let impl_ident =
Gram.Entry.of_parser "impl_ident"
(fun strm ->
- match Stream.npeek 1 strm with
- | [(_,"{")] ->
- (match Stream.npeek 2 strm with
- | [_;("IDENT",("wf"|"struct"|"measure"))] ->
- raise Stream.Failure
- | [_;("IDENT",s)] ->
- Stream.junk strm; Stream.junk strm;
+ match stream_nth 0 strm with
+ | KEYWORD "{" ->
+ (match stream_nth 1 strm with
+ | IDENT ("wf"|"struct"|"measure") -> err ()
+ | IDENT s ->
+ stream_njunk 2 strm;
Names.id_of_string s
- | _ -> raise Stream.Failure)
- | _ -> raise Stream.Failure)
+ | _ -> err ())
+ | _ -> err ())
let ident_colon =
Gram.Entry.of_parser "ident_colon"
(fun strm ->
- match Stream.npeek 1 strm with
- | [("IDENT",s)] ->
- (match Stream.npeek 2 strm with
- | [_; ("", ":")] ->
- Stream.junk strm; Stream.junk strm;
+ match stream_nth 0 strm with
+ | IDENT s ->
+ (match stream_nth 1 strm with
+ | KEYWORD ":" ->
+ stream_njunk 2 strm;
Names.id_of_string s
- | _ -> raise Stream.Failure)
- | _ -> raise Stream.Failure)
+ | _ -> err ())
+ | _ -> err ())
let ident_with =
Gram.Entry.of_parser "ident_with"
(fun strm ->
- match Stream.npeek 1 strm with
- | [("IDENT",s)] ->
- (match Stream.npeek 2 strm with
- | [_; ("", "with")] ->
- Stream.junk strm; Stream.junk strm;
- Names.id_of_string s
- | _ -> raise Stream.Failure)
- | _ -> raise Stream.Failure)
+ match stream_nth 0 strm with
+ | IDENT s ->
+ (match stream_nth 1 strm with
+ | KEYWORD "with" ->
+ stream_njunk 2 strm;
+ Names.id_of_string s
+ | _ -> err ())
+ | _ -> err ())
let aliasvar = function CPatAlias (_, _, id) -> Some (Name id) | _ -> None
diff --git a/parsing/g_prim.ml4 b/parsing/g_prim.ml4
index 5e6e0e1ed..b1a2fa54b 100644
--- a/parsing/g_prim.ml4
+++ b/parsing/g_prim.ml4
@@ -12,7 +12,7 @@ open Libnames
open Topconstr
let prim_kw = ["{"; "}"; "["; "]"; "("; ")"; "'"]
-let _ = List.iter (fun s -> Lexer.add_token("",s)) prim_kw
+let _ = List.iter Lexer.add_keyword prim_kw
open Prim
open Nametab
diff --git a/parsing/g_tactic.ml4 b/parsing/g_tactic.ml4
index c1400f0c4..b39ea5770 100644
--- a/parsing/g_tactic.ml4
+++ b/parsing/g_tactic.ml4
@@ -15,54 +15,57 @@ open Genarg
open Topconstr
open Libnames
open Termops
+open Tok
let all_with delta = make_red_flag [FBeta;FIota;FZeta;delta]
let tactic_kw = [ "->"; "<-" ; "by" ]
-let _ = List.iter (fun s -> Lexer.add_token("",s)) tactic_kw
+let _ = List.iter Lexer.add_keyword tactic_kw
+
+let err () = raise Stream.Failure
(* Hack to parse "(x:=t)" as an explicit argument without conflicts with the *)
(* admissible notation "(x t)" *)
let test_lpar_id_coloneq =
Gram.Entry.of_parser "lpar_id_coloneq"
(fun strm ->
- match Stream.npeek 1 strm with
- | [("","(")] ->
- (match Stream.npeek 2 strm with
- | [_; ("IDENT",s)] ->
- (match Stream.npeek 3 strm with
- | [_; _; ("", ":=")] -> ()
- | _ -> raise Stream.Failure)
- | _ -> raise Stream.Failure)
- | _ -> raise Stream.Failure)
+ match stream_nth 0 strm with
+ | KEYWORD "(" ->
+ (match stream_nth 1 strm with
+ | IDENT _ ->
+ (match stream_nth 2 strm with
+ | KEYWORD ":=" -> ()
+ | _ -> err ())
+ | _ -> err ())
+ | _ -> err ())
(* idem for (x:=t) and (1:=t) *)
let test_lpar_idnum_coloneq =
Gram.Entry.of_parser "test_lpar_idnum_coloneq"
(fun strm ->
- match Stream.npeek 1 strm with
- | [("","(")] ->
- (match Stream.npeek 2 strm with
- | [_; (("IDENT"|"INT"),_)] ->
- (match Stream.npeek 3 strm with
- | [_; _; ("", ":=")] -> ()
- | _ -> raise Stream.Failure)
- | _ -> raise Stream.Failure)
- | _ -> raise Stream.Failure)
+ match stream_nth 0 strm with
+ | KEYWORD "(" ->
+ (match stream_nth 1 strm with
+ | IDENT _ | INT _ ->
+ (match stream_nth 2 strm with
+ | KEYWORD ":=" -> ()
+ | _ -> err ())
+ | _ -> err ())
+ | _ -> err ())
(* idem for (x:t) *)
let test_lpar_id_colon =
Gram.Entry.of_parser "lpar_id_colon"
(fun strm ->
- match Stream.npeek 1 strm with
- | [("","(")] ->
- (match Stream.npeek 2 strm with
- | [_; ("IDENT",id)] ->
- (match Stream.npeek 3 strm with
- | [_; _; ("", ":")] -> ()
- | _ -> raise Stream.Failure)
- | _ -> raise Stream.Failure)
- | _ -> raise Stream.Failure)
+ match stream_nth 0 strm with
+ | KEYWORD "(" ->
+ (match stream_nth 1 strm with
+ | IDENT _ ->
+ (match stream_nth 2 strm with
+ | KEYWORD ":" -> ()
+ | _ -> err ())
+ | _ -> err ())
+ | _ -> err ())
(* idem for (x1..xn:t) [n^2 complexity but exceptional use] *)
let check_for_coloneq =
@@ -70,36 +73,36 @@ let check_for_coloneq =
(fun strm ->
let rec skip_to_rpar p n =
match list_last (Stream.npeek n strm) with
- | ("","(") -> skip_to_rpar (p+1) (n+1)
- | ("",")") -> if p=0 then n+1 else skip_to_rpar (p-1) (n+1)
- | ("",".") -> raise Stream.Failure
+ | KEYWORD "(" -> skip_to_rpar (p+1) (n+1)
+ | KEYWORD ")" -> if p=0 then n+1 else skip_to_rpar (p-1) (n+1)
+ | KEYWORD "." -> err ()
| _ -> skip_to_rpar p (n+1) in
let rec skip_names n =
match list_last (Stream.npeek n strm) with
- | ("IDENT",_) | ("","_") -> skip_names (n+1)
- | ("",":") -> skip_to_rpar 0 (n+1) (* skip a constr *)
- | _ -> raise Stream.Failure in
+ | IDENT _ | KEYWORD "_" -> skip_names (n+1)
+ | KEYWORD ":" -> skip_to_rpar 0 (n+1) (* skip a constr *)
+ | _ -> err () in
let rec skip_binders n =
match list_last (Stream.npeek n strm) with
- | ("","(") -> skip_binders (skip_names (n+1))
- | ("IDENT",_) | ("","_") -> skip_binders (n+1)
- | ("",":=") -> ()
- | _ -> raise Stream.Failure in
+ | KEYWORD "(" -> skip_binders (skip_names (n+1))
+ | IDENT _ | KEYWORD "_" -> skip_binders (n+1)
+ | KEYWORD ":=" -> ()
+ | _ -> err () in
match Stream.npeek 1 strm with
- | [("","(")] -> skip_binders 2
- | _ -> raise Stream.Failure)
+ | [KEYWORD "("] -> skip_binders 2
+ | _ -> err ())
let guess_lpar_ipat s strm =
- match Stream.npeek 1 strm with
- | [("","(")] ->
- (match Stream.npeek 2 strm with
- | [_; ("",("("|"["))] -> ()
- | [_; ("IDENT",_)] ->
- (match Stream.npeek 3 strm with
- | [_; _; ("", s')] when s = s' -> ()
- | _ -> raise Stream.Failure)
- | _ -> raise Stream.Failure)
- | _ -> raise Stream.Failure
+ match stream_nth 0 strm with
+ | KEYWORD "(" ->
+ (match stream_nth 1 strm with
+ | KEYWORD ("("|"[") -> ()
+ | IDENT _ ->
+ (match stream_nth 2 strm with
+ | KEYWORD s' when s = s' -> ()
+ | _ -> err ())
+ | _ -> err ())
+ | _ -> err ()
let guess_lpar_coloneq =
Gram.Entry.of_parser "guess_lpar_coloneq" (guess_lpar_ipat ":=")
@@ -110,9 +113,9 @@ let guess_lpar_colon =
let lookup_at_as_coma =
Gram.Entry.of_parser "lookup_at_as_coma"
(fun strm ->
- match Stream.npeek 1 strm with
- | [("",(","|"at"|"as"))] -> ()
- | _ -> raise Stream.Failure)
+ match stream_nth 0 strm with
+ | KEYWORD (","|"at"|"as") -> ()
+ | _ -> err ())
open Constr
open Prim
diff --git a/parsing/g_vernac.ml4 b/parsing/g_vernac.ml4
index 8951cfa2f..e213660d9 100644
--- a/parsing/g_vernac.ml4
+++ b/parsing/g_vernac.ml4
@@ -28,7 +28,7 @@ open Vernac_
open Module
let vernac_kw = [ ";"; ","; ">->"; ":<"; "<:"; "where"; "at" ]
-let _ = List.iter (fun s -> Lexer.add_token ("",s)) vernac_kw
+let _ = List.iter Lexer.add_keyword vernac_kw
(* Rem: do not join the different GEXTEND into one, it breaks native *)
(* compilation on PowerPC and Sun architectures *)
diff --git a/parsing/grammar.mllib b/parsing/grammar.mllib
index 483538da6..98905d3c0 100644
--- a/parsing/grammar.mllib
+++ b/parsing/grammar.mllib
@@ -66,6 +66,7 @@ Topconstr
Genarg
Ppextend
Tacexpr
+Tok
Lexer
Extend
Vernacexpr
diff --git a/parsing/lexer.ml4 b/parsing/lexer.ml4
index 1db651033..175ce16a4 100644
--- a/parsing/lexer.ml4
+++ b/parsing/lexer.ml4
@@ -9,6 +9,7 @@
open Pp
open Util
open Token
+open Tok
(* Dictionaries: trees annotated with string options, each node being a map
from chars to dictionaries (the subtrees). A trie, in other words. *)
@@ -190,16 +191,6 @@ let add_keyword str =
let remove_keyword str =
token_tree := ttree_remove !token_tree str
-(* Adding a new token (keyword or special token). *)
-let add_token (con, str) = match con with
- | "" -> add_keyword str
- | "METAIDENT" | "LEFTQMARK" | "IDENT" | "FIELD" | "INT" | "STRING" | "EOI"
- -> ()
- | _ ->
- raise (Token.Error ("\
-the constructor \"" ^ con ^ "\" is not recognized by Lexer"))
-
-
(* Freeze and unfreeze the state of the lexer *)
type frozen_t = ttree
@@ -395,54 +386,51 @@ let process_chars bp c cs =
let t = progress_from_byte None (-1) !token_tree cs c in
let ep = Stream.count cs in
match t with
- | Some t -> (("", t), (bp, ep))
+ | Some t -> (KEYWORD t, (bp, ep))
| None ->
let ep' = bp + utf8_char_size cs c in
njunk (ep' - ep) cs;
err (bp, ep') Undefined_token
-let parse_after_dollar bp =
- parser
- | [< ' ('a'..'z' | 'A'..'Z' | '_' as c); len = ident_tail (store 0 c) >] ->
- ("METAIDENT", get_buff len)
- | [< s >] ->
- match lookup_utf8 s with
- | Utf8Token (UnicodeLetter, n) ->
- ("METAIDENT", get_buff (ident_tail (nstore n 0 s) s))
- | AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars bp '$' s)
+let token_of_special c s = match c with
+ | '$' -> METAIDENT s
+ | '.' -> FIELD s
+ | _ -> assert false
-(* Parse what follows a dot *)
-let parse_after_dot bp c =
+(* Parse what follows a dot / a dollar *)
+
+let parse_after_special c bp =
parser
- | [< ' ('a'..'z' | 'A'..'Z' | '_' as c); len = ident_tail (store 0 c) >] ->
- ("FIELD", get_buff len)
+ | [< ' ('a'..'z' | 'A'..'Z' | '_' as d); len = ident_tail (store 0 d) >] ->
+ token_of_special c (get_buff len)
| [< s >] ->
match lookup_utf8 s with
| Utf8Token (UnicodeLetter, n) ->
- ("FIELD", get_buff (ident_tail (nstore n 0 s) s))
- | AsciiChar | Utf8Token _ | EmptyStream ->
- fst (process_chars bp c s)
+ token_of_special c (get_buff (ident_tail (nstore n 0 s) s))
+ | AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars bp c s)
(* Parse what follows a question mark *)
+
let parse_after_qmark bp s =
match Stream.peek s with
- |Some ('a'..'z' | 'A'..'Z' | '_') -> ("LEFTQMARK", "")
- |None -> ("","?")
+ | Some ('a'..'z' | 'A'..'Z' | '_') -> LEFTQMARK
+ | None -> KEYWORD "?"
| _ ->
match lookup_utf8 s with
- | Utf8Token (UnicodeLetter, _) -> ("LEFTQMARK", "")
+ | Utf8Token (UnicodeLetter, _) -> LEFTQMARK
| AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars bp '?' s)
(* Parse a token in a char stream *)
+
let rec next_token = parser bp
| [< '' ' | '\t' | '\n' |'\r' as c; s >] ->
comm_loc bp; push_char c; next_token s
- | [< ''$'; t = parse_after_dollar bp >] ep ->
+ | [< ''$' as c; t = parse_after_special c bp >] ep ->
comment_stop bp; (t, (ep, bp))
- | [< ''.' as c; t = parse_after_dot bp c >] ep ->
+ | [< ''.' as c; t = parse_after_special c bp >] ep ->
comment_stop bp;
- if Flags.do_beautify() & t=("",".") then between_com := true;
+ if Flags.do_beautify() && t=KEYWORD "." then between_com := true;
(t, (bp,ep))
| [< ''?'; s >] ep ->
let t = parse_after_qmark bp s in comment_stop bp; (t, (ep, bp))
@@ -450,13 +438,13 @@ let rec next_token = parser bp
len = ident_tail (store 0 c); s >] ep ->
let id = get_buff len in
comment_stop bp;
- (try ("", find_keyword id s) with Not_found -> ("IDENT", id)), (bp, ep)
+ (try KEYWORD (find_keyword id s) with Not_found -> IDENT id), (bp, ep)
| [< ' ('0'..'9' as c); len = number (store 0 c) >] ep ->
comment_stop bp;
- (("INT", get_buff len), (bp, ep))
+ (INT (get_buff len), (bp, ep))
| [< ''\"'; len = string false bp 0 >] ep ->
comment_stop bp;
- (("STRING", get_buff len), (bp, ep))
+ (STRING (get_buff len), (bp, ep))
| [< ' ('(' as c);
t = parser
| [< ''*'; s >] ->
@@ -473,12 +461,12 @@ let rec next_token = parser bp
let id = get_buff len in
let ep = Stream.count s in
comment_stop bp;
- (try ("",find_keyword id s) with Not_found -> ("IDENT",id)), (bp, ep)
+ (try KEYWORD (find_keyword id s) with Not_found -> IDENT id), (bp, ep)
| AsciiChar | Utf8Token ((UnicodeSymbol | UnicodeIdentPart), _) ->
let t = process_chars bp (Stream.next s) s in
comment_stop bp; t
| EmptyStream ->
- comment_stop bp; (("EOI", ""), (bp, bp + 1))
+ comment_stop bp; (EOI, (bp, bp + 1))
(* Location table system for creating tables associating a token count
to its location in a char stream (the source) *)
@@ -528,6 +516,19 @@ type location_table = (int * int) option array array ref
let location_table () = !current_location_table
let restore_location_table t = current_location_table := t
+
+(* The lexer of Coq *)
+
+(* Note: removing a token.
+ We do nothing because [remove_token] is called only when removing a grammar
+ rule with [Grammar.delete_rule]. The latter command is called only when
+ unfreezing the state of the grammar entries (see GRAMMAR summary, file
+ env/metasyntax.ml). Therefore, instead of removing tokens one by one,
+ we unfreeze the state of the lexer. This restores the behaviour of the
+ lexer. B.B. *)
+
+IFDEF CAMLP5 THEN
+
(* Names of tokens, for this lexer, used in Grammar error messages *)
let token_text = function
@@ -541,34 +542,23 @@ let token_text = function
| (con, "") -> con
| (con, prm) -> con ^ " \"" ^ prm ^ "\""
-(* The lexer of Coq *)
-
-(* Note: removing a token.
- We do nothing because [remove_token] is called only when removing a grammar
- rule with [Grammar.delete_rule]. The latter command is called only when
- unfreezing the state of the grammar entries (see GRAMMAR summary, file
- env/metasyntax.ml). Therefore, instead of removing tokens one by one,
- we unfreeze the state of the lexer. This restores the behaviour of the
- lexer. B.B. *)
+(* Adding a new token (keyword or special token). *)
-IFDEF CAMLP5 THEN
+let add_token pat = match Tok.of_pattern pat with
+ | KEYWORD s -> add_keyword s
+ | _ -> ()
let lexer = {
Token.tok_func = func;
Token.tok_using = add_token;
Token.tok_removing = (fun _ -> ());
- Token.tok_match = default_match;
+ Token.tok_match = Tok.match_pattern;
Token.tok_comm = None;
Token.tok_text = token_text }
-ELSE
+ELSE (* official camlp4 for ocaml >= 3.10 *)
-let lexer = {
- Token.func = func;
- Token.using = add_token;
- Token.removing = (fun _ -> ());
- Token.tparse = (fun _ -> None);
- Token.text = token_text }
+TODO
END
@@ -607,6 +597,6 @@ let strip s =
let terminal s =
let s = strip s in
if s = "" then failwith "empty token";
- if is_ident_not_keyword s then ("IDENT", s)
- else if is_number s then ("INT", s)
- else ("", s)
+ if is_ident_not_keyword s then IDENT s
+ else if is_number s then INT s
+ else KEYWORD s
diff --git a/parsing/lexer.mli b/parsing/lexer.mli
index 09492ceb1..7f7d36b74 100644
--- a/parsing/lexer.mli
+++ b/parsing/lexer.mli
@@ -18,7 +18,7 @@ type error =
exception Error of error
-val add_token : string * string -> unit
+val add_keyword : string -> unit
val remove_keyword : string -> unit
val is_keyword : string -> bool
@@ -43,7 +43,7 @@ val restore_com_state: com_state -> unit
val set_xml_output_comment : (string -> unit) -> unit
-val terminal : string -> string * string
+val terminal : string -> Tok.t
(** The lexer of Coq *)
diff --git a/parsing/pcoq.ml4 b/parsing/pcoq.ml4
index d12388382..7d78f5371 100644
--- a/parsing/pcoq.ml4
+++ b/parsing/pcoq.ml4
@@ -24,7 +24,7 @@ IFDEF CAMLP5 THEN
module L =
struct
- type te = string * string
+ type te = Tok.t
let lexer = Lexer.lexer
end
@@ -36,6 +36,9 @@ TODO
END
+let gram_token_of_token tok = Gramext.Stoken (Tok.to_pattern tok)
+let gram_token_of_string s = gram_token_of_token (Lexer.terminal s)
+
let grammar_delete e pos reinit rls =
List.iter
(fun (n,ass,lev) ->
@@ -99,7 +102,7 @@ end
open Gramtypes
type camlp4_rule =
- Compat.token Gramext.g_symbol list * Gramext.g_action
+ Tok.t Gramext.g_symbol list * Gramext.g_action
type camlp4_entry_rules =
(* first two parameters are name and assoc iff a level is created *)
@@ -611,7 +614,7 @@ let rec symbol_of_constr_prod_entry_key assoc from forpat typ =
Gramext.Slist1sep
(symbol_of_constr_prod_entry_key assoc from forpat (ETConstr typ'),
Gramext.srules
- [List.map (fun x -> Gramext.Stoken x) tkl,
+ [List.map gram_token_of_token tkl,
List.fold_right (fun _ v -> Gramext.action (fun _ -> v)) tkl
(Gramext.action (fun loc -> ()))])
| _ ->
@@ -635,9 +638,9 @@ let rec symbol_of_prod_entry_key = function
| Amodifiers s ->
Gramext.srules
[([], Gramext.action(fun _loc -> []));
- ([Gramext.Stoken ("", "(");
- Gramext.Slist1sep ((symbol_of_prod_entry_key s), Gramext.Stoken ("", ","));
- Gramext.Stoken ("", ")")],
+ ([gram_token_of_string "(";
+ Gramext.Slist1sep ((symbol_of_prod_entry_key s), gram_token_of_string ",");
+ gram_token_of_string ")"],
Gramext.action (fun _ l _ _loc -> l))]
| Aself -> Gramext.Sself
| Anext -> Gramext.Snext
diff --git a/parsing/pcoq.mli b/parsing/pcoq.mli
index 23d85d94a..ada016094 100644
--- a/parsing/pcoq.mli
+++ b/parsing/pcoq.mli
@@ -16,13 +16,13 @@ open Topconstr
open Tacexpr
open Libnames
-(*********************************************************************
- The parser of Coq *)
+(** The parser of Coq *)
-module Gram : Grammar.S with type te = Compat.token
+module Gram : Grammar.S with type te = Tok.t
-(*********************************************************************
+(**
The parser of Coq is built from three kinds of rule declarations:
+
- dynamic rules declared at the evaluation of Coq files (using
e.g. Notation, Infix, or Tactic Notation)
- static rules explicitly defined in files g_*.ml4
@@ -96,11 +96,14 @@ module Gram : Grammar.S with type te = Compat.token
*)
+val gram_token_of_token : Tok.t -> Tok.t Gramext.g_symbol
+val gram_token_of_string : string -> Tok.t Gramext.g_symbol
+
(** The superclass of all grammar entries *)
type grammar_object
type camlp4_rule =
- Compat.token Gramext.g_symbol list * Gramext.g_action
+ Tok.t Gramext.g_symbol list * Gramext.g_action
type camlp4_entry_rules =
(** first two parameters are name and assoc iff a level is created *)
@@ -138,8 +141,7 @@ val parse_string : 'a Gram.Entry.e -> string -> 'a
val eoi_entry : 'a Gram.Entry.e -> 'a Gram.Entry.e
val map_entry : ('a -> 'b) -> 'a Gram.Entry.e -> 'b Gram.Entry.e
-(*********************************************************************
- Table of Coq statically defined grammar entries *)
+(** Table of Coq statically defined grammar entries *)
type gram_universe
@@ -250,8 +252,7 @@ module Vernac_ :
(** The main entry: reads an optional vernac command *)
val main_entry : (loc * vernac_expr) option Gram.Entry.e
-(*********************************************************************
- Mapping formal entries into concrete ones *)
+(** Mapping formal entries into concrete ones *)
(** Binding constr entry keys to entries and symbols *)
@@ -260,21 +261,19 @@ val interp_constr_entry_key : bool (** true for cases_pattern *) ->
val symbol_of_constr_prod_entry_key : Gramext.g_assoc option ->
constr_entry_key -> bool -> constr_prod_entry_key ->
- Compat.token Gramext.g_symbol
+ Tok.t Gramext.g_symbol
(** Binding general entry keys to symbols *)
val symbol_of_prod_entry_key :
Gram.te prod_entry_key -> Gram.te Gramext.g_symbol
-(*********************************************************************
- Interpret entry names of the form "ne_constr_list" as entry keys *)
+(** Interpret entry names of the form "ne_constr_list" as entry keys *)
val interp_entry_name : bool (** true to fail on unknown entry *) ->
int option -> string -> string -> entry_type * Gram.te prod_entry_key
-(*********************************************************************
- Registering/resetting the level of a constr entry *)
+(** Registering/resetting the level of a constr entry *)
val find_position :
bool (** true if for creation in pattern entry; false if in constr entry *) ->
diff --git a/parsing/tok.ml b/parsing/tok.ml
new file mode 100644
index 000000000..870f2d5e7
--- /dev/null
+++ b/parsing/tok.ml
@@ -0,0 +1,90 @@
+(***********************************************************************
+ v * The Coq Proof Assistant / The Coq Development Team
+ <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud
+ \VV/ *************************************************************
+ // * This file is distributed under the terms of the
+ * GNU Lesser General Public License Version 2.1
+ ***********************************************************************)
+
+(** The type of token for the Coq lexer and parser *)
+
+type t =
+ | KEYWORD of string
+ | METAIDENT of string
+ | PATTERNIDENT of string
+ | IDENT of string
+ | FIELD of string
+ | INT of string
+ | STRING of string
+ | LEFTQMARK
+ | EOI
+
+let extract_string = function
+ | KEYWORD s -> s
+ | IDENT s -> s
+ | STRING s -> s
+ | METAIDENT s -> s
+ | PATTERNIDENT s -> s
+ | FIELD s -> s
+ | INT s -> s
+ | LEFTQMARK -> "?"
+ | EOI -> ""
+
+let to_string = function
+ | KEYWORD s -> Format.sprintf "%S" s
+ | IDENT s -> Format.sprintf "IDENT %S" s
+ | METAIDENT s -> Format.sprintf "METAIDENT %S" s
+ | PATTERNIDENT s -> Format.sprintf "PATTERNIDENT %S" s
+ | FIELD s -> Format.sprintf "FIELD %S" s
+ | INT s -> Format.sprintf "INT %s" s
+ | STRING s -> Format.sprintf "STRING %S" s
+ | LEFTQMARK -> "LEFTQMARK"
+ | EOI -> "EOI"
+
+let match_keyword kwd = function
+ | KEYWORD kwd' when kwd = kwd' -> true
+ | _ -> false
+
+let print ppf tok = Format.fprintf ppf "%s" (to_string tok)
+
+(** For camlp5, conversion from/to [Plexing.pattern],
+ and a match function analoguous to [Plexing.default_match] *)
+
+let of_pattern = function
+ | "", s -> KEYWORD s
+ | "IDENT", s -> IDENT s
+ | "METAIDENT", s -> METAIDENT s
+ | "PATTERNIDENT", s -> PATTERNIDENT s
+ | "FIELD", s -> FIELD s
+ | "INT", s -> INT s
+ | "STRING", s -> STRING s
+ | "LEFTQMARK", _ -> LEFTQMARK
+ | "EOI", _ -> EOI
+ | _ -> failwith "Tok.of_pattern: not a constructor"
+
+let to_pattern = function
+ | KEYWORD s -> "", s
+ | IDENT s -> "IDENT", s
+ | METAIDENT s -> "METAIDENT", s
+ | PATTERNIDENT s -> "PATTERNIDENT", s
+ | FIELD s -> "FIELD", s
+ | INT s -> "INT", s
+ | STRING s -> "STRING", s
+ | LEFTQMARK -> "LEFTQMARK", ""
+ | EOI -> "EOI", ""
+
+let match_pattern =
+ let err () = raise Stream.Failure in
+ function
+ | "", "" -> (function KEYWORD s -> s | _ -> err ())
+ | "IDENT", "" -> (function IDENT s -> s | _ -> err ())
+ | "METAIDENT", "" -> (function METAIDENT s -> s | _ -> err ())
+ | "PATTERNIDENT", "" -> (function PATTERNIDENT s -> s | _ -> err ())
+ | "FIELD", "" -> (function FIELD s -> s | _ -> err ())
+ | "INT", "" -> (function INT s -> s | _ -> err ())
+ | "STRING", "" -> (function STRING s -> s | _ -> err ())
+ | "LEFTQMARK", "" -> (function LEFTQMARK -> "" | _ -> err ())
+ | "EOI", "" -> (function EOI -> "" | _ -> err ())
+ | pat ->
+ let tok = of_pattern pat in
+ function tok' -> if tok = tok' then snd pat else err ()
diff --git a/parsing/tok.mli b/parsing/tok.mli
new file mode 100644
index 000000000..0e2730b17
--- /dev/null
+++ b/parsing/tok.mli
@@ -0,0 +1,29 @@
+(***********************************************************************
+ v * The Coq Proof Assistant / The Coq Development Team
+ <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud
+ \VV/ *************************************************************
+ // * This file is distributed under the terms of the
+ * GNU Lesser General Public License Version 2.1
+ ***********************************************************************)
+
+(** The type of token for the Coq lexer and parser *)
+
+type t =
+ | KEYWORD of string
+ | METAIDENT of string
+ | PATTERNIDENT of string
+ | IDENT of string
+ | FIELD of string
+ | INT of string
+ | STRING of string
+ | LEFTQMARK
+ | EOI
+
+val extract_string : t -> string
+val to_string : t -> string
+val print : Format.formatter -> t -> unit
+val match_keyword : string -> t -> bool
+(** for camlp5 *)
+val of_pattern : string*string -> t
+val to_pattern : t -> string*string
+val match_pattern : string*string -> t -> string
diff --git a/toplevel/metasyntax.ml b/toplevel/metasyntax.ml
index 65aac9769..b99078106 100644
--- a/toplevel/metasyntax.ml
+++ b/toplevel/metasyntax.ml
@@ -20,6 +20,7 @@ open Vernacexpr
open Pcoq
open Rawterm
open Libnames
+open Tok
open Lexer
open Egrammar
open Notation
@@ -28,7 +29,7 @@ open Nameops
(**********************************************************************)
(* Tokens *)
-let cache_token (_,s) = add_token ("", s)
+let cache_token (_,s) = add_keyword s
let (inToken, outToken) =
declare_object {(default_object "TOKEN") with
@@ -581,20 +582,20 @@ let is_not_small_constr = function
| _ -> false
let rec define_keywords_aux = function
- | GramConstrNonTerminal(e,Some _) as n1 :: GramConstrTerminal("IDENT",k) :: l
+ | GramConstrNonTerminal(e,Some _) as n1 :: GramConstrTerminal(IDENT k) :: l
when is_not_small_constr e ->
message ("Defining '"^k^"' as keyword");
- Lexer.add_token("",k);
- n1 :: GramConstrTerminal("",k) :: define_keywords_aux l
+ Lexer.add_keyword k;
+ n1 :: GramConstrTerminal(KEYWORD k) :: define_keywords_aux l
| n :: l -> n :: define_keywords_aux l
| [] -> []
(* Ensure that IDENT articulation terminal symbols are keywords *)
let define_keywords = function
- | GramConstrTerminal("IDENT",k)::l ->
+ | GramConstrTerminal(IDENT k)::l ->
message ("Defining '"^k^"' as keyword");
- Lexer.add_token("",k);
- GramConstrTerminal("",k) :: define_keywords_aux l
+ Lexer.add_keyword k;
+ GramConstrTerminal(KEYWORD k) :: define_keywords_aux l
| l -> define_keywords_aux l
let distribute a ll = List.map (fun l -> a @ l) ll
diff --git a/toplevel/toplevel.ml b/toplevel/toplevel.ml
index 453552273..b9d643540 100644
--- a/toplevel/toplevel.ml
+++ b/toplevel/toplevel.ml
@@ -315,8 +315,8 @@ let print_toplevel_error exc =
(* Read the input stream until a dot is encountered *)
let parse_to_dot =
let rec dot st = match Stream.next st with
- | ("", ".") -> ()
- | ("EOI", "") -> raise End_of_input
+ | Tok.KEYWORD "." -> ()
+ | Tok.EOI -> raise End_of_input
| _ -> dot st
in
Gram.Entry.of_parser "Coqtoplevel.dot" dot