diff options
author | letouzey <letouzey@85f007b7-540e-0410-9357-904b9bb8a0f7> | 2010-05-19 15:29:44 +0000 |
---|---|---|
committer | letouzey <letouzey@85f007b7-540e-0410-9357-904b9bb8a0f7> | 2010-05-19 15:29:44 +0000 |
commit | 259dde7928696593c2d3c6de474f5cf50fa4417d (patch) | |
tree | 7fe225a0731c13b30cb10ae7098e096f38903366 /parsing | |
parent | e1feff1215562d8f99fedf73c87011e6d7edca19 (diff) |
Nicer representation of tokens, more independant of camlp*
Cf tok.ml, token isn't anymore string*string where first
string encodes the kind of the token, but rather a nice
sum type. Unfortunately, string*string (a.k.a Plexing.pattern)
is still used in some places of Camlp5, so there's a few
conversions back and forth. But the penalty should be quite low,
and having nicer tokens helps in the forthcoming integration
of support for camlp4 post 3.10
git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@13018 85f007b7-540e-0410-9357-904b9bb8a0f7
Diffstat (limited to 'parsing')
-rw-r--r-- | parsing/argextend.ml4 | 4 | ||||
-rw-r--r-- | parsing/egrammar.ml | 6 | ||||
-rw-r--r-- | parsing/egrammar.mli | 2 | ||||
-rw-r--r-- | parsing/extend.ml | 2 | ||||
-rw-r--r-- | parsing/extend.mli | 2 | ||||
-rw-r--r-- | parsing/g_constr.ml4 | 73 | ||||
-rw-r--r-- | parsing/g_prim.ml4 | 2 | ||||
-rw-r--r-- | parsing/g_tactic.ml4 | 109 | ||||
-rw-r--r-- | parsing/g_vernac.ml4 | 2 | ||||
-rw-r--r-- | parsing/grammar.mllib | 1 | ||||
-rw-r--r-- | parsing/lexer.ml4 | 108 | ||||
-rw-r--r-- | parsing/lexer.mli | 4 | ||||
-rw-r--r-- | parsing/pcoq.ml4 | 15 | ||||
-rw-r--r-- | parsing/pcoq.mli | 27 | ||||
-rw-r--r-- | parsing/tok.ml | 90 | ||||
-rw-r--r-- | parsing/tok.mli | 29 |
16 files changed, 296 insertions, 180 deletions
diff --git a/parsing/argextend.ml4 b/parsing/argextend.ml4 index 72b9d0a50..fd4952a52 100644 --- a/parsing/argextend.ml4 +++ b/parsing/argextend.ml4 @@ -105,7 +105,7 @@ let make_act loc act pil = make (List.rev pil) let make_prod_item = function - | GramTerminal s -> <:expr< (Gramext.Stoken (Lexer.terminal $str:s$)) >> + | GramTerminal s -> <:expr< gram_token_of_string $str:s$ >> | GramNonTerminal (_,_,g,_) -> <:expr< Pcoq.symbol_of_prod_entry_key $mlexpr_of_prod_entry_key g$ >> @@ -249,7 +249,7 @@ EXTEND GramNonTerminal (loc, t, g, Some (Names.id_of_string s)) | s = STRING -> if String.length s > 0 && Util.is_letter s.[0] then - Lexer.add_token ("", s); + Lexer.add_keyword s; GramTerminal s ] ] ; diff --git a/parsing/egrammar.ml b/parsing/egrammar.ml index 337ddd7ed..a07c52e84 100644 --- a/parsing/egrammar.ml +++ b/parsing/egrammar.ml @@ -55,7 +55,7 @@ let cases_pattern_expr_of_name (loc,na) = match na with | Name id -> CPatAtom (loc,Some (Ident (loc,id))) type grammar_constr_prod_item = - | GramConstrTerminal of Token.pattern + | GramConstrTerminal of Tok.t | GramConstrNonTerminal of constr_prod_entry_key * identifier option | GramConstrListMark of int * bool (* tells action rule to make a list of the n previous parsed items; @@ -133,7 +133,7 @@ let make_cases_pattern_action let rec make_constr_prod_item assoc from forpat = function | GramConstrTerminal tok :: l -> - Gramext.Stoken tok :: make_constr_prod_item assoc from forpat l + gram_token_of_token tok :: make_constr_prod_item assoc from forpat l | GramConstrNonTerminal (nt, ovar) :: l -> symbol_of_constr_prod_entry_key assoc from forpat nt :: make_constr_prod_item assoc from forpat l @@ -203,7 +203,7 @@ type grammar_prod_item = loc * argument_type * Gram.te prod_entry_key * identifier option let make_prod_item = function - | GramTerminal s -> (Gramext.Stoken (Lexer.terminal s), None) + | GramTerminal s -> (gram_token_of_string s, None) | GramNonTerminal (_,t,e,po) -> (symbol_of_prod_entry_key e, Option.map (fun p -> (p,t)) po) diff --git a/parsing/egrammar.mli b/parsing/egrammar.mli index 1e9159933..874aed570 100644 --- a/parsing/egrammar.mli +++ b/parsing/egrammar.mli @@ -25,7 +25,7 @@ open Mod_subst (** For constr notations *) type grammar_constr_prod_item = - | GramConstrTerminal of Token.pattern + | GramConstrTerminal of Tok.t | GramConstrNonTerminal of constr_prod_entry_key * identifier option | GramConstrListMark of int * bool (* tells action rule to make a list of the n previous parsed items; diff --git a/parsing/extend.ml b/parsing/extend.ml index a3ef4893a..cc3551d32 100644 --- a/parsing/extend.ml +++ b/parsing/extend.ml @@ -46,7 +46,7 @@ type ('lev,'pos) constr_entry_key_gen = | ETConstr of ('lev * 'pos) | ETPattern | ETOther of string * string - | ETConstrList of ('lev * 'pos) * Token.pattern list + | ETConstrList of ('lev * 'pos) * Tok.t list (* Entries level (left-hand-side of grammar rules) *) type constr_entry_key = diff --git a/parsing/extend.mli b/parsing/extend.mli index 5ff475f79..99b83785c 100644 --- a/parsing/extend.mli +++ b/parsing/extend.mli @@ -46,7 +46,7 @@ type ('lev,'pos) constr_entry_key_gen = | ETConstr of ('lev * 'pos) | ETPattern | ETOther of string * string - | ETConstrList of ('lev * 'pos) * Token.pattern list + | ETConstrList of ('lev * 'pos) * Tok.t list (* Entries level (left-hand-side of grammar rules) *) type constr_entry_key = diff --git a/parsing/g_constr.ml4 b/parsing/g_constr.ml4 index 12ed0c4b6..9ce65226d 100644 --- a/parsing/g_constr.ml4 +++ b/parsing/g_constr.ml4 @@ -15,8 +15,8 @@ open Term open Names open Libnames open Topconstr - open Util +open Tok let constr_kw = [ "forall"; "fun"; "match"; "fix"; "cofix"; "with"; "in"; "for"; @@ -24,7 +24,7 @@ let constr_kw = "Prop"; "Set"; "Type"; ".("; "_"; ".."; "`{"; "`("; "{|"; "|}" ] -let _ = List.iter (fun s -> Lexer.add_token("",s)) constr_kw +let _ = List.iter Lexer.add_keyword constr_kw let mk_cast = function (c,(_,None)) -> c @@ -67,60 +67,61 @@ let mk_fix(loc,kw,id,dcls) = let mk_single_fix (loc,kw,dcl) = let (id,_,_,_,_) = dcl in mk_fix(loc,kw,id,[dcl]) +let err () = raise Stream.Failure + (* Hack to parse "(x:=t)" as an explicit argument without conflicts with the *) (* admissible notation "(x t)" *) let lpar_id_coloneq = Gram.Entry.of_parser "test_lpar_id_coloneq" (fun strm -> - match Stream.npeek 1 strm with - | [("","(")] -> - (match Stream.npeek 2 strm with - | [_; ("IDENT",s)] -> - (match Stream.npeek 3 strm with - | [_; _; ("", ":=")] -> - Stream.junk strm; Stream.junk strm; Stream.junk strm; + match stream_nth 0 strm with + | KEYWORD "(" -> + (match stream_nth 1 strm with + | IDENT s -> + (match stream_nth 2 strm with + | KEYWORD ":=" -> + stream_njunk 3 strm; Names.id_of_string s - | _ -> raise Stream.Failure) - | _ -> raise Stream.Failure) - | _ -> raise Stream.Failure) + | _ -> err ()) + | _ -> err ()) + | _ -> err ()) let impl_ident = Gram.Entry.of_parser "impl_ident" (fun strm -> - match Stream.npeek 1 strm with - | [(_,"{")] -> - (match Stream.npeek 2 strm with - | [_;("IDENT",("wf"|"struct"|"measure"))] -> - raise Stream.Failure - | [_;("IDENT",s)] -> - Stream.junk strm; Stream.junk strm; + match stream_nth 0 strm with + | KEYWORD "{" -> + (match stream_nth 1 strm with + | IDENT ("wf"|"struct"|"measure") -> err () + | IDENT s -> + stream_njunk 2 strm; Names.id_of_string s - | _ -> raise Stream.Failure) - | _ -> raise Stream.Failure) + | _ -> err ()) + | _ -> err ()) let ident_colon = Gram.Entry.of_parser "ident_colon" (fun strm -> - match Stream.npeek 1 strm with - | [("IDENT",s)] -> - (match Stream.npeek 2 strm with - | [_; ("", ":")] -> - Stream.junk strm; Stream.junk strm; + match stream_nth 0 strm with + | IDENT s -> + (match stream_nth 1 strm with + | KEYWORD ":" -> + stream_njunk 2 strm; Names.id_of_string s - | _ -> raise Stream.Failure) - | _ -> raise Stream.Failure) + | _ -> err ()) + | _ -> err ()) let ident_with = Gram.Entry.of_parser "ident_with" (fun strm -> - match Stream.npeek 1 strm with - | [("IDENT",s)] -> - (match Stream.npeek 2 strm with - | [_; ("", "with")] -> - Stream.junk strm; Stream.junk strm; - Names.id_of_string s - | _ -> raise Stream.Failure) - | _ -> raise Stream.Failure) + match stream_nth 0 strm with + | IDENT s -> + (match stream_nth 1 strm with + | KEYWORD "with" -> + stream_njunk 2 strm; + Names.id_of_string s + | _ -> err ()) + | _ -> err ()) let aliasvar = function CPatAlias (_, _, id) -> Some (Name id) | _ -> None diff --git a/parsing/g_prim.ml4 b/parsing/g_prim.ml4 index 5e6e0e1ed..b1a2fa54b 100644 --- a/parsing/g_prim.ml4 +++ b/parsing/g_prim.ml4 @@ -12,7 +12,7 @@ open Libnames open Topconstr let prim_kw = ["{"; "}"; "["; "]"; "("; ")"; "'"] -let _ = List.iter (fun s -> Lexer.add_token("",s)) prim_kw +let _ = List.iter Lexer.add_keyword prim_kw open Prim open Nametab diff --git a/parsing/g_tactic.ml4 b/parsing/g_tactic.ml4 index c1400f0c4..b39ea5770 100644 --- a/parsing/g_tactic.ml4 +++ b/parsing/g_tactic.ml4 @@ -15,54 +15,57 @@ open Genarg open Topconstr open Libnames open Termops +open Tok let all_with delta = make_red_flag [FBeta;FIota;FZeta;delta] let tactic_kw = [ "->"; "<-" ; "by" ] -let _ = List.iter (fun s -> Lexer.add_token("",s)) tactic_kw +let _ = List.iter Lexer.add_keyword tactic_kw + +let err () = raise Stream.Failure (* Hack to parse "(x:=t)" as an explicit argument without conflicts with the *) (* admissible notation "(x t)" *) let test_lpar_id_coloneq = Gram.Entry.of_parser "lpar_id_coloneq" (fun strm -> - match Stream.npeek 1 strm with - | [("","(")] -> - (match Stream.npeek 2 strm with - | [_; ("IDENT",s)] -> - (match Stream.npeek 3 strm with - | [_; _; ("", ":=")] -> () - | _ -> raise Stream.Failure) - | _ -> raise Stream.Failure) - | _ -> raise Stream.Failure) + match stream_nth 0 strm with + | KEYWORD "(" -> + (match stream_nth 1 strm with + | IDENT _ -> + (match stream_nth 2 strm with + | KEYWORD ":=" -> () + | _ -> err ()) + | _ -> err ()) + | _ -> err ()) (* idem for (x:=t) and (1:=t) *) let test_lpar_idnum_coloneq = Gram.Entry.of_parser "test_lpar_idnum_coloneq" (fun strm -> - match Stream.npeek 1 strm with - | [("","(")] -> - (match Stream.npeek 2 strm with - | [_; (("IDENT"|"INT"),_)] -> - (match Stream.npeek 3 strm with - | [_; _; ("", ":=")] -> () - | _ -> raise Stream.Failure) - | _ -> raise Stream.Failure) - | _ -> raise Stream.Failure) + match stream_nth 0 strm with + | KEYWORD "(" -> + (match stream_nth 1 strm with + | IDENT _ | INT _ -> + (match stream_nth 2 strm with + | KEYWORD ":=" -> () + | _ -> err ()) + | _ -> err ()) + | _ -> err ()) (* idem for (x:t) *) let test_lpar_id_colon = Gram.Entry.of_parser "lpar_id_colon" (fun strm -> - match Stream.npeek 1 strm with - | [("","(")] -> - (match Stream.npeek 2 strm with - | [_; ("IDENT",id)] -> - (match Stream.npeek 3 strm with - | [_; _; ("", ":")] -> () - | _ -> raise Stream.Failure) - | _ -> raise Stream.Failure) - | _ -> raise Stream.Failure) + match stream_nth 0 strm with + | KEYWORD "(" -> + (match stream_nth 1 strm with + | IDENT _ -> + (match stream_nth 2 strm with + | KEYWORD ":" -> () + | _ -> err ()) + | _ -> err ()) + | _ -> err ()) (* idem for (x1..xn:t) [n^2 complexity but exceptional use] *) let check_for_coloneq = @@ -70,36 +73,36 @@ let check_for_coloneq = (fun strm -> let rec skip_to_rpar p n = match list_last (Stream.npeek n strm) with - | ("","(") -> skip_to_rpar (p+1) (n+1) - | ("",")") -> if p=0 then n+1 else skip_to_rpar (p-1) (n+1) - | ("",".") -> raise Stream.Failure + | KEYWORD "(" -> skip_to_rpar (p+1) (n+1) + | KEYWORD ")" -> if p=0 then n+1 else skip_to_rpar (p-1) (n+1) + | KEYWORD "." -> err () | _ -> skip_to_rpar p (n+1) in let rec skip_names n = match list_last (Stream.npeek n strm) with - | ("IDENT",_) | ("","_") -> skip_names (n+1) - | ("",":") -> skip_to_rpar 0 (n+1) (* skip a constr *) - | _ -> raise Stream.Failure in + | IDENT _ | KEYWORD "_" -> skip_names (n+1) + | KEYWORD ":" -> skip_to_rpar 0 (n+1) (* skip a constr *) + | _ -> err () in let rec skip_binders n = match list_last (Stream.npeek n strm) with - | ("","(") -> skip_binders (skip_names (n+1)) - | ("IDENT",_) | ("","_") -> skip_binders (n+1) - | ("",":=") -> () - | _ -> raise Stream.Failure in + | KEYWORD "(" -> skip_binders (skip_names (n+1)) + | IDENT _ | KEYWORD "_" -> skip_binders (n+1) + | KEYWORD ":=" -> () + | _ -> err () in match Stream.npeek 1 strm with - | [("","(")] -> skip_binders 2 - | _ -> raise Stream.Failure) + | [KEYWORD "("] -> skip_binders 2 + | _ -> err ()) let guess_lpar_ipat s strm = - match Stream.npeek 1 strm with - | [("","(")] -> - (match Stream.npeek 2 strm with - | [_; ("",("("|"["))] -> () - | [_; ("IDENT",_)] -> - (match Stream.npeek 3 strm with - | [_; _; ("", s')] when s = s' -> () - | _ -> raise Stream.Failure) - | _ -> raise Stream.Failure) - | _ -> raise Stream.Failure + match stream_nth 0 strm with + | KEYWORD "(" -> + (match stream_nth 1 strm with + | KEYWORD ("("|"[") -> () + | IDENT _ -> + (match stream_nth 2 strm with + | KEYWORD s' when s = s' -> () + | _ -> err ()) + | _ -> err ()) + | _ -> err () let guess_lpar_coloneq = Gram.Entry.of_parser "guess_lpar_coloneq" (guess_lpar_ipat ":=") @@ -110,9 +113,9 @@ let guess_lpar_colon = let lookup_at_as_coma = Gram.Entry.of_parser "lookup_at_as_coma" (fun strm -> - match Stream.npeek 1 strm with - | [("",(","|"at"|"as"))] -> () - | _ -> raise Stream.Failure) + match stream_nth 0 strm with + | KEYWORD (","|"at"|"as") -> () + | _ -> err ()) open Constr open Prim diff --git a/parsing/g_vernac.ml4 b/parsing/g_vernac.ml4 index 8951cfa2f..e213660d9 100644 --- a/parsing/g_vernac.ml4 +++ b/parsing/g_vernac.ml4 @@ -28,7 +28,7 @@ open Vernac_ open Module let vernac_kw = [ ";"; ","; ">->"; ":<"; "<:"; "where"; "at" ] -let _ = List.iter (fun s -> Lexer.add_token ("",s)) vernac_kw +let _ = List.iter Lexer.add_keyword vernac_kw (* Rem: do not join the different GEXTEND into one, it breaks native *) (* compilation on PowerPC and Sun architectures *) diff --git a/parsing/grammar.mllib b/parsing/grammar.mllib index 483538da6..98905d3c0 100644 --- a/parsing/grammar.mllib +++ b/parsing/grammar.mllib @@ -66,6 +66,7 @@ Topconstr Genarg Ppextend Tacexpr +Tok Lexer Extend Vernacexpr diff --git a/parsing/lexer.ml4 b/parsing/lexer.ml4 index 1db651033..175ce16a4 100644 --- a/parsing/lexer.ml4 +++ b/parsing/lexer.ml4 @@ -9,6 +9,7 @@ open Pp open Util open Token +open Tok (* Dictionaries: trees annotated with string options, each node being a map from chars to dictionaries (the subtrees). A trie, in other words. *) @@ -190,16 +191,6 @@ let add_keyword str = let remove_keyword str = token_tree := ttree_remove !token_tree str -(* Adding a new token (keyword or special token). *) -let add_token (con, str) = match con with - | "" -> add_keyword str - | "METAIDENT" | "LEFTQMARK" | "IDENT" | "FIELD" | "INT" | "STRING" | "EOI" - -> () - | _ -> - raise (Token.Error ("\ -the constructor \"" ^ con ^ "\" is not recognized by Lexer")) - - (* Freeze and unfreeze the state of the lexer *) type frozen_t = ttree @@ -395,54 +386,51 @@ let process_chars bp c cs = let t = progress_from_byte None (-1) !token_tree cs c in let ep = Stream.count cs in match t with - | Some t -> (("", t), (bp, ep)) + | Some t -> (KEYWORD t, (bp, ep)) | None -> let ep' = bp + utf8_char_size cs c in njunk (ep' - ep) cs; err (bp, ep') Undefined_token -let parse_after_dollar bp = - parser - | [< ' ('a'..'z' | 'A'..'Z' | '_' as c); len = ident_tail (store 0 c) >] -> - ("METAIDENT", get_buff len) - | [< s >] -> - match lookup_utf8 s with - | Utf8Token (UnicodeLetter, n) -> - ("METAIDENT", get_buff (ident_tail (nstore n 0 s) s)) - | AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars bp '$' s) +let token_of_special c s = match c with + | '$' -> METAIDENT s + | '.' -> FIELD s + | _ -> assert false -(* Parse what follows a dot *) -let parse_after_dot bp c = +(* Parse what follows a dot / a dollar *) + +let parse_after_special c bp = parser - | [< ' ('a'..'z' | 'A'..'Z' | '_' as c); len = ident_tail (store 0 c) >] -> - ("FIELD", get_buff len) + | [< ' ('a'..'z' | 'A'..'Z' | '_' as d); len = ident_tail (store 0 d) >] -> + token_of_special c (get_buff len) | [< s >] -> match lookup_utf8 s with | Utf8Token (UnicodeLetter, n) -> - ("FIELD", get_buff (ident_tail (nstore n 0 s) s)) - | AsciiChar | Utf8Token _ | EmptyStream -> - fst (process_chars bp c s) + token_of_special c (get_buff (ident_tail (nstore n 0 s) s)) + | AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars bp c s) (* Parse what follows a question mark *) + let parse_after_qmark bp s = match Stream.peek s with - |Some ('a'..'z' | 'A'..'Z' | '_') -> ("LEFTQMARK", "") - |None -> ("","?") + | Some ('a'..'z' | 'A'..'Z' | '_') -> LEFTQMARK + | None -> KEYWORD "?" | _ -> match lookup_utf8 s with - | Utf8Token (UnicodeLetter, _) -> ("LEFTQMARK", "") + | Utf8Token (UnicodeLetter, _) -> LEFTQMARK | AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars bp '?' s) (* Parse a token in a char stream *) + let rec next_token = parser bp | [< '' ' | '\t' | '\n' |'\r' as c; s >] -> comm_loc bp; push_char c; next_token s - | [< ''$'; t = parse_after_dollar bp >] ep -> + | [< ''$' as c; t = parse_after_special c bp >] ep -> comment_stop bp; (t, (ep, bp)) - | [< ''.' as c; t = parse_after_dot bp c >] ep -> + | [< ''.' as c; t = parse_after_special c bp >] ep -> comment_stop bp; - if Flags.do_beautify() & t=("",".") then between_com := true; + if Flags.do_beautify() && t=KEYWORD "." then between_com := true; (t, (bp,ep)) | [< ''?'; s >] ep -> let t = parse_after_qmark bp s in comment_stop bp; (t, (ep, bp)) @@ -450,13 +438,13 @@ let rec next_token = parser bp len = ident_tail (store 0 c); s >] ep -> let id = get_buff len in comment_stop bp; - (try ("", find_keyword id s) with Not_found -> ("IDENT", id)), (bp, ep) + (try KEYWORD (find_keyword id s) with Not_found -> IDENT id), (bp, ep) | [< ' ('0'..'9' as c); len = number (store 0 c) >] ep -> comment_stop bp; - (("INT", get_buff len), (bp, ep)) + (INT (get_buff len), (bp, ep)) | [< ''\"'; len = string false bp 0 >] ep -> comment_stop bp; - (("STRING", get_buff len), (bp, ep)) + (STRING (get_buff len), (bp, ep)) | [< ' ('(' as c); t = parser | [< ''*'; s >] -> @@ -473,12 +461,12 @@ let rec next_token = parser bp let id = get_buff len in let ep = Stream.count s in comment_stop bp; - (try ("",find_keyword id s) with Not_found -> ("IDENT",id)), (bp, ep) + (try KEYWORD (find_keyword id s) with Not_found -> IDENT id), (bp, ep) | AsciiChar | Utf8Token ((UnicodeSymbol | UnicodeIdentPart), _) -> let t = process_chars bp (Stream.next s) s in comment_stop bp; t | EmptyStream -> - comment_stop bp; (("EOI", ""), (bp, bp + 1)) + comment_stop bp; (EOI, (bp, bp + 1)) (* Location table system for creating tables associating a token count to its location in a char stream (the source) *) @@ -528,6 +516,19 @@ type location_table = (int * int) option array array ref let location_table () = !current_location_table let restore_location_table t = current_location_table := t + +(* The lexer of Coq *) + +(* Note: removing a token. + We do nothing because [remove_token] is called only when removing a grammar + rule with [Grammar.delete_rule]. The latter command is called only when + unfreezing the state of the grammar entries (see GRAMMAR summary, file + env/metasyntax.ml). Therefore, instead of removing tokens one by one, + we unfreeze the state of the lexer. This restores the behaviour of the + lexer. B.B. *) + +IFDEF CAMLP5 THEN + (* Names of tokens, for this lexer, used in Grammar error messages *) let token_text = function @@ -541,34 +542,23 @@ let token_text = function | (con, "") -> con | (con, prm) -> con ^ " \"" ^ prm ^ "\"" -(* The lexer of Coq *) - -(* Note: removing a token. - We do nothing because [remove_token] is called only when removing a grammar - rule with [Grammar.delete_rule]. The latter command is called only when - unfreezing the state of the grammar entries (see GRAMMAR summary, file - env/metasyntax.ml). Therefore, instead of removing tokens one by one, - we unfreeze the state of the lexer. This restores the behaviour of the - lexer. B.B. *) +(* Adding a new token (keyword or special token). *) -IFDEF CAMLP5 THEN +let add_token pat = match Tok.of_pattern pat with + | KEYWORD s -> add_keyword s + | _ -> () let lexer = { Token.tok_func = func; Token.tok_using = add_token; Token.tok_removing = (fun _ -> ()); - Token.tok_match = default_match; + Token.tok_match = Tok.match_pattern; Token.tok_comm = None; Token.tok_text = token_text } -ELSE +ELSE (* official camlp4 for ocaml >= 3.10 *) -let lexer = { - Token.func = func; - Token.using = add_token; - Token.removing = (fun _ -> ()); - Token.tparse = (fun _ -> None); - Token.text = token_text } +TODO END @@ -607,6 +597,6 @@ let strip s = let terminal s = let s = strip s in if s = "" then failwith "empty token"; - if is_ident_not_keyword s then ("IDENT", s) - else if is_number s then ("INT", s) - else ("", s) + if is_ident_not_keyword s then IDENT s + else if is_number s then INT s + else KEYWORD s diff --git a/parsing/lexer.mli b/parsing/lexer.mli index 09492ceb1..7f7d36b74 100644 --- a/parsing/lexer.mli +++ b/parsing/lexer.mli @@ -18,7 +18,7 @@ type error = exception Error of error -val add_token : string * string -> unit +val add_keyword : string -> unit val remove_keyword : string -> unit val is_keyword : string -> bool @@ -43,7 +43,7 @@ val restore_com_state: com_state -> unit val set_xml_output_comment : (string -> unit) -> unit -val terminal : string -> string * string +val terminal : string -> Tok.t (** The lexer of Coq *) diff --git a/parsing/pcoq.ml4 b/parsing/pcoq.ml4 index d12388382..7d78f5371 100644 --- a/parsing/pcoq.ml4 +++ b/parsing/pcoq.ml4 @@ -24,7 +24,7 @@ IFDEF CAMLP5 THEN module L = struct - type te = string * string + type te = Tok.t let lexer = Lexer.lexer end @@ -36,6 +36,9 @@ TODO END +let gram_token_of_token tok = Gramext.Stoken (Tok.to_pattern tok) +let gram_token_of_string s = gram_token_of_token (Lexer.terminal s) + let grammar_delete e pos reinit rls = List.iter (fun (n,ass,lev) -> @@ -99,7 +102,7 @@ end open Gramtypes type camlp4_rule = - Compat.token Gramext.g_symbol list * Gramext.g_action + Tok.t Gramext.g_symbol list * Gramext.g_action type camlp4_entry_rules = (* first two parameters are name and assoc iff a level is created *) @@ -611,7 +614,7 @@ let rec symbol_of_constr_prod_entry_key assoc from forpat typ = Gramext.Slist1sep (symbol_of_constr_prod_entry_key assoc from forpat (ETConstr typ'), Gramext.srules - [List.map (fun x -> Gramext.Stoken x) tkl, + [List.map gram_token_of_token tkl, List.fold_right (fun _ v -> Gramext.action (fun _ -> v)) tkl (Gramext.action (fun loc -> ()))]) | _ -> @@ -635,9 +638,9 @@ let rec symbol_of_prod_entry_key = function | Amodifiers s -> Gramext.srules [([], Gramext.action(fun _loc -> [])); - ([Gramext.Stoken ("", "("); - Gramext.Slist1sep ((symbol_of_prod_entry_key s), Gramext.Stoken ("", ",")); - Gramext.Stoken ("", ")")], + ([gram_token_of_string "("; + Gramext.Slist1sep ((symbol_of_prod_entry_key s), gram_token_of_string ","); + gram_token_of_string ")"], Gramext.action (fun _ l _ _loc -> l))] | Aself -> Gramext.Sself | Anext -> Gramext.Snext diff --git a/parsing/pcoq.mli b/parsing/pcoq.mli index 23d85d94a..ada016094 100644 --- a/parsing/pcoq.mli +++ b/parsing/pcoq.mli @@ -16,13 +16,13 @@ open Topconstr open Tacexpr open Libnames -(********************************************************************* - The parser of Coq *) +(** The parser of Coq *) -module Gram : Grammar.S with type te = Compat.token +module Gram : Grammar.S with type te = Tok.t -(********************************************************************* +(** The parser of Coq is built from three kinds of rule declarations: + - dynamic rules declared at the evaluation of Coq files (using e.g. Notation, Infix, or Tactic Notation) - static rules explicitly defined in files g_*.ml4 @@ -96,11 +96,14 @@ module Gram : Grammar.S with type te = Compat.token *) +val gram_token_of_token : Tok.t -> Tok.t Gramext.g_symbol +val gram_token_of_string : string -> Tok.t Gramext.g_symbol + (** The superclass of all grammar entries *) type grammar_object type camlp4_rule = - Compat.token Gramext.g_symbol list * Gramext.g_action + Tok.t Gramext.g_symbol list * Gramext.g_action type camlp4_entry_rules = (** first two parameters are name and assoc iff a level is created *) @@ -138,8 +141,7 @@ val parse_string : 'a Gram.Entry.e -> string -> 'a val eoi_entry : 'a Gram.Entry.e -> 'a Gram.Entry.e val map_entry : ('a -> 'b) -> 'a Gram.Entry.e -> 'b Gram.Entry.e -(********************************************************************* - Table of Coq statically defined grammar entries *) +(** Table of Coq statically defined grammar entries *) type gram_universe @@ -250,8 +252,7 @@ module Vernac_ : (** The main entry: reads an optional vernac command *) val main_entry : (loc * vernac_expr) option Gram.Entry.e -(********************************************************************* - Mapping formal entries into concrete ones *) +(** Mapping formal entries into concrete ones *) (** Binding constr entry keys to entries and symbols *) @@ -260,21 +261,19 @@ val interp_constr_entry_key : bool (** true for cases_pattern *) -> val symbol_of_constr_prod_entry_key : Gramext.g_assoc option -> constr_entry_key -> bool -> constr_prod_entry_key -> - Compat.token Gramext.g_symbol + Tok.t Gramext.g_symbol (** Binding general entry keys to symbols *) val symbol_of_prod_entry_key : Gram.te prod_entry_key -> Gram.te Gramext.g_symbol -(********************************************************************* - Interpret entry names of the form "ne_constr_list" as entry keys *) +(** Interpret entry names of the form "ne_constr_list" as entry keys *) val interp_entry_name : bool (** true to fail on unknown entry *) -> int option -> string -> string -> entry_type * Gram.te prod_entry_key -(********************************************************************* - Registering/resetting the level of a constr entry *) +(** Registering/resetting the level of a constr entry *) val find_position : bool (** true if for creation in pattern entry; false if in constr entry *) -> diff --git a/parsing/tok.ml b/parsing/tok.ml new file mode 100644 index 000000000..870f2d5e7 --- /dev/null +++ b/parsing/tok.ml @@ -0,0 +1,90 @@ +(*********************************************************************** + v * The Coq Proof Assistant / The Coq Development Team + <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud + \VV/ ************************************************************* + // * This file is distributed under the terms of the + * GNU Lesser General Public License Version 2.1 + ***********************************************************************) + +(** The type of token for the Coq lexer and parser *) + +type t = + | KEYWORD of string + | METAIDENT of string + | PATTERNIDENT of string + | IDENT of string + | FIELD of string + | INT of string + | STRING of string + | LEFTQMARK + | EOI + +let extract_string = function + | KEYWORD s -> s + | IDENT s -> s + | STRING s -> s + | METAIDENT s -> s + | PATTERNIDENT s -> s + | FIELD s -> s + | INT s -> s + | LEFTQMARK -> "?" + | EOI -> "" + +let to_string = function + | KEYWORD s -> Format.sprintf "%S" s + | IDENT s -> Format.sprintf "IDENT %S" s + | METAIDENT s -> Format.sprintf "METAIDENT %S" s + | PATTERNIDENT s -> Format.sprintf "PATTERNIDENT %S" s + | FIELD s -> Format.sprintf "FIELD %S" s + | INT s -> Format.sprintf "INT %s" s + | STRING s -> Format.sprintf "STRING %S" s + | LEFTQMARK -> "LEFTQMARK" + | EOI -> "EOI" + +let match_keyword kwd = function + | KEYWORD kwd' when kwd = kwd' -> true + | _ -> false + +let print ppf tok = Format.fprintf ppf "%s" (to_string tok) + +(** For camlp5, conversion from/to [Plexing.pattern], + and a match function analoguous to [Plexing.default_match] *) + +let of_pattern = function + | "", s -> KEYWORD s + | "IDENT", s -> IDENT s + | "METAIDENT", s -> METAIDENT s + | "PATTERNIDENT", s -> PATTERNIDENT s + | "FIELD", s -> FIELD s + | "INT", s -> INT s + | "STRING", s -> STRING s + | "LEFTQMARK", _ -> LEFTQMARK + | "EOI", _ -> EOI + | _ -> failwith "Tok.of_pattern: not a constructor" + +let to_pattern = function + | KEYWORD s -> "", s + | IDENT s -> "IDENT", s + | METAIDENT s -> "METAIDENT", s + | PATTERNIDENT s -> "PATTERNIDENT", s + | FIELD s -> "FIELD", s + | INT s -> "INT", s + | STRING s -> "STRING", s + | LEFTQMARK -> "LEFTQMARK", "" + | EOI -> "EOI", "" + +let match_pattern = + let err () = raise Stream.Failure in + function + | "", "" -> (function KEYWORD s -> s | _ -> err ()) + | "IDENT", "" -> (function IDENT s -> s | _ -> err ()) + | "METAIDENT", "" -> (function METAIDENT s -> s | _ -> err ()) + | "PATTERNIDENT", "" -> (function PATTERNIDENT s -> s | _ -> err ()) + | "FIELD", "" -> (function FIELD s -> s | _ -> err ()) + | "INT", "" -> (function INT s -> s | _ -> err ()) + | "STRING", "" -> (function STRING s -> s | _ -> err ()) + | "LEFTQMARK", "" -> (function LEFTQMARK -> "" | _ -> err ()) + | "EOI", "" -> (function EOI -> "" | _ -> err ()) + | pat -> + let tok = of_pattern pat in + function tok' -> if tok = tok' then snd pat else err () diff --git a/parsing/tok.mli b/parsing/tok.mli new file mode 100644 index 000000000..0e2730b17 --- /dev/null +++ b/parsing/tok.mli @@ -0,0 +1,29 @@ +(*********************************************************************** + v * The Coq Proof Assistant / The Coq Development Team + <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud + \VV/ ************************************************************* + // * This file is distributed under the terms of the + * GNU Lesser General Public License Version 2.1 + ***********************************************************************) + +(** The type of token for the Coq lexer and parser *) + +type t = + | KEYWORD of string + | METAIDENT of string + | PATTERNIDENT of string + | IDENT of string + | FIELD of string + | INT of string + | STRING of string + | LEFTQMARK + | EOI + +val extract_string : t -> string +val to_string : t -> string +val print : Format.formatter -> t -> unit +val match_keyword : string -> t -> bool +(** for camlp5 *) +val of_pattern : string*string -> t +val to_pattern : t -> string*string +val match_pattern : string*string -> t -> string |