From f99f60902d7492902110fb091c52e58e1ed9cd32 Mon Sep 17 00:00:00 2001 From: Pierre-Marie Pédrot Date: Thu, 19 Oct 2017 18:17:33 +0200 Subject: Use a homebrew parser to replace the GEXTEND extension points of Camlp5. The parser is stupid and the syntax is almost the same as the previous one. The only difference is that one needs to wrap OCaml code between { braces } so that quoting works out of the box. Files requiring such a syntax are handled specifically by the type system and need to have a .mlg extension instead of a .ml4 one. --- grammar/coqpp_ast.mli | 80 +++++++++++++ grammar/coqpp_lex.mll | 163 +++++++++++++++++++++++++++ grammar/coqpp_main.ml | 292 ++++++++++++++++++++++++++++++++++++++++++++++++ grammar/coqpp_parse.mly | 190 +++++++++++++++++++++++++++++++ grammar/q_util.mlp | 2 +- 5 files changed, 726 insertions(+), 1 deletion(-) create mode 100644 grammar/coqpp_ast.mli create mode 100644 grammar/coqpp_lex.mll create mode 100644 grammar/coqpp_main.ml create mode 100644 grammar/coqpp_parse.mly (limited to 'grammar') diff --git a/grammar/coqpp_ast.mli b/grammar/coqpp_ast.mli new file mode 100644 index 000000000..245e530ae --- /dev/null +++ b/grammar/coqpp_ast.mli @@ -0,0 +1,80 @@ +(************************************************************************) +(* v * The Coq Proof Assistant / The Coq Development Team *) +(* Buffer.add_string ocaml_buf "{" + | Extend -> () + in + incr num_braces + +let end_ocaml lexbuf = + let () = decr num_braces in + if !num_braces < 0 then lex_error lexbuf "Unexpected end of OCaml code" + else if !num_braces = 0 then + let s = Buffer.contents ocaml_buf in + let () = Buffer.reset ocaml_buf in + Some (CODE { Coqpp_ast.code = s }) + else + let () = Buffer.add_string ocaml_buf "}" in + None + +} + +let letter = ['a'-'z' 'A'-'Z'] +let letterlike = ['_' 'a'-'z' 'A'-'Z'] +let alphanum = ['_' 'a'-'z' 'A'-'Z' '0'-'9' '\''] +let ident = letterlike alphanum* +let qualid = ident ('.' ident)* +let space = [' ' '\t' '\r'] + +rule extend = parse +| "(*" { start_comment (); comment lexbuf } +| "{" { start_ocaml (); ocaml lexbuf } +| "GRAMMAR" { GRAMMAR } +| "VERNAC" { VERNAC } +| "TACTIC" { TACTIC } +| "EXTEND" { EXTEND } +| "END" { END } +(** Camlp5 specific keywords *) +| "GLOBAL" { GLOBAL } +| "FIRST" { FIRST } +| "LAST" { LAST } +| "BEFORE" { BEFORE } +| "AFTER" { AFTER } +| "LEVEL" { LEVEL } +| "LEFTA" { LEFTA } +| "RIGHTA" { RIGHTA } +| "NONA" { NONA } +(** Standard *) +| ident { IDENT (Lexing.lexeme lexbuf) } +| qualid { QUALID (Lexing.lexeme lexbuf) } +| space { extend lexbuf } +| '\"' { string lexbuf } +| '\n' { newline lexbuf; extend lexbuf } +| '[' { LBRACKET } +| ']' { RBRACKET } +| '|' { PIPE } +| "->" { ARROW } +| ',' { COMMA } +| ':' { COLON } +| ';' { SEMICOLON } +| '(' { LPAREN } +| ')' { RPAREN } +| '=' { EQUAL } +| _ { lex_error lexbuf "syntax error" } +| eof { EOF } + +and ocaml = parse +| "{" { start_ocaml (); ocaml lexbuf } +| "}" { match end_ocaml lexbuf with Some tk -> tk | None -> ocaml lexbuf } +| '\n' { newline lexbuf; Buffer.add_char ocaml_buf '\n'; ocaml lexbuf } +| '\"' { Buffer.add_char ocaml_buf '\"'; ocaml_string lexbuf } +| (_ as c) { Buffer.add_char ocaml_buf c; ocaml lexbuf } +| eof { lex_unexpected_eof lexbuf "OCaml code" } + +and comment = parse +| "*)" { match end_comment lexbuf with Some _ -> extend lexbuf | None -> comment lexbuf } +| "(*" { start_comment lexbuf; comment lexbuf } +| '\n' { newline lexbuf; Buffer.add_char comment_buf '\n'; comment lexbuf } +| (_ as c) { Buffer.add_char comment_buf c; comment lexbuf } +| eof { lex_unexpected_eof lexbuf "comment" } + +and string = parse +| '\"' + { + let s = Buffer.contents string_buf in + let () = Buffer.reset string_buf in + STRING s + } +| "\\\"" { Buffer.add_char string_buf '\"'; string lexbuf } +| '\n' { newline lexbuf; Buffer.add_char string_buf '\n'; string lexbuf } +| (_ as c) { Buffer.add_char string_buf c; string lexbuf } +| eof { lex_unexpected_eof lexbuf "string" } + +and ocaml_string = parse +| "\\\"" { Buffer.add_string ocaml_buf "\\\""; ocaml_string lexbuf } +| '\"' { Buffer.add_char ocaml_buf '\"'; ocaml lexbuf } +| (_ as c) { Buffer.add_char ocaml_buf c; ocaml_string lexbuf } +| eof { lex_unexpected_eof lexbuf "OCaml string" } + +{ + +let token lexbuf = match mode () with +| OCaml -> ocaml lexbuf +| Extend -> extend lexbuf + +} diff --git a/grammar/coqpp_main.ml b/grammar/coqpp_main.ml new file mode 100644 index 000000000..23a5104e2 --- /dev/null +++ b/grammar/coqpp_main.ml @@ -0,0 +1,292 @@ +(************************************************************************) +(* v * The Coq Proof Assistant / The Coq Development Team *) +(* + let () = close_in chan in + let () = Printf.eprintf "%s\n%!" (pr_loc loc) in + fatal msg + | Parsing.Parse_error -> + let () = close_in chan in + let loc = Coqpp_lex.loc lexbuf in + let () = Printf.eprintf "%s\n%!" (pr_loc loc) in + fatal "syntax error" + in + let () = close_in chan in + ans + +module StringSet = Set.Make(String) + +let string_split s = + let len = String.length s in + let rec split n = + try + let pos = String.index_from s n '.' in + let dir = String.sub s n (pos-n) in + dir :: split (succ pos) + with + | Not_found -> [String.sub s n (len-n)] + in + if len == 0 then [] else split 0 + +module GramExt : +sig + +val print_ast : Format.formatter -> grammar_ext -> unit + +end = +struct + +let is_uident s = match s.[0] with +| 'A'..'Z' -> true +| _ -> false + +let is_qualified = is_uident + +let get_local_entries ext = + let global = StringSet.of_list ext.gramext_globals in + let map e = e.gentry_name in + let entries = List.map map ext.gramext_entries in + let local = List.filter (fun e -> not (is_qualified e || StringSet.mem e global)) entries in + let rec uniquize seen = function + | [] -> [] + | id :: rem -> + let rem = uniquize (StringSet.add id seen) rem in + if StringSet.mem id seen then rem else id :: rem + in + uniquize StringSet.empty local + +let print_local chan ext = + let locals = get_local_entries ext in + match locals with + | [] -> () + | e :: locals -> + let mk_e chan e = fprintf chan "%s.Entry.create \"%s\"" ext.gramext_name e in + let () = fprintf chan "@[let %s =@ @[%a@]@]@ " e mk_e e in + let iter e = fprintf chan "@[and %s =@ @[%a@]@]@ " e mk_e e in + let () = List.iter iter locals in + fprintf chan "in@ " + +let print_string chan s = fprintf chan "\"%s\"" s + +let print_list chan pr l = + let rec prl chan = function + | [] -> () + | [x] -> fprintf chan "%a" pr x + | x :: l -> fprintf chan "%a;@ %a" pr x prl l + in + fprintf chan "@[[%a]@]" prl l + +let print_opt chan pr = function +| None -> fprintf chan "None" +| Some x -> fprintf chan "Some@ (%a)" pr x + +let print_position chan pos = match pos with +| First -> fprintf chan "Extend.First" +| Last -> fprintf chan "Extend.Last" +| Before s -> fprintf chan "Extend.Before@ \"%s\"" s +| After s -> fprintf chan "Extend.After@ \"%s\"" s +| Level s -> fprintf chan "Extend.Level@ \"%s\"" s + +let print_assoc chan = function +| LeftA -> fprintf chan "Extend.LeftA" +| RightA -> fprintf chan "Extend.RightA" +| NonA -> fprintf chan "Extend.NonA" + +type symb = +| SymbToken of string * string option +| SymbEntry of string * string option +| SymbSelf +| SymbNext +| SymbList0 of symb * symb option +| SymbList1 of symb * symb option +| SymbOpt of symb +| SymbRules of ((string option * symb) list * code) list + +let is_token s = match string_split s with +| [s] -> is_uident s +| _ -> false + +let rec parse_tokens = function +| [GSymbString s] -> SymbToken ("", Some s) +| [GSymbQualid ("SELF", None)] -> SymbSelf +| [GSymbQualid ("NEXT", None)] -> SymbNext +| [GSymbQualid ("LIST0", None); tkn] -> + SymbList0 (parse_token tkn, None) +| [GSymbQualid ("LIST1", None); tkn] -> + SymbList1 (parse_token tkn, None) +| [GSymbQualid ("LIST0", None); tkn; GSymbQualid ("SEP", None); tkn'] -> + SymbList0 (parse_token tkn, Some (parse_token tkn')) +| [GSymbQualid ("LIST1", None); tkn; GSymbQualid ("SEP", None); tkn'] -> + SymbList1 (parse_token tkn, Some (parse_token tkn')) +| [GSymbQualid ("OPT", None); tkn] -> + SymbOpt (parse_token tkn) +| [GSymbQualid (e, None)] when is_token e -> SymbToken (e, None) +| [GSymbQualid (e, None); GSymbString s] when is_token e -> SymbToken (e, Some s) +| [GSymbQualid (e, lvl)] when not (is_token e) -> SymbEntry (e, lvl) +| [GSymbParen tkns] -> parse_tokens tkns +| [GSymbProd prds] -> + let map p = + let map (pat, tkns) = (pat, parse_tokens tkns) in + (List.map map p.gprod_symbs, p.gprod_body) + in + SymbRules (List.map map prds) +| t -> + let rec db_token = function + | GSymbString s -> Printf.sprintf "\"%s\"" s + | GSymbQualid (s, _) -> Printf.sprintf "%s" s + | GSymbParen s -> Printf.sprintf "(%s)" (db_tokens s) + | GSymbProd _ -> Printf.sprintf "[...]" + and db_tokens tkns = + let s = List.map db_token tkns in + String.concat " " s + in + fatal (Printf.sprintf "Invalid token: %s" (db_tokens t)) + +and parse_token tkn = parse_tokens [tkn] + +let print_fun chan (vars, body) = + let vars = List.rev vars in + let iter = function + | None -> fprintf chan "_@ " + | Some id -> fprintf chan "%s@ " id + in + let () = fprintf chan "fun@ " in + let () = List.iter iter vars in + (** FIXME: use Coq locations in the macros *) + let () = fprintf chan "loc ->@ @[%s@]" body.code in + () + +(** Meta-program instead of calling Tok.of_pattern here because otherwise + violates value restriction *) +let print_tok chan = function +| "", s -> fprintf chan "Tok.KEYWORD %a" print_string s +| "IDENT", s -> fprintf chan "Tok.IDENT %a" print_string s +| "PATTERNIDENT", s -> fprintf chan "Tok.PATTERNIDENT %a" print_string s +| "FIELD", s -> fprintf chan "Tok.FIELD %a" print_string s +| "INT", s -> fprintf chan "Tok.INT %a" print_string s +| "STRING", s -> fprintf chan "Tok.STRING %a" print_string s +| "LEFTQMARK", _ -> fprintf chan "Tok.LEFTQMARK" +| "BULLET", s -> fprintf chan "Tok.BULLET %a" print_string s +| "EOI", _ -> fprintf chan "Tok.EOI" +| _ -> failwith "Tok.of_pattern: not a constructor" + +let rec print_prod chan p = + let (vars, tkns) = List.split p.gprod_symbs in + let f = (vars, p.gprod_body) in + let tkn = List.rev_map parse_tokens tkns in + fprintf chan "@[Extend.Rule@ (@[%a@],@ @[(%a)@])@]" print_symbols tkn print_fun f + +and print_symbols chan = function +| [] -> fprintf chan "Extend.Stop" +| tkn :: tkns -> + fprintf chan "Extend.Next @[(%a,@ %a)@]" print_symbols tkns print_symbol tkn + +and print_symbol chan tkn = match tkn with +| SymbToken (t, s) -> + let s = match s with None -> "" | Some s -> s in + fprintf chan "(Extend.Atoken (%a))" print_tok (t, s) +| SymbEntry (e, None) -> + fprintf chan "(Extend.Aentry %s)" e +| SymbEntry (e, Some l) -> + fprintf chan "(Extend.Aentryl (%s, %a))" e print_string l +| SymbSelf -> + fprintf chan "Extend.Aself" +| SymbNext -> + fprintf chan "Extend.Anext" +| SymbList0 (s, None) -> + fprintf chan "(Extend.Alist0 %a)" print_symbol s +| SymbList0 (s, Some sep) -> + fprintf chan "(Extend.Alist0sep (%a, %a))" print_symbol s print_symbol sep +| SymbList1 (s, None) -> + fprintf chan "(Extend.Alist1 %a)" print_symbol s +| SymbList1 (s, Some sep) -> + fprintf chan "(Extend.Alist1sep (%a, %a))" print_symbol s print_symbol sep +| SymbOpt s -> + fprintf chan "(Extend.Aopt %a)" print_symbol s +| SymbRules rules -> + let pr chan (r, body) = + let (vars, tkn) = List.split r in + let tkn = List.rev tkn in + fprintf chan "Extend.Rules @[({ Extend.norec_rule = %a },@ (%a))@]" print_symbols tkn print_fun (vars, body) + in + let pr chan rules = print_list chan pr rules in + fprintf chan "(Extend.Arules %a)" pr (List.rev rules) + +let print_rule chan r = + let pr_lvl chan lvl = print_opt chan print_string lvl in + let pr_asc chan asc = print_opt chan print_assoc asc in + let pr_prd chan prd = print_list chan print_prod prd in + fprintf chan "@[(%a,@ %a,@ %a)@]" pr_lvl r.grule_label pr_asc r.grule_assoc pr_prd (List.rev r.grule_prods) + +let print_entry chan gram e = + let print_position_opt chan pos = print_opt chan print_position pos in + let print_rules chan rules = print_list chan print_rule rules in + fprintf chan "let () =@ @[%s.safe_extend@ %s@ @[(%a, %a)@]@]@ in@ " + gram e.gentry_name print_position_opt e.gentry_pos print_rules e.gentry_rules + +let print_ast chan ext = + let () = fprintf chan "let _ = @[" in + let () = fprintf chan "@[%a@]" print_local ext in + let () = List.iter (fun e -> print_entry chan ext.gramext_name e) ext.gramext_entries in + let () = fprintf chan "()@]\n" in + () + +end + +let pr_ast chan = function +| Code s -> fprintf chan "%s@\n" s.code +| Comment s -> fprintf chan "%s@\n" s +| GramExt gram -> fprintf chan "%a@\n" GramExt.print_ast gram +| VernacExt -> fprintf chan "VERNACEXT@\n" +| TacticExt (id, rules) -> + let pr_tok = function + | ExtTerminal s -> Printf.sprintf "%s" s + | ExtNonTerminal (s, _) -> Printf.sprintf "%s" s + in + let pr_tacrule r = + let toks = String.concat " " (List.map pr_tok r.tac_toks) in + Printf.sprintf "[%s] -> {%s}" toks r.tac_body.code + in + let rules = String.concat ", " (List.map pr_tacrule rules) in + fprintf chan "TACTICEXT (%s, [%s])@\n" id rules + +let () = + let () = + if Array.length Sys.argv <> 2 then fatal "Expected exactly one command line argument" + in + let file = Sys.argv.(1) in + let output = Filename.chop_extension file ^ ".ml" in + let ast = parse_file file in + let chan = open_out output in + let fmt = formatter_of_out_channel chan in + let iter ast = Format.fprintf fmt "@[%a@]%!"pr_ast ast in + let () = List.iter iter ast in + let () = close_out chan in + exit 0 diff --git a/grammar/coqpp_parse.mly b/grammar/coqpp_parse.mly new file mode 100644 index 000000000..aa22d2717 --- /dev/null +++ b/grammar/coqpp_parse.mly @@ -0,0 +1,190 @@ +/************************************************************************/ +/* v * The Coq Proof Assistant / The Coq Development Team */ +/* CODE +%token COMMENT +%token IDENT QUALID +%token STRING +%token VERNAC TACTIC GRAMMAR EXTEND END +%token LBRACKET RBRACKET PIPE ARROW COMMA EQUAL +%token LPAREN RPAREN COLON SEMICOLON +%token GLOBAL FIRST LAST BEFORE AFTER LEVEL LEFTA RIGHTA NONA +%token EOF + +%type file +%start file + +%% + +file: +| nodes EOF + { $1 } +; + +nodes: +| + { [] } +| node nodes + { $1 :: $2 } +; + +node: +| CODE { Code $1 } +| COMMENT { Comment $1 } +| grammar_extend { $1 } +| vernac_extend { $1 } +| tactic_extend { $1 } +; + +grammar_extend: +| GRAMMAR EXTEND qualid_or_ident globals gram_entries END + { GramExt { gramext_name = $3; gramext_globals = $4; gramext_entries = $5 } } +; + +vernac_extend: +| VERNAC EXTEND IDENT END { VernacExt } +; + +tactic_extend: +| TACTIC EXTEND IDENT tactic_rules END { TacticExt ($3, $4) } +; + +tactic_rules: +| tactic_rule { [$1] } +| tactic_rule tactic_rules { $1 :: $2 } +; + +tactic_rule: +| PIPE LBRACKET ext_tokens RBRACKET ARROW CODE + { { tac_toks = $3; tac_body = $6 } } +; + +ext_tokens: +| { [] } +| ext_token ext_tokens { $1 :: $2 } +; + +ext_token: +| STRING { ExtTerminal $1 } +| IDENT { ExtNonTerminal ($1, TokNone) } +| IDENT LPAREN IDENT RPAREN { ExtNonTerminal ($1, TokName $3) } +| IDENT LPAREN IDENT COMMA STRING RPAREN { ExtNonTerminal ($1, TokNameSep ($3, $5)) } +; + +qualid_or_ident: +| QUALID { $1 } +| IDENT { $1 } +; + +globals: +| { [] } +| GLOBAL COLON idents SEMICOLON { $3 } +; + +idents: +| { [] } +| qualid_or_ident idents { $1 :: $2 } +; + +gram_entries: +| { [] } +| gram_entry gram_entries { $1 :: $2 } +; + +gram_entry: +| qualid_or_ident COLON position_opt LBRACKET levels RBRACKET SEMICOLON + { { gentry_name = $1; gentry_pos = $3; gentry_rules = $5; } } +; + +position_opt: +| { None } +| position { Some $1 } +; + +position: +| FIRST { First } +| LAST { Last } +| BEFORE STRING { Before $2 } +| AFTER STRING { After $2 } +| LEVEL STRING { Level $2 } +; + +string_opt: +| { None } +| STRING { Some $1 } +; + +assoc_opt: +| { None } +| assoc { Some $1 } +; + +assoc: +| LEFTA { LeftA } +| RIGHTA { RightA } +| NONA { NonA } +; + +levels: +| level { [$1] } +| level PIPE levels { $1 :: $3 } +; + +level: +| string_opt assoc_opt LBRACKET rules_opt RBRACKET + { { grule_label = $1; grule_assoc = $2; grule_prods = $4; } } +; + +rules_opt: +| { [] } +| rules { $1 } +; + +rules: +| rule { [$1] } +| rule PIPE rules { $1 :: $3 } +; + +rule: +| symbols_opt ARROW CODE + { { gprod_symbs = $1; gprod_body = $3; } } +; + +symbols_opt: +| { [] } +| symbols { $1 } +; + +symbols: +| symbol { [$1] } +| symbol SEMICOLON symbols { $1 :: $3 } +; + +symbol: +| IDENT EQUAL gram_tokens { (Some $1, $3) } +| gram_tokens { (None, $1) } +; + +gram_token: +| qualid_or_ident { GSymbQualid ($1, None) } +| qualid_or_ident LEVEL STRING { GSymbQualid ($1, Some $3) } +| LPAREN gram_tokens RPAREN { GSymbParen $2 } +| LBRACKET rules RBRACKET { GSymbProd $2 } +| STRING { GSymbString $1 } +; + +gram_tokens: +| gram_token { [$1] } +| gram_token gram_tokens { $1 :: $2 } +; diff --git a/grammar/q_util.mlp b/grammar/q_util.mlp index 6cdd2ec19..0b8d7fda7 100644 --- a/grammar/q_util.mlp +++ b/grammar/q_util.mlp @@ -75,7 +75,7 @@ let rec mlexpr_of_prod_entry_key f = function (** Keep in sync with Pcoq! *) assert (e = "tactic"); if l = 5 then <:expr< Extend.Aentry Pltac.binder_tactic >> - else <:expr< Extend.Aentryl (Pltac.tactic_expr) $mlexpr_of_int l$ >> + else <:expr< Extend.Aentryl (Pltac.tactic_expr) $mlexpr_of_string (string_of_int l)$ >> let rec type_of_user_symbol = function | Ulist1 s | Ulist1sep (s, _) | Ulist0 s | Ulist0sep (s, _) -> -- cgit v1.2.3