summaryrefslogtreecommitdiff
path: root/ide/coq_lex.mll
blob: 03ce950fdd1e541d0e11288e9aed50988906d8b2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
(************************************************************************)
(*  v      *   The Coq Proof Assistant  /  The Coq Development Team     *)
(* <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud *)
(*   \VV/  **************************************************************)
(*    //   *      This file is distributed under the terms of the       *)
(*         *       GNU Lesser General Public License Version 2.1        *)
(************************************************************************)

(* $Id$ *)

{
  open Lexing

  type token =
    | Comment
    | Keyword
    | Declaration
    | ProofDeclaration
    | Qed
    | String

  (* Without this table, the automaton would be too big and
     ocamllex would fail *)
  let is_one_word_command =
    let h = Hashtbl.create 97 in
    List.iter (fun s -> Hashtbl.add h s ())
      [  "Add" ; "Check"; "Eval"; "Extraction" ;
	 "Load" ; "Undo"; "Goal";
	 "Proof" ; "Print";"Save" ;
	 "End" ; "Section"; "Chapter"; "Transparent"; "Opaque"; "Comments"
      ];
    Hashtbl.mem h

  let is_constr_kw =
    let h = Hashtbl.create 97 in
      List.iter (fun s -> Hashtbl.add h s ())
	[ "forall"; "fun"; "match"; "fix"; "cofix"; "with"; "for";
	  "end"; "as"; "let"; "in"; "if"; "then"; "else"; "return";
	  "Prop"; "Set"; "Type" ];
      Hashtbl.mem h

  (* Without this table, the automaton would be too big and
     ocamllex would fail *)
  let is_one_word_declaration =
    let h = Hashtbl.create 97 in
      List.iter (fun s -> Hashtbl.add h s ())
	[ (* Definitions *)
	  "Definition" ; "Let" ; "Example" ; "SubClass" ;
          "Fixpoint" ; "CoFixpoint" ; "Scheme" ; "Function" ;
          (* Assumptions *)
	  "Hypothesis" ; "Variable" ; "Axiom" ; "Parameter" ; "Conjecture" ;
	  "Hypotheses" ; "Variables" ; "Axioms" ; "Parameters";
          (* Inductive *)
          "Inductive" ; "CoInductive" ; "Record" ; "Structure" ;
          (* Other *)
	  "Ltac" ; "Typeclasses"; "Instance"; "Include"; "Context"; "Class"
	];
      Hashtbl.mem h

  let is_proof_declaration =
    let h = Hashtbl.create 97 in
    List.iter (fun s -> Hashtbl.add h s ())
      [ "Theorem" ; "Lemma" ; " Fact" ; "Remark" ; "Corollary" ;
        "Proposition" ; "Property" ];
    Hashtbl.mem h

  let is_proof_end =
    let h = Hashtbl.create 97 in
    List.iter (fun s -> Hashtbl.add h s ())
      [ "Qed" ; "Defined" ; "Admitted" ];
    Hashtbl.mem h

  let start = ref true
}

let space =
  [' ' '\010' '\013' '\009' '\012']

let firstchar =
  ['$' 'A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255']
let identchar =
  ['$' 'A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
let ident = firstchar identchar*

let sentence_sep = '.' [ ' ' '\n' '\t' ]

let multiword_declaration =
  "Module" (space+ "Type")?
| "Program" space+ ident
| "Existing" space+ "Instance"
| "Canonical" space+ "Structure"

let locality = ("Local" space+)?

let multiword_command =
  "Set" (space+ ident)*
| "Unset" (space+ ident)*
| "Open" space+ locality "Scope"
| "Close" space+ locality "Scope"
| "Bind" space+ "Scope"
| "Arguments" space+ "Scope"
| "Reserved" space+ "Notation" space+ locality
| "Delimit" space+ "Scope"
| "Next" space+ "Obligation"
| "Solve" space+ "Obligations"
| "Require" space+ ("Import"|"Export")?
| "Infix" space+ locality
| "Notation" space+ locality
| "Hint" space+ locality ident
| "Reset" (space+ "Initial")?
| "Tactic" space+ "Notation"
| "Implicit" space+ "Arguments"
| "Implicit" space+ ("Type"|"Types")
| "Combined" space+ "Scheme"
| "Extraction" space+ (("Language" space+ ("Ocaml"|"Haskell"|"Scheme"|"Toplevel"))|
    ("Library"|"Inline"|"NoInline"|"Blacklist"))
| "Recursive" space+ "Extraction" (space+ "Library")?
| ("Print"|"Reset") space+ "Extraction" space+ ("Inline"|"Blacklist")
| "Extract" space+ (("Inlined" space+) "Constant"| "Inductive")

(* At least still missing: "Inline" + decl, variants of "Identity
  Coercion", variants of Print, Add, ... *)

rule coq_string = parse
  | "\"\"" { coq_string lexbuf }
  | "\"" { Lexing.lexeme_end lexbuf }
  | eof { Lexing.lexeme_end lexbuf }
  | _ { coq_string lexbuf }

and comment = parse
  | "(*" { ignore (comment lexbuf); comment lexbuf }
  | "\"" { ignore (coq_string lexbuf); comment lexbuf }
  | "*)" { Lexing.lexeme_end lexbuf }
  | eof { Lexing.lexeme_end lexbuf }
  | _ { comment lexbuf }

and sentence stamp = parse
  | space+ { sentence stamp lexbuf }
  | "(*" {
      let comm_start = Lexing.lexeme_start lexbuf in
      let comm_end = comment lexbuf in
      stamp comm_start comm_end Comment;
      sentence stamp lexbuf
    }
  | "\"" {
      let str_start = Lexing.lexeme_start lexbuf in
      let str_end = coq_string lexbuf in
      stamp str_start str_end String;
      start := false;
      sentence stamp lexbuf
    }
  | multiword_declaration {
      if !start then begin
        start := false;
        stamp (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) Declaration
      end;
      sentence stamp lexbuf
    }
  | multiword_command {
      if !start then begin
        start := false;
        stamp (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) Keyword
      end;
      sentence stamp lexbuf }
  | ident as id {
      if !start then begin
        start := false;
        if id <> "Time" then begin
            if is_proof_end id then
              stamp (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) Qed
            else if is_one_word_command id then
              stamp (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) Keyword
            else if is_one_word_declaration id then
              stamp (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) Declaration
            else if is_proof_declaration id then
              stamp (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) ProofDeclaration
        end
      end else begin
        if is_constr_kw id then
	  stamp (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) Keyword
      end;
      sentence stamp lexbuf }
  | ".."
  | _    { sentence stamp lexbuf}
  | sentence_sep { }
  | eof { raise Not_found }

{
  let find_end_offset stamp slice =
    let lb = Lexing.from_string slice in
    start := true;
    sentence stamp lb;
    Lexing.lexeme_end lb
}