path: root/ide/coq_lex.mll
author    Enrico Tassi <gareuselesinge@debian.org>    2015-01-25 14:42:51 +0100
committer Enrico Tassi <gareuselesinge@debian.org>    2015-01-25 14:42:51 +0100
commit    7cfc4e5146be5666419451bdd516f1f3f264d24a (patch)
tree      e4197645da03dc3c7cc84e434cc31d0a0cca7056 /ide/coq_lex.mll
parent    420f78b2caeaaddc6fe484565b2d0e49c66888e5 (diff)
Imported Upstream version 8.5~beta1+dfsg
Diffstat (limited to 'ide/coq_lex.mll')
-rw-r--r--    ide/coq_lex.mll    174
1 file changed, 38 insertions(+), 136 deletions(-)
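For this file, the substance of the import is a rewrite of the CoqIDE sentence lexer: the tag_of_ident keyword tables disappear, stamping switches to the Tags.Script tags, and offsets are corrected for UTF-8 so that callers receive character positions rather than byte positions. Every continuation byte (0x80-0xBF) seen by the lexer increments utf8_adjust, which is then subtracted from Lexing.lexeme_start. A minimal standalone sketch of that correction follows; the helper below is hypothetical and not taken from the commit:

(* Turn a byte offset into a character offset by discounting the UTF-8
   continuation bytes (0x80-0xBF) that precede it; each multi-byte
   character contributes one leading byte plus its continuation bytes. *)
let char_offset_of_byte_offset (s : string) (byte_ofs : int) : int =
  let adjust = ref 0 in
  String.iteri
    (fun i c -> if i < byte_ofs && Char.code c land 0xC0 = 0x80 then incr adjust)
    s;
  byte_ofs - !adjust

let () =
  (* "é" occupies two bytes, so the "." ending "aé." sits at byte 3 but character 2 *)
  assert (char_offset_of_byte_offset "a\xc3\xa9." 3 = 2)

The subtraction presumably matters because Gtk text buffers address positions by character rather than by byte.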
diff --git a/ide/coq_lex.mll b/ide/coq_lex.mll
index 1de102d5..e333c0b2 100644
--- a/ide/coq_lex.mll
+++ b/ide/coq_lex.mll
@@ -1,161 +1,57 @@
(************************************************************************)
(* v * The Coq Proof Assistant / The Coq Development Team *)
-(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2014 *)
+(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2015 *)
(* \VV/ **************************************************************)
(* // * This file is distributed under the terms of the *)
(* * GNU Lesser General Public License Version 2.1 *)
(************************************************************************)
{
- open Lexing
-
- type token =
- | Comment
- | Keyword
- | Declaration
- | ProofDeclaration
- | Qed
- | String
-
- (* Without this table, the automaton would be too big and
- ocamllex would fail *)
-
- let tag_of_ident =
- let one_word_commands =
- [ "Add" ; "Check"; "Eval"; "Extraction" ;
- "Load" ; "Undo"; "Goal";
- "Proof" ; "Print";"Save" ; "Restart";
- "End" ; "Section"; "Chapter"; "Transparent"; "Opaque"; "Comments" ]
- in
- let one_word_declarations =
- [ (* Definitions *)
- "Definition" ; "Let" ; "Example" ; "SubClass" ;
- "Fixpoint" ; "CoFixpoint" ; "Scheme" ; "Function" ;
- (* Assumptions *)
- "Hypothesis" ; "Variable" ; "Axiom" ; "Parameter" ; "Conjecture" ;
- "Hypotheses" ; "Variables" ; "Axioms" ; "Parameters";
- (* Inductive *)
- "Inductive" ; "CoInductive" ; "Record" ; "Structure" ;
- (* Other *)
- "Ltac" ; "Instance"; "Include"; "Context"; "Class" ;
- "Arguments" ]
- in
- let proof_declarations =
- [ "Theorem" ; "Lemma" ; " Fact" ; "Remark" ; "Corollary" ;
- "Proposition" ; "Property" ]
- in
- let proof_ends =
- [ "Qed" ; "Defined" ; "Admitted"; "Abort" ]
- in
- let constr_keywords =
- [ "forall"; "fun"; "match"; "fix"; "cofix"; "with"; "for";
- "end"; "as"; "let"; "in"; "if"; "then"; "else"; "return";
- "Prop"; "Set"; "Type" ]
- in
- let h = Hashtbl.create 97 in (* for vernac *)
- let h' = Hashtbl.create 97 in (* for constr *)
- List.iter (fun s -> Hashtbl.add h s Keyword) one_word_commands;
- List.iter (fun s -> Hashtbl.add h s Declaration) one_word_declarations;
- List.iter (fun s -> Hashtbl.add h s ProofDeclaration) proof_declarations;
- List.iter (fun s -> Hashtbl.add h s Qed) proof_ends;
- List.iter (fun s -> Hashtbl.add h' s Keyword) constr_keywords;
- (fun initial id -> Hashtbl.find (if initial then h else h') id)
-
exception Unterminated
- let here f lexbuf = f (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
+ let utf8_adjust = ref 0
+ let utf8_lexeme_start lexbuf =
+ Lexing.lexeme_start lexbuf - !utf8_adjust
}
-let space =
- [' ' '\n' '\r' '\t' '\012'] (* '\012' is form-feed *)
+let space = [' ' '\n' '\r' '\t' '\012'] (* '\012' is form-feed *)
-let firstchar =
- ['$' 'A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255']
-let identchar =
- ['$' 'A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
-let ident = firstchar identchar*
-
-let undotted_sep = [ '{' '}' '-' '+' '*' ]
+let undotted_sep = '{' | '}' | '-'+ | '+'+ | '*'+
let dot_sep = '.' (space | eof)
-let multiword_declaration =
- "Module" (space+ "Type")?
-| "Program" space+ ident
-| "Existing" space+ "Instance" "s"?
-| "Canonical" space+ "Structure"
-
-let locality = (space+ "Local")?
-
-let multiword_command =
- ("Uns" | "S")" et" (space+ ident)*
-| (("Open" | "Close") locality | "Bind" | " Delimit" )
- space+ "Scope"
-| (("Reserved" space+)? "Notation" | "Infix") locality space+
-| "Next" space+ "Obligation"
-| "Solve" space+ "Obligations"
-| "Require" space+ ("Import"|"Export")?
-| "Hint" locality space+ ident
-| "Reset" (space+ "Initial")?
-| "Tactic" space+ "Notation"
-| "Implicit" space+ "Type" "s"?
-| "Combined" space+ "Scheme"
-| "Extraction" space+ (("Language" space+ ("Ocaml"|"Haskell"|"Scheme"|"Toplevel"))|
- ("Library"|"Inline"|"NoInline"|"Blacklist"))
-| "Recursive" space+ "Extraction" (space+ "Library")?
-| ("Print"|"Reset") space+ "Extraction" space+ ("Inline"|"Blacklist")
-| "Extract" space+ (("Inlined" space+) "Constant"| "Inductive")
-| "Typeclasses" space+ ("eauto" | "Transparent" | "Opaque")
-| ("Generalizable" space+) ("All" | "No")? "Variable" "s"?
-
-(* At least still missing: "Inline" + decl, variants of "Identity
- Coercion", variants of Print, Add, ... *)
+let utf8_extra_byte = [ '\x80' - '\xBF' ]
rule coq_string = parse
| "\"\"" { coq_string lexbuf }
- | "\"" { Lexing.lexeme_end lexbuf }
- | eof { Lexing.lexeme_end lexbuf }
+ | "\"" { () }
+ | eof { () }
+ | utf8_extra_byte { incr utf8_adjust; coq_string lexbuf }
| _ { coq_string lexbuf }
and comment = parse
- | "(*" { ignore (comment lexbuf); comment lexbuf }
- | "\"" { ignore (coq_string lexbuf); comment lexbuf }
- | "*)" { (true, Lexing.lexeme_start lexbuf + 2) }
- | eof { (false, Lexing.lexeme_end lexbuf) }
+ | "(*" { let _ = comment lexbuf in comment lexbuf }
+ | "\"" { let () = coq_string lexbuf in comment lexbuf }
+ | "*)" { Some (utf8_lexeme_start lexbuf) }
+ | eof { None }
+ | utf8_extra_byte { incr utf8_adjust; comment lexbuf }
| _ { comment lexbuf }
+(** NB : [mkiter] should be called on increasing offsets *)
+
and sentence initial stamp = parse
| "(*" {
- let comm_start = Lexing.lexeme_start lexbuf in
- let trully_terminated,comm_end = comment lexbuf in
- stamp comm_start comm_end Comment;
- if not trully_terminated then raise Unterminated;
- (* A comment alone is a sentence.
- A comment in a sentence doesn't terminate the sentence.
- Note: comm_end is the first position _after_ the comment,
- as required when tagging a zone, hence the -1 to locate the
- ")" terminating the comment.
- *)
- if initial then comm_end - 1 else sentence false stamp lexbuf
+ match comment lexbuf with
+ | None -> raise Unterminated
+ | Some comm_last ->
+ stamp comm_last Tags.Script.comment;
+ sentence initial stamp lexbuf
}
| "\"" {
- let str_start = Lexing.lexeme_start lexbuf in
- let str_end = coq_string lexbuf in
- stamp str_start str_end String;
- sentence false stamp lexbuf
- }
- | multiword_declaration {
- if initial then here stamp lexbuf Declaration;
+ let () = coq_string lexbuf in
sentence false stamp lexbuf
}
- | multiword_command {
- if initial then here stamp lexbuf Keyword;
- sentence false stamp lexbuf
- }
- | ident as id {
- (try here stamp lexbuf (tag_of_ident initial id) with Not_found -> ());
- sentence false stamp lexbuf }
| ".." {
(* We must have a particular rule for parsing "..", where no dot
is a terminator, even if we have a blank afterwards
@@ -164,32 +60,38 @@ and sentence initial stamp = parse
special case, where the third dot is a terminator. *)
sentence false stamp lexbuf
}
- | dot_sep { Lexing.lexeme_start lexbuf } (* The usual "." terminator *)
+ | dot_sep {
+ (* The usual "." terminator *)
+ stamp (utf8_lexeme_start lexbuf) Tags.Script.sentence;
+ sentence true stamp lexbuf
+ }
| undotted_sep {
(* Separators like { or } and bullets * - + are only active
at the start of a sentence *)
- if initial then Lexing.lexeme_start lexbuf
- else sentence false stamp lexbuf
+ if initial then stamp (utf8_lexeme_start lexbuf + String.length (Lexing.lexeme lexbuf) - 1) Tags.Script.sentence;
+ sentence initial stamp lexbuf
}
| space+ {
(* Parsing spaces is the only situation preserving initiality *)
sentence initial stamp lexbuf
}
+ | utf8_extra_byte { incr utf8_adjust; sentence false stamp lexbuf }
+ | eof { if initial then () else raise Unterminated }
| _ {
(* Any other characters *)
sentence false stamp lexbuf
}
- | eof { raise Unterminated }
{
- (** Parse a sentence in string [slice], tagging relevant parts with
- function [stamp], and returning the position of the first
- sentence delimitor (either "." or "{" or "}" or the end of a comment).
- It will raise [Unterminated] when no end of sentence is found.
+ (** Parse sentences in string [slice], tagging last characters
+ of sentences with the [stamp] function.
+ It will raise [Unterminated] if [slice] ends with an unfinished
+ sentence.
*)
- let delimit_sentence stamp slice =
+ let delimit_sentences stamp slice =
+ utf8_adjust := 0;
sentence true stamp (Lexing.from_string slice)
}
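For illustration, here is a sketch of how a caller might drive the new delimit_sentences entry point. It is not part of the commit: the Coq_lex module name follows from the file name, while sentence_ends and its collecting callback are hypothetical.

(* Collect the character offsets at which [delimit_sentences] stamps the
   last character of each sentence or comment in [slice].  Offsets come
   back in increasing order, as the NB about [mkiter] above requires. *)
let sentence_ends (slice : string) : int list =
  let ends = ref [] in
  let stamp offset _tag = ends := offset :: !ends in
  (try Coq_lex.delimit_sentences stamp slice
   with Coq_lex.Unterminated -> ());  (* [slice] may end mid-sentence *)
  List.rev !ends

Catching Unterminated and returning the offsets gathered so far is just a convenience for this sketch; a real caller may prefer to let the exception propagate and handle the unfinished sentence itself.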