| author | Enrico Tassi <gareuselesinge@debian.org> | 2015-01-25 14:42:51 +0100 |
|---|---|---|
| committer | Enrico Tassi <gareuselesinge@debian.org> | 2015-01-25 14:42:51 +0100 |
| commit | 7cfc4e5146be5666419451bdd516f1f3f264d24a (patch) | |
| tree | e4197645da03dc3c7cc84e434cc31d0a0cca7056 /ide/coq_lex.mll | |
| parent | 420f78b2caeaaddc6fe484565b2d0e49c66888e5 (diff) | |
Imported Upstream version 8.5~beta1+dfsg
Diffstat (limited to 'ide/coq_lex.mll')
-rw-r--r-- | ide/coq_lex.mll | 174 |
1 file changed, 38 insertions, 136 deletions
diff --git a/ide/coq_lex.mll b/ide/coq_lex.mll
index 1de102d5..e333c0b2 100644
--- a/ide/coq_lex.mll
+++ b/ide/coq_lex.mll
@@ -1,161 +1,57 @@
 (************************************************************************)
 (* v * The Coq Proof Assistant / The Coq Development Team *)
-(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2014 *)
+(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2015 *)
 (* \VV/ **************************************************************)
 (* // * This file is distributed under the terms of the *)
 (* * GNU Lesser General Public License Version 2.1 *)
 (************************************************************************)

 {
-  open Lexing
-
-  type token =
-    | Comment
-    | Keyword
-    | Declaration
-    | ProofDeclaration
-    | Qed
-    | String
-
-  (* Without this table, the automaton would be too big and
-     ocamllex would fail *)
-
-  let tag_of_ident =
-    let one_word_commands =
-      [ "Add" ; "Check"; "Eval"; "Extraction" ;
-        "Load" ; "Undo"; "Goal";
-        "Proof" ; "Print";"Save" ; "Restart";
-        "End" ; "Section"; "Chapter"; "Transparent"; "Opaque"; "Comments" ]
-    in
-    let one_word_declarations =
-      [ (* Definitions *)
-        "Definition" ; "Let" ; "Example" ; "SubClass" ;
-        "Fixpoint" ; "CoFixpoint" ; "Scheme" ; "Function" ;
-        (* Assumptions *)
-        "Hypothesis" ; "Variable" ; "Axiom" ; "Parameter" ; "Conjecture" ;
-        "Hypotheses" ; "Variables" ; "Axioms" ; "Parameters";
-        (* Inductive *)
-        "Inductive" ; "CoInductive" ; "Record" ; "Structure" ;
-        (* Other *)
-        "Ltac" ; "Instance"; "Include"; "Context"; "Class" ;
-        "Arguments" ]
-    in
-    let proof_declarations =
-      [ "Theorem" ; "Lemma" ; " Fact" ; "Remark" ; "Corollary" ;
-        "Proposition" ; "Property" ]
-    in
-    let proof_ends =
-      [ "Qed" ; "Defined" ; "Admitted"; "Abort" ]
-    in
-    let constr_keywords =
-      [ "forall"; "fun"; "match"; "fix"; "cofix"; "with"; "for";
-        "end"; "as"; "let"; "in"; "if"; "then"; "else"; "return";
-        "Prop"; "Set"; "Type" ]
-    in
-    let h = Hashtbl.create 97 in (* for vernac *)
-    let h' = Hashtbl.create 97 in (* for constr *)
-    List.iter (fun s -> Hashtbl.add h s Keyword) one_word_commands;
-    List.iter (fun s -> Hashtbl.add h s Declaration) one_word_declarations;
-    List.iter (fun s -> Hashtbl.add h s ProofDeclaration) proof_declarations;
-    List.iter (fun s -> Hashtbl.add h s Qed) proof_ends;
-    List.iter (fun s -> Hashtbl.add h' s Keyword) constr_keywords;
-    (fun initial id -> Hashtbl.find (if initial then h else h') id)

   exception Unterminated

-  let here f lexbuf = f (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf)
+  let utf8_adjust = ref 0
+
+  let utf8_lexeme_start lexbuf =
+    Lexing.lexeme_start lexbuf - !utf8_adjust
 }

-let space =
-  [' ' '\n' '\r' '\t' '\012'] (* '\012' is form-feed *)
+let space = [' ' '\n' '\r' '\t' '\012'] (* '\012' is form-feed *)

-let firstchar =
-  ['$' 'A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255']
-let identchar =
-  ['$' 'A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
-let ident = firstchar identchar*
-
-let undotted_sep = [ '{' '}' '-' '+' '*' ]
+let undotted_sep = '{' | '}' | '-'+ | '+'+ | '*'+

 let dot_sep = '.' (space | eof)

-let multiword_declaration =
-  "Module" (space+ "Type")?
-| "Program" space+ ident
-| "Existing" space+ "Instance" "s"?
-| "Canonical" space+ "Structure"
-
-let locality = (space+ "Local")?
-
-let multiword_command =
-  ("Uns" | "S")" et" (space+ ident)*
-| (("Open" | "Close") locality | "Bind" | " Delimit" )
-    space+ "Scope"
-| (("Reserved" space+)? "Notation" | "Infix") locality space+
-| "Next" space+ "Obligation"
-| "Solve" space+ "Obligations"
-| "Require" space+ ("Import"|"Export")?
-| "Hint" locality space+ ident
-| "Reset" (space+ "Initial")?
-| "Tactic" space+ "Notation"
-| "Implicit" space+ "Type" "s"?
-| "Combined" space+ "Scheme"
-| "Extraction" space+ (("Language" space+ ("Ocaml"|"Haskell"|"Scheme"|"Toplevel"))|
-    ("Library"|"Inline"|"NoInline"|"Blacklist"))
-| "Recursive" space+ "Extraction" (space+ "Library")?
-| ("Print"|"Reset") space+ "Extraction" space+ ("Inline"|"Blacklist")
-| "Extract" space+ (("Inlined" space+) "Constant"| "Inductive")
-| "Typeclasses" space+ ("eauto" | "Transparent" | "Opaque")
-| ("Generalizable" space+) ("All" | "No")? "Variable" "s"?
-
-(* At least still missing: "Inline" + decl, variants of "Identity
-   Coercion", variants of Print, Add, ... *)
+let utf8_extra_byte = [ '\x80' - '\xBF' ]

 rule coq_string = parse
   | "\"\"" { coq_string lexbuf }
-  | "\"" { Lexing.lexeme_end lexbuf }
-  | eof { Lexing.lexeme_end lexbuf }
+  | "\"" { () }
+  | eof { () }
+  | utf8_extra_byte { incr utf8_adjust; coq_string lexbuf }
   | _ { coq_string lexbuf }

 and comment = parse
-  | "(*" { ignore (comment lexbuf); comment lexbuf }
-  | "\"" { ignore (coq_string lexbuf); comment lexbuf }
-  | "*)" { (true, Lexing.lexeme_start lexbuf + 2) }
-  | eof { (false, Lexing.lexeme_end lexbuf) }
+  | "(*" { let _ = comment lexbuf in comment lexbuf }
+  | "\"" { let () = coq_string lexbuf in comment lexbuf }
+  | "*)" { Some (utf8_lexeme_start lexbuf) }
+  | eof { None }
+  | utf8_extra_byte { incr utf8_adjust; comment lexbuf }
   | _ { comment lexbuf }

+(** NB : [mkiter] should be called on increasing offsets *)
+
 and sentence initial stamp = parse
   | "(*" {
-      let comm_start = Lexing.lexeme_start lexbuf in
-      let trully_terminated,comm_end = comment lexbuf in
-      stamp comm_start comm_end Comment;
-      if not trully_terminated then raise Unterminated;
-      (* A comment alone is a sentence.
-         A comment in a sentence doesn't terminate the sentence.
-         Note: comm_end is the first position _after_ the comment,
-         as required when tagging a zone, hence the -1 to locate the
-         ")" terminating the comment.
-      *)
-      if initial then comm_end - 1 else sentence false stamp lexbuf
+      match comment lexbuf with
+      | None -> raise Unterminated
+      | Some comm_last ->
+        stamp comm_last Tags.Script.comment;
+        sentence initial stamp lexbuf
     }
   | "\"" {
-      let str_start = Lexing.lexeme_start lexbuf in
-      let str_end = coq_string lexbuf in
-      stamp str_start str_end String;
-      sentence false stamp lexbuf
-    }
-  | multiword_declaration {
-      if initial then here stamp lexbuf Declaration;
+      let () = coq_string lexbuf in
       sentence false stamp lexbuf
     }
-  | multiword_command {
-      if initial then here stamp lexbuf Keyword;
-      sentence false stamp lexbuf
-    }
-  | ident as id {
-      (try here stamp lexbuf (tag_of_ident initial id) with Not_found -> ());
-      sentence false stamp lexbuf }
   | ".." {
       (* We must have a particular rule for parsing "..", where no dot
          is a terminator, even if we have a blank afterwards
@@ -164,32 +60,38 @@ and sentence initial stamp = parse
         special case, where the third dot is a terminator. *)
       sentence false stamp lexbuf
     }
-  | dot_sep { Lexing.lexeme_start lexbuf } (* The usual "." terminator *)
+  | dot_sep {
+      (* The usual "." terminator *)
+      stamp (utf8_lexeme_start lexbuf) Tags.Script.sentence;
+      sentence true stamp lexbuf
+    }
   | undotted_sep {
       (* Separators like { or } and bullets * - + are only active
         at the start of a sentence *)
-      if initial then Lexing.lexeme_start lexbuf
-      else sentence false stamp lexbuf
+      if initial then stamp (utf8_lexeme_start lexbuf + String.length (Lexing.lexeme lexbuf) - 1) Tags.Script.sentence;
+      sentence initial stamp lexbuf
     }
   | space+ {
      (* Parsing spaces is the only situation preserving initiality *)
      sentence initial stamp lexbuf
    }
+  | utf8_extra_byte { incr utf8_adjust; sentence false stamp lexbuf }
+  | eof { if initial then () else raise Unterminated }
   | _ {
      (* Any other characters *)
      sentence false stamp lexbuf
    }
-  | eof { raise Unterminated }

 {
-  (** Parse a sentence in string [slice], tagging relevant parts with
-      function [stamp], and returning the position of the first
-      sentence delimitor (either "." or "{" or "}" or the end of a comment).
-      It will raise [Unterminated] when no end of sentence is found.
+  (** Parse sentences in string [slice], tagging last characters
+      of sentences with the [stamp] function.
+      It will raise [Unterminated] if [slice] ends with an unfinished
+      sentence.
   *)

-  let delimit_sentence stamp slice =
+  let delimit_sentences stamp slice =
+    utf8_adjust := 0;
     sentence true stamp (Lexing.from_string slice)
 }
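For readers skimming the patch: it replaces `delimit_sentence`, which returned the position of the first sentence delimiter, with `delimit_sentences`, which walks the whole slice and reports the last character of every comment and sentence through the `stamp` callback, with reported offsets corrected by `utf8_adjust` so that multi-byte UTF-8 sequences count as a single position. Below is a minimal, hypothetical caller sketch, not part of the commit; it assumes the `Coq_lex` and `Tags` interfaces exactly as they appear in the diff, that `Tags.Script` tags can be compared by physical equality, and the helper name `collect_sentence_ends` is made up for illustration.

```ocaml
(* Hypothetical driver, not part of this patch: collects the sentence-end
   offsets that the reworked lexer reports through its [stamp] callback. *)
let collect_sentence_ends (slice : string) : int list =
  let ends = ref [] in
  let stamp offset tag =
    (* [stamp] is also called for comments; keep sentence terminators only. *)
    if tag == Tags.Script.sentence then ends := offset :: !ends
  in
  (try Coq_lex.delimit_sentences stamp slice
   with Coq_lex.Unterminated -> ());  (* the slice ends mid-sentence *)
  List.rev !ends
```

Compared with the old interface, which returned a single delimiter position and had to be re-invoked slice by slice, the callback style lets the caller tag a whole buffer region in one pass while the UTF-8 offset bookkeeping stays internal to the lexer.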