aboutsummaryrefslogtreecommitdiffhomepage
path: root/ide/coq_lex.mll
diff options
context:
space:
mode:
authorGravatar letouzey <letouzey@85f007b7-540e-0410-9357-904b9bb8a0f7>2012-12-07 16:39:50 +0000
committerGravatar letouzey <letouzey@85f007b7-540e-0410-9357-904b9bb8a0f7>2012-12-07 16:39:50 +0000
commiteda60e3598ae499120913c37ccf8295e2a75d29d (patch)
tree5eed6d46be7b163a55d5599856079b603a6c7267 /ide/coq_lex.mll
parent16fd813e55280b9aa95cff788b0099b776a75c74 (diff)
Nicer code around Coq_lex
Instead of many Coq_lex.delimit_sentence followed by String.sub, we let Coq_lex find the different sentences at once. The offset converter (from byte offset to utf8 offset) is optimized to compute these offsets incrementally instead of re-visiting the whole string buffer again and again. git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@16043 85f007b7-540e-0410-9357-904b9bb8a0f7
Diffstat (limited to 'ide/coq_lex.mll')
-rw-r--r--ide/coq_lex.mll59
1 files changed, 32 insertions, 27 deletions
diff --git a/ide/coq_lex.mll b/ide/coq_lex.mll
index 2f3763880..c1f75b6ff 100644
--- a/ide/coq_lex.mll
+++ b/ide/coq_lex.mll
@@ -24,27 +24,28 @@ rule coq_string = parse
| _ { coq_string lexbuf }
and comment = parse
- | "(*" { ignore (comment lexbuf); comment lexbuf }
+ | "(*" { let _ = comment lexbuf in comment lexbuf }
| "\"" { let () = coq_string lexbuf in comment lexbuf }
- | "*)" { (true, Lexing.lexeme_start lexbuf + 2) }
- | eof { (false, Lexing.lexeme_end lexbuf) }
+ | "*)" { Some (Lexing.lexeme_start lexbuf + 1) }
+ | eof { None }
| _ { comment lexbuf }
-and sentence initial = parse
+(** NB : [mkiter] should be called on increasing offsets *)
+
+and sentence initial stamp = parse
| "(*" {
- let trully_terminated,comm_end = comment lexbuf in
- if not trully_terminated then raise Unterminated;
- (* A comment alone is a sentence.
- A comment in a sentence doesn't terminate the sentence.
- Note: comm_end is the first position _after_ the comment,
- as required when tagging a zone, hence the -1 to locate the
- ")" terminating the comment.
- *)
- if initial then true, comm_end - 1 else sentence false lexbuf
+ match comment lexbuf with
+ | None -> raise Unterminated
+ | Some comm_last ->
+ (* A comment alone is a sentence.
+ A comment in a sentence doesn't terminate the sentence.
+ Note: comm_end is the position of the comment final ')' *)
+ if initial then stamp comm_last Tags.Script.comment_sentence;
+ sentence initial stamp lexbuf
}
| "\"" {
let () = coq_string lexbuf in
- sentence false lexbuf
+ sentence false stamp lexbuf
}
| ".." {
(* We must have a particular rule for parsing "..", where no dot
@@ -52,34 +53,38 @@ and sentence initial = parse
(cf. for instance the syntax for recursive notation).
This rule and the following one also allow to treat the "..."
special case, where the third dot is a terminator. *)
- sentence false lexbuf
+ sentence false stamp lexbuf
+ }
+ | dot_sep {
+ (* The usual "." terminator *)
+ stamp (Lexing.lexeme_start lexbuf) Tags.Script.sentence;
+ sentence true stamp lexbuf
}
- | dot_sep { false, Lexing.lexeme_start lexbuf } (* The usual "." terminator *)
| undotted_sep {
(* Separators like { or } and bullets * - + are only active
at the start of a sentence *)
- if initial then false, Lexing.lexeme_start lexbuf
- else sentence false lexbuf
+ if initial then stamp (Lexing.lexeme_start lexbuf) Tags.Script.sentence;
+ sentence initial stamp lexbuf
}
| space+ {
(* Parsing spaces is the only situation preserving initiality *)
- sentence initial lexbuf
+ sentence initial stamp lexbuf
}
| _ {
(* Any other characters *)
- sentence false lexbuf
+ sentence false stamp lexbuf
}
- | eof { raise Unterminated }
+ | eof { if initial then () else raise Unterminated }
{
- (** Parse a sentence in string [slice], tagging relevant parts with
- function [stamp], and returning the position of the first
- sentence delimitor (either "." or "{" or "}" or the end of a comment).
- It will raise [Unterminated] when no end of sentence is found.
+ (** Parse sentences in string [slice], tagging last characters
+ of sentences with the [stamp] function.
+ It will raise [Unterminated] if [slice] ends with an unfinished
+ sentence.
*)
- let delimit_sentence slice =
- sentence true (Lexing.from_string slice)
+ let delimit_sentences stamp slice =
+ sentence true stamp (Lexing.from_string slice)
}