diff options
author | letouzey <letouzey@85f007b7-540e-0410-9357-904b9bb8a0f7> | 2012-12-07 16:39:50 +0000 |
---|---|---|
committer | letouzey <letouzey@85f007b7-540e-0410-9357-904b9bb8a0f7> | 2012-12-07 16:39:50 +0000 |
commit | eda60e3598ae499120913c37ccf8295e2a75d29d (patch) | |
tree | 5eed6d46be7b163a55d5599856079b603a6c7267 /ide/coq_lex.mll | |
parent | 16fd813e55280b9aa95cff788b0099b776a75c74 (diff) |
Nicer code around Coq_lex
Instead of many Coq_lex.delimit_sentence followed by String.sub,
we let Coq_lex find the different sentences at once.
The offset converter (from byte offset to utf8 offset) is optimized
to compute these offsets incrementally instead of re-visiting the
whole string buffer again and again.
git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@16043 85f007b7-540e-0410-9357-904b9bb8a0f7
Diffstat (limited to 'ide/coq_lex.mll')
-rw-r--r-- | ide/coq_lex.mll | 59 |
1 files changed, 32 insertions, 27 deletions
diff --git a/ide/coq_lex.mll b/ide/coq_lex.mll index 2f3763880..c1f75b6ff 100644 --- a/ide/coq_lex.mll +++ b/ide/coq_lex.mll @@ -24,27 +24,28 @@ rule coq_string = parse | _ { coq_string lexbuf } and comment = parse - | "(*" { ignore (comment lexbuf); comment lexbuf } + | "(*" { let _ = comment lexbuf in comment lexbuf } | "\"" { let () = coq_string lexbuf in comment lexbuf } - | "*)" { (true, Lexing.lexeme_start lexbuf + 2) } - | eof { (false, Lexing.lexeme_end lexbuf) } + | "*)" { Some (Lexing.lexeme_start lexbuf + 1) } + | eof { None } | _ { comment lexbuf } -and sentence initial = parse +(** NB : [mkiter] should be called on increasing offsets *) + +and sentence initial stamp = parse | "(*" { - let trully_terminated,comm_end = comment lexbuf in - if not trully_terminated then raise Unterminated; - (* A comment alone is a sentence. - A comment in a sentence doesn't terminate the sentence. - Note: comm_end is the first position _after_ the comment, - as required when tagging a zone, hence the -1 to locate the - ")" terminating the comment. - *) - if initial then true, comm_end - 1 else sentence false lexbuf + match comment lexbuf with + | None -> raise Unterminated + | Some comm_last -> + (* A comment alone is a sentence. + A comment in a sentence doesn't terminate the sentence. + Note: comm_end is the position of the comment final ')' *) + if initial then stamp comm_last Tags.Script.comment_sentence; + sentence initial stamp lexbuf } | "\"" { let () = coq_string lexbuf in - sentence false lexbuf + sentence false stamp lexbuf } | ".." { (* We must have a particular rule for parsing "..", where no dot @@ -52,34 +53,38 @@ and sentence initial = parse (cf. for instance the syntax for recursive notation). This rule and the following one also allow to treat the "..." special case, where the third dot is a terminator. *) - sentence false lexbuf + sentence false stamp lexbuf + } + | dot_sep { + (* The usual "." terminator *) + stamp (Lexing.lexeme_start lexbuf) Tags.Script.sentence; + sentence true stamp lexbuf } - | dot_sep { false, Lexing.lexeme_start lexbuf } (* The usual "." terminator *) | undotted_sep { (* Separators like { or } and bullets * - + are only active at the start of a sentence *) - if initial then false, Lexing.lexeme_start lexbuf - else sentence false lexbuf + if initial then stamp (Lexing.lexeme_start lexbuf) Tags.Script.sentence; + sentence initial stamp lexbuf } | space+ { (* Parsing spaces is the only situation preserving initiality *) - sentence initial lexbuf + sentence initial stamp lexbuf } | _ { (* Any other characters *) - sentence false lexbuf + sentence false stamp lexbuf } - | eof { raise Unterminated } + | eof { if initial then () else raise Unterminated } { - (** Parse a sentence in string [slice], tagging relevant parts with - function [stamp], and returning the position of the first - sentence delimitor (either "." or "{" or "}" or the end of a comment). - It will raise [Unterminated] when no end of sentence is found. + (** Parse sentences in string [slice], tagging last characters + of sentences with the [stamp] function. + It will raise [Unterminated] if [slice] ends with an unfinished + sentence. *) - let delimit_sentence slice = - sentence true (Lexing.from_string slice) + let delimit_sentences stamp slice = + sentence true stamp (Lexing.from_string slice) } |