diff options
Diffstat (limited to 'ide/coq_lex.mll')
-rw-r--r-- | ide/coq_lex.mll | 22 |
1 files changed, 16 insertions, 6 deletions
diff --git a/ide/coq_lex.mll b/ide/coq_lex.mll index c1f75b6ff..d1d7c6629 100644 --- a/ide/coq_lex.mll +++ b/ide/coq_lex.mll @@ -8,26 +8,34 @@ { exception Unterminated + + let utf8_adjust = ref 0 + + let utf8_lexeme_start lexbuf = + Lexing.lexeme_start lexbuf - !utf8_adjust } -let space = - [' ' '\n' '\r' '\t' '\012'] (* '\012' is form-feed *) +let space = [' ' '\n' '\r' '\t' '\012'] (* '\012' is form-feed *) let undotted_sep = [ '{' '}' '-' '+' '*' ] let dot_sep = '.' (space | eof) +let utf8_extra_byte = [ '\x80' - '\xBF' ] + rule coq_string = parse | "\"\"" { coq_string lexbuf } | "\"" { () } | eof { () } + | utf8_extra_byte { incr utf8_adjust; coq_string lexbuf } | _ { coq_string lexbuf } and comment = parse | "(*" { let _ = comment lexbuf in comment lexbuf } | "\"" { let () = coq_string lexbuf in comment lexbuf } - | "*)" { Some (Lexing.lexeme_start lexbuf + 1) } + | "*)" { Some (utf8_lexeme_start lexbuf + 1) } | eof { None } + | utf8_extra_byte { incr utf8_adjust; comment lexbuf } | _ { comment lexbuf } (** NB : [mkiter] should be called on increasing offsets *) @@ -57,24 +65,25 @@ and sentence initial stamp = parse } | dot_sep { (* The usual "." terminator *) - stamp (Lexing.lexeme_start lexbuf) Tags.Script.sentence; + stamp (utf8_lexeme_start lexbuf) Tags.Script.sentence; sentence true stamp lexbuf } | undotted_sep { (* Separators like { or } and bullets * - + are only active at the start of a sentence *) - if initial then stamp (Lexing.lexeme_start lexbuf) Tags.Script.sentence; + if initial then stamp (utf8_lexeme_start lexbuf) Tags.Script.sentence; sentence initial stamp lexbuf } | space+ { (* Parsing spaces is the only situation preserving initiality *) sentence initial stamp lexbuf } + | utf8_extra_byte { incr utf8_adjust; sentence false stamp lexbuf } + | eof { if initial then () else raise Unterminated } | _ { (* Any other characters *) sentence false stamp lexbuf } - | eof { if initial then () else raise Unterminated } { @@ -85,6 +94,7 @@ and sentence initial stamp = parse *) let delimit_sentences stamp slice = + utf8_adjust := 0; sentence true stamp (Lexing.from_string slice) } |