diff options
-rw-r--r-- | ide/coq_lex.mll | 59 | ||||
-rw-r--r-- | ide/coqide.ml | 33 | ||||
-rw-r--r-- | ide/ideutils.ml | 25 |
3 files changed, 70 insertions, 47 deletions
diff --git a/ide/coq_lex.mll b/ide/coq_lex.mll index 2f3763880..c1f75b6ff 100644 --- a/ide/coq_lex.mll +++ b/ide/coq_lex.mll @@ -24,27 +24,28 @@ rule coq_string = parse | _ { coq_string lexbuf } and comment = parse - | "(*" { ignore (comment lexbuf); comment lexbuf } + | "(*" { let _ = comment lexbuf in comment lexbuf } | "\"" { let () = coq_string lexbuf in comment lexbuf } - | "*)" { (true, Lexing.lexeme_start lexbuf + 2) } - | eof { (false, Lexing.lexeme_end lexbuf) } + | "*)" { Some (Lexing.lexeme_start lexbuf + 1) } + | eof { None } | _ { comment lexbuf } -and sentence initial = parse +(** NB : [mkiter] should be called on increasing offsets *) + +and sentence initial stamp = parse | "(*" { - let trully_terminated,comm_end = comment lexbuf in - if not trully_terminated then raise Unterminated; - (* A comment alone is a sentence. - A comment in a sentence doesn't terminate the sentence. - Note: comm_end is the first position _after_ the comment, - as required when tagging a zone, hence the -1 to locate the - ")" terminating the comment. - *) - if initial then true, comm_end - 1 else sentence false lexbuf + match comment lexbuf with + | None -> raise Unterminated + | Some comm_last -> + (* A comment alone is a sentence. + A comment in a sentence doesn't terminate the sentence. + Note: comm_end is the position of the comment final ')' *) + if initial then stamp comm_last Tags.Script.comment_sentence; + sentence initial stamp lexbuf } | "\"" { let () = coq_string lexbuf in - sentence false lexbuf + sentence false stamp lexbuf } | ".." { (* We must have a particular rule for parsing "..", where no dot @@ -52,34 +53,38 @@ and sentence initial = parse (cf. for instance the syntax for recursive notation). This rule and the following one also allow to treat the "..." special case, where the third dot is a terminator. *) - sentence false lexbuf + sentence false stamp lexbuf + } + | dot_sep { + (* The usual "." terminator *) + stamp (Lexing.lexeme_start lexbuf) Tags.Script.sentence; + sentence true stamp lexbuf } - | dot_sep { false, Lexing.lexeme_start lexbuf } (* The usual "." terminator *) | undotted_sep { (* Separators like { or } and bullets * - + are only active at the start of a sentence *) - if initial then false, Lexing.lexeme_start lexbuf - else sentence false lexbuf + if initial then stamp (Lexing.lexeme_start lexbuf) Tags.Script.sentence; + sentence initial stamp lexbuf } | space+ { (* Parsing spaces is the only situation preserving initiality *) - sentence initial lexbuf + sentence initial stamp lexbuf } | _ { (* Any other characters *) - sentence false lexbuf + sentence false stamp lexbuf } - | eof { raise Unterminated } + | eof { if initial then () else raise Unterminated } { - (** Parse a sentence in string [slice], tagging relevant parts with - function [stamp], and returning the position of the first - sentence delimitor (either "." or "{" or "}" or the end of a comment). - It will raise [Unterminated] when no end of sentence is found. + (** Parse sentences in string [slice], tagging last characters + of sentences with the [stamp] function. + It will raise [Unterminated] if [slice] ends with an unfinished + sentence. *) - let delimit_sentence slice = - sentence true (Lexing.from_string slice) + let delimit_sentences stamp slice = + sentence true stamp (Lexing.from_string slice) } diff --git a/ide/coqide.ml b/ide/coqide.ml index de04e0891..9dfbf2f2e 100644 --- a/ide/coqide.ml +++ b/ide/coqide.ml @@ -218,27 +218,20 @@ let get_current_word () = May raise [Coq_lex.Unterminated] when the zone ends with an unterminated sentence. *) -let split_slice_lax (buffer: GText.buffer) from upto = - buffer#remove_tag ~start:from ~stop:upto Tags.Script.comment_sentence; - buffer#remove_tag ~start:from ~stop:upto Tags.Script.sentence; - let slice = buffer#get_text ~start:from ~stop:upto () in - let rec split_substring str = - let off_conv = byte_offset_to_char_offset str in - let slice_len = String.length str in - let is_comment,end_off = Coq_lex.delimit_sentence str in - let start = from#forward_chars (off_conv end_off) in - let stop = start#forward_char in - let tag = - if is_comment then Tags.Script.comment_sentence else Tags.Script.sentence - in - buffer#apply_tag ~start ~stop tag; - let next = end_off + 1 in - if next < slice_len then begin - ignore (from#nocopy#forward_chars (off_conv next)); - split_substring (String.sub str next (slice_len - next)) - end +let split_slice_lax (buffer: GText.buffer) start stop = + buffer#remove_tag ~start ~stop Tags.Script.comment_sentence; + buffer#remove_tag ~start ~stop Tags.Script.sentence; + let slice = buffer#get_text ~start ~stop () in + let mkiter = + (* caveat : partial application with effects *) + let convert = byte_offset_to_char_offset slice in + fun off -> start#forward_chars (convert off) + in + let apply_tag off tag = + let start = mkiter off in + buffer#apply_tag ~start ~stop:start#forward_char tag in - split_substring slice + Coq_lex.delimit_sentences apply_tag slice (** Searching forward and backward a position fulfilling some condition *) diff --git a/ide/ideutils.ml b/ide/ideutils.ml index e5d5ebaa8..ae6db1a7a 100644 --- a/ide/ideutils.ml +++ b/ide/ideutils.ml @@ -32,8 +32,15 @@ let get_insert input_buffer = input_buffer#get_iter_at_mark `INSERT (** A utf8 char is either a single byte (ascii char, 0xxxxxxx) or multi-byte (with a leading byte 11xxxxxx and extra bytes 10xxxxxx) *) +let is_char_start c = ((Char.code c) lsr 6 <> 2) let is_extra_byte c = ((Char.code c) lsr 6 = 2) +(** For a string buffer that may contain utf8 chars, + we convert a byte offset into a char offset + by only counting char-starting bytes. + Normally the string buffer starts with a char-starting byte + (buffer produced by a [#get_text]) *) + let byte_offset_to_char_offset s byte_offset = let extra_bytes = ref 0 in for i = 0 to min byte_offset (String.length s - 1) do @@ -41,6 +48,24 @@ let byte_offset_to_char_offset s byte_offset = done; byte_offset - !extra_bytes +(** For multiple offset conversions in a long buffer, + we proceed incrementally by storing last known positions. + Offsets should be asked in increasing order + and correspond to char-starting byte. *) + +let incremental_byte_offset_to_char_offset s = + let bytes = ref 0 + and chars = ref 0 + and len = String.length s + in + fun byte_offset -> + assert (!bytes <= byte_offset); + for i = !bytes + 1 to byte_offset do + if i >= len || is_char_start s.[i] then incr chars + done; + bytes := byte_offset; + !chars + let print_id id = Minilib.log ("GOT sig id :"^(string_of_int (Obj.magic id))) |