aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--ide/coq_lex.mll59
-rw-r--r--ide/coqide.ml33
-rw-r--r--ide/ideutils.ml25
3 files changed, 70 insertions, 47 deletions
diff --git a/ide/coq_lex.mll b/ide/coq_lex.mll
index 2f3763880..c1f75b6ff 100644
--- a/ide/coq_lex.mll
+++ b/ide/coq_lex.mll
@@ -24,27 +24,28 @@ rule coq_string = parse
| _ { coq_string lexbuf }
and comment = parse
- | "(*" { ignore (comment lexbuf); comment lexbuf }
+ | "(*" { let _ = comment lexbuf in comment lexbuf }
| "\"" { let () = coq_string lexbuf in comment lexbuf }
- | "*)" { (true, Lexing.lexeme_start lexbuf + 2) }
- | eof { (false, Lexing.lexeme_end lexbuf) }
+ | "*)" { Some (Lexing.lexeme_start lexbuf + 1) }
+ | eof { None }
| _ { comment lexbuf }
-and sentence initial = parse
+(** NB : [mkiter] should be called on increasing offsets *)
+
+and sentence initial stamp = parse
| "(*" {
- let trully_terminated,comm_end = comment lexbuf in
- if not trully_terminated then raise Unterminated;
- (* A comment alone is a sentence.
- A comment in a sentence doesn't terminate the sentence.
- Note: comm_end is the first position _after_ the comment,
- as required when tagging a zone, hence the -1 to locate the
- ")" terminating the comment.
- *)
- if initial then true, comm_end - 1 else sentence false lexbuf
+ match comment lexbuf with
+ | None -> raise Unterminated
+ | Some comm_last ->
+ (* A comment alone is a sentence.
+ A comment in a sentence doesn't terminate the sentence.
+ Note: comm_end is the position of the comment final ')' *)
+ if initial then stamp comm_last Tags.Script.comment_sentence;
+ sentence initial stamp lexbuf
}
| "\"" {
let () = coq_string lexbuf in
- sentence false lexbuf
+ sentence false stamp lexbuf
}
| ".." {
(* We must have a particular rule for parsing "..", where no dot
@@ -52,34 +53,38 @@ and sentence initial = parse
(cf. for instance the syntax for recursive notation).
This rule and the following one also allow to treat the "..."
special case, where the third dot is a terminator. *)
- sentence false lexbuf
+ sentence false stamp lexbuf
+ }
+ | dot_sep {
+ (* The usual "." terminator *)
+ stamp (Lexing.lexeme_start lexbuf) Tags.Script.sentence;
+ sentence true stamp lexbuf
}
- | dot_sep { false, Lexing.lexeme_start lexbuf } (* The usual "." terminator *)
| undotted_sep {
(* Separators like { or } and bullets * - + are only active
at the start of a sentence *)
- if initial then false, Lexing.lexeme_start lexbuf
- else sentence false lexbuf
+ if initial then stamp (Lexing.lexeme_start lexbuf) Tags.Script.sentence;
+ sentence initial stamp lexbuf
}
| space+ {
(* Parsing spaces is the only situation preserving initiality *)
- sentence initial lexbuf
+ sentence initial stamp lexbuf
}
| _ {
(* Any other characters *)
- sentence false lexbuf
+ sentence false stamp lexbuf
}
- | eof { raise Unterminated }
+ | eof { if initial then () else raise Unterminated }
{
- (** Parse a sentence in string [slice], tagging relevant parts with
- function [stamp], and returning the position of the first
- sentence delimitor (either "." or "{" or "}" or the end of a comment).
- It will raise [Unterminated] when no end of sentence is found.
+ (** Parse sentences in string [slice], tagging last characters
+ of sentences with the [stamp] function.
+ It will raise [Unterminated] if [slice] ends with an unfinished
+ sentence.
*)
- let delimit_sentence slice =
- sentence true (Lexing.from_string slice)
+ let delimit_sentences stamp slice =
+ sentence true stamp (Lexing.from_string slice)
}
diff --git a/ide/coqide.ml b/ide/coqide.ml
index de04e0891..9dfbf2f2e 100644
--- a/ide/coqide.ml
+++ b/ide/coqide.ml
@@ -218,27 +218,20 @@ let get_current_word () =
May raise [Coq_lex.Unterminated] when the zone ends with
an unterminated sentence. *)
-let split_slice_lax (buffer: GText.buffer) from upto =
- buffer#remove_tag ~start:from ~stop:upto Tags.Script.comment_sentence;
- buffer#remove_tag ~start:from ~stop:upto Tags.Script.sentence;
- let slice = buffer#get_text ~start:from ~stop:upto () in
- let rec split_substring str =
- let off_conv = byte_offset_to_char_offset str in
- let slice_len = String.length str in
- let is_comment,end_off = Coq_lex.delimit_sentence str in
- let start = from#forward_chars (off_conv end_off) in
- let stop = start#forward_char in
- let tag =
- if is_comment then Tags.Script.comment_sentence else Tags.Script.sentence
- in
- buffer#apply_tag ~start ~stop tag;
- let next = end_off + 1 in
- if next < slice_len then begin
- ignore (from#nocopy#forward_chars (off_conv next));
- split_substring (String.sub str next (slice_len - next))
- end
+let split_slice_lax (buffer: GText.buffer) start stop =
+ buffer#remove_tag ~start ~stop Tags.Script.comment_sentence;
+ buffer#remove_tag ~start ~stop Tags.Script.sentence;
+ let slice = buffer#get_text ~start ~stop () in
+ let mkiter =
+ (* caveat : partial application with effects *)
+ let convert = byte_offset_to_char_offset slice in
+ fun off -> start#forward_chars (convert off)
+ in
+ let apply_tag off tag =
+ let start = mkiter off in
+ buffer#apply_tag ~start ~stop:start#forward_char tag
in
- split_substring slice
+ Coq_lex.delimit_sentences apply_tag slice
(** Searching forward and backward a position fulfilling some condition *)
diff --git a/ide/ideutils.ml b/ide/ideutils.ml
index e5d5ebaa8..ae6db1a7a 100644
--- a/ide/ideutils.ml
+++ b/ide/ideutils.ml
@@ -32,8 +32,15 @@ let get_insert input_buffer = input_buffer#get_iter_at_mark `INSERT
(** A utf8 char is either a single byte (ascii char, 0xxxxxxx)
or multi-byte (with a leading byte 11xxxxxx and extra bytes 10xxxxxx) *)
+let is_char_start c = ((Char.code c) lsr 6 <> 2)
let is_extra_byte c = ((Char.code c) lsr 6 = 2)
+(** For a string buffer that may contain utf8 chars,
+ we convert a byte offset into a char offset
+ by only counting char-starting bytes.
+ Normally the string buffer starts with a char-starting byte
+ (buffer produced by a [#get_text]) *)
+
let byte_offset_to_char_offset s byte_offset =
let extra_bytes = ref 0 in
for i = 0 to min byte_offset (String.length s - 1) do
@@ -41,6 +48,24 @@ let byte_offset_to_char_offset s byte_offset =
done;
byte_offset - !extra_bytes
+(** For multiple offset conversions in a long buffer,
+ we proceed incrementally by storing last known positions.
+ Offsets should be asked in increasing order
+ and correspond to char-starting byte. *)
+
+let incremental_byte_offset_to_char_offset s =
+ let bytes = ref 0
+ and chars = ref 0
+ and len = String.length s
+ in
+ fun byte_offset ->
+ assert (!bytes <= byte_offset);
+ for i = !bytes + 1 to byte_offset do
+ if i >= len || is_char_start s.[i] then incr chars
+ done;
+ bytes := byte_offset;
+ !chars
+
let print_id id =
Minilib.log ("GOT sig id :"^(string_of_int (Obj.magic id)))