aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--parsing/lexer.ml423
-rw-r--r--toplevel/toplevel.ml24
2 files changed, 27 insertions, 20 deletions
diff --git a/parsing/lexer.ml4 b/parsing/lexer.ml4
index 8ec6b886e..4b15e200c 100644
--- a/parsing/lexer.ml4
+++ b/parsing/lexer.ml4
@@ -103,6 +103,14 @@ let error_utf8 cs =
Stream.junk cs; (* consume the char to avoid read it and fail again *)
err (bp, bp+1) Illegal_character
+let utf8_char_size cs = function
+ (* Utf8 leading byte *)
+ | '\x00'..'\x7F' -> 1
+ | '\xC0'..'\xDF' -> 2
+ | '\xE0'..'\xEF' -> 3
+ | '\xF0'..'\xF7' -> 4
+ | _ (* '\x80'..\xBF'|'\xF8'..'\xFF' *) -> error_utf8 cs
+
let njunk n = Util.repeat n Stream.junk
let check_utf8_trailing_byte cs c =
@@ -377,14 +385,8 @@ and progress_utf8 last nj n c tt cs =
with Not_found ->
last
-and progress_from_byte last nj tt cs = function
- (* Utf8 leading byte *)
- | '\x00'..'\x7F' as c -> progress_utf8 last nj 1 c tt cs
- | '\xC0'..'\xDF' as c -> progress_utf8 last nj 2 c tt cs
- | '\xE0'..'\xEF' as c -> progress_utf8 last nj 3 c tt cs
- | '\xF0'..'\xF7' as c -> progress_utf8 last nj 4 c tt cs
- | _ (* '\x80'..\xBF'|'\xF8'..'\xFF' *) ->
- error_utf8 cs
+and progress_from_byte last nj tt cs c =
+ progress_utf8 last nj (utf8_char_size cs c) c tt cs
let find_keyword id s =
let tt = ttree_find !token_tree id in
@@ -398,7 +400,10 @@ let process_chars bp c cs =
let ep = Stream.count cs in
match t with
| Some t -> (("", t), (bp, ep))
- | None -> err (bp, ep) Undefined_token
+ | None ->
+ let ep' = bp + utf8_char_size cs c in
+ njunk (ep' - ep) cs;
+ err (bp, ep') Undefined_token
let parse_after_dollar bp =
parser
diff --git a/toplevel/toplevel.ml b/toplevel/toplevel.ml
index baa63bc35..ee821a48d 100644
--- a/toplevel/toplevel.ml
+++ b/toplevel/toplevel.ml
@@ -111,18 +111,19 @@ let dotted_location (b,e) =
else
(String.make (e-b-1) '.', " ")
-let blanching_string s i n =
- let s = String.sub s i n in
- for i = 0 to String.length s - 1 do
+let blanch_utf8_string s bp ep =
+ let s' = String.make (ep-bp) ' ' in
+ let j = ref 0 in
+ for i = bp to ep - 1 do
let n = Char.code s.[i] in
(* Heuristic: assume utf-8 chars are printed using a single
fixed-size char and therefore contract all utf-8 code into one
- space and trailing null chars; in any case, preserve tabulation so
+ space; in any case, preserve tabulation so
that its effective interpretation in terms of spacing is preserved *)
- if 0xC0 > n && n >= 0x80 then s.[i] <- '\000'
- else if s.[i] <> '\t' then s.[i] <- ' '
- done; s
-
+ if s.[i] = '\t' then s'.[!j] <- '\t';
+ if n < 0x80 || 0xC0 <= n then incr j
+ done;
+ String.sub s' 0 !j
let print_highlight_location ib loc =
let (bp,ep) = unloc loc in
@@ -131,9 +132,10 @@ let print_highlight_location ib loc =
let highlight_lines =
match get_bols_of_loc ib (bp,ep) with
| ([],(bl,el)) ->
+ let shift = blanch_utf8_string ib.str bl bp in
+ let span = String.length (blanch_utf8_string ib.str bp ep) in
(str"> " ++ str(String.sub ib.str bl (el-bl-1)) ++ fnl () ++
- str"> " ++ str(blanching_string ib.str bl (bp-bl)) ++
- str(String.make (ep-bp) '^'))
+ str"> " ++ str(shift) ++ str(String.make span '^'))
| ((b1,e1)::ml,(bn,en)) ->
let (d1,s1) = dotted_location (b1,bp) in
let (dn,sn) = dotted_location (ep,en) in
@@ -325,7 +327,7 @@ let parse_to_dot =
let rec discard_to_dot () =
try
Gram.Entry.parse parse_to_dot top_buffer.tokens
- with Stdpp.Exc_located(_,Token.Error _) ->
+ with Stdpp.Exc_located(_,(Token.Error _|Lexer.Error _)) ->
discard_to_dot()