aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib
diff options
context:
space:
mode:
authorGravatar miquel <miquel@85f007b7-540e-0410-9357-904b9bb8a0f7>2000-10-19 15:46:44 +0000
committerGravatar miquel <miquel@85f007b7-540e-0410-9357-904b9bb8a0f7>2000-10-19 15:46:44 +0000
commitd3e2f28966bb6d3e5766b3da2ee333cdc3438b13 (patch)
tree5e8510d56de255594dbc10bc5dfaebe07ba89aba /lib
parent2ae228a7cac719e4fc949fe1f14115f68e93353f (diff)
Use UTF-8 as default encoding for computing length of strings in pretty
print boxes. Full compatibility with *pure* ascii, an quasi full compatibility with latin1 encoding. git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@730 85f007b7-540e-0410-9357-904b9bb8a0f7
Diffstat (limited to 'lib')
-rw-r--r--lib/pp.ml37
1 files changed, 36 insertions, 1 deletions
diff --git a/lib/pp.ml b/lib/pp.ml
index 0588970bc..d29eaf6f1 100644
--- a/lib/pp.ml
+++ b/lib/pp.ml
@@ -44,9 +44,44 @@ type 'a ppdir_token =
type std_ppcmds = (int*string) ppcmd_token Stream.t
type 'a ppdirs = 'a ppdir_token Stream.t
+(* Compute length of an UTF-8 encoded string
+ Rem 1 : utf8_length <= String.length (equal if pure ascii)
+ Rem 2 : if used for an iso8859_1 encoded string, the result is
+ wrong in very rare cases. Such a wrong case corresponds to any
+ sequence of a character in range 192..253 immediately followed by a
+ character in range 128..191 (typical case in french is "déçu" which
+ is counted 3 instead of 4); then no real harm to use always
+ utf8_length even if using an iso8859_1 encoding *)
+
+let utf8_length s =
+ let len = String.length s
+ and cnt = ref 0
+ and nc = ref 0
+ and p = ref 0 in
+ while !p < len do
+ begin
+ match s.[!p] with
+ | '\000'..'\127' -> nc := 0 (* ascii char *)
+ | '\128'..'\191' -> nc := 0 (* cannot start with a continuation byte *)
+ | '\192'..'\223' -> nc := 1 (* expect 1 continuation byte *)
+ | '\224'..'\239' -> nc := 2 (* expect 2 continuation bytes *)
+ | '\240'..'\247' -> nc := 3 (* expect 3 continuation bytes *)
+ | '\248'..'\251' -> nc := 4 (* expect 4 continuation bytes *)
+ | '\252'..'\253' -> nc := 5 (* expect 5 continuation bytes *)
+ | '\254'..'\255' -> nc := 0 (* invalid byte *)
+ end ;
+ incr p ;
+ while !p < len && !nc > 0 do
+ match s.[!p] with
+ | '\128'..'\191' (* next continuation byte *) -> incr p ; decr nc
+ | _ (* not a continuation byte *) -> nc := 0
+ done ;
+ incr cnt
+ done ;
+ !cnt
(* formatting commands *)
-let sTR s = Ppcmd_print (String.length s,s)
+let sTR s = Ppcmd_print (utf8_length s,s)
let sTRas (i,s) = Ppcmd_print (i,s)
let bRK (a,b) = Ppcmd_print_break (a,b)
let tBRK (a,b) = Ppcmd_print_tbreak (a,b)