aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/util.ml
diff options
context:
space:
mode:
authorGravatar letouzey <letouzey@85f007b7-540e-0410-9357-904b9bb8a0f7>2010-06-21 16:50:10 +0000
committerGravatar letouzey <letouzey@85f007b7-540e-0410-9357-904b9bb8a0f7>2010-06-21 16:50:10 +0000
commit629048d0bc2a7210eed268ee6484deb2cc11141c (patch)
treeacc30e162f22f5413eecfa389b05a90b55b37ac9 /lib/util.ml
parenta45efe87f26cc5b22ee586b58344ca3854e80e84 (diff)
Extraction: replace unicode characters in ident by ascii encodings (fix #2158,#2179)
Any unicode character with code 128 or above is replaced by __Uxxxx_ where xxxx is the hexadecimal code for the unicode index of this character. For instance <alpha> is turned into __U03b1_. I know, this is ugly. Better solutions are welcome, but I'm afraid we can't do much better as long as ocaml and haskell don't accept unicode letters in idents. At least, this way we're pretty sure this translation won't create name conflicts, as long as extraction users avoid __ in their names, something that they should already do btw (see for instance extraction of coinductive types in ocaml). Yes, I should add a test and a warning/error in case of use of __ someday. NB: this commit belongs proudly to the quick'n'dirty category. git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@13173 85f007b7-540e-0410-9357-904b9bb8a0f7
Diffstat (limited to 'lib/util.ml')
-rw-r--r--lib/util.ml20
1 files changed, 20 insertions, 0 deletions
diff --git a/lib/util.ml b/lib/util.ml
index ea5da9e3c..ad48e7981 100644
--- a/lib/util.ml
+++ b/lib/util.ml
@@ -363,6 +363,26 @@ let lowercase_first_char_utf8 s =
let j, n = next_utf8 s 0 in
utf8_of_unicode (lowercase_unicode n)
+(** For extraction: encode the non-ascii unicode characters of ident [s] in ascii.
+    Each code point [n >= 128] becomes [__Uxxxx_], [xxxx] being [n] in lowercase
+    hexadecimal (at least 4 digits); a pure-ascii [s] is returned unchanged. *)
+let ascii_of_ident s =
+  let check_ascii s =
+    let ok = ref true in
+    String.iter (fun c -> if Char.code c >= 128 then ok := false) s;
+    !ok
+  in
+  if check_ascii s then s else
+    (* Use a [Buffer]: re-printing the whole output per char was quadratic. *)
+    let i = ref 0 and out = Buffer.create (String.length s) in
+    begin try while true do
+      let j, n = next_utf8 s !i in
+      if n >= 128 then Printf.bprintf out "__U%04x_" n
+      else Buffer.add_char out s.[!i];
+      i := !i + j
+    done with End_of_input -> () end;
+    Buffer.contents out
+
(* Lists *)
let rec list_compare cmp l1 l2 =