aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar letouzey <letouzey@85f007b7-540e-0410-9357-904b9bb8a0f7>2010-06-21 16:50:10 +0000
committerGravatar letouzey <letouzey@85f007b7-540e-0410-9357-904b9bb8a0f7>2010-06-21 16:50:10 +0000
commit629048d0bc2a7210eed268ee6484deb2cc11141c (patch)
treeacc30e162f22f5413eecfa389b05a90b55b37ac9
parenta45efe87f26cc5b22ee586b58344ca3854e80e84 (diff)
Extraction: replace unicode characters in ident by ascii encodings (fix #2158,#2179)
Any unicode character above 128 is replaced by __Uxxxx_ where xxxx is the hexa code for the unicode index of this character. For instance <alpha> is turned into __U03b1_. I know, this is ugly. Better solutions are welcome, but I'm afraid we can't do much better as long as ocaml and haskell don't accept unicode letters in idents. At least, this way we're pretty sure this translating won't create name conflit, as long as extraction users avoid __ in their names, something that they should already do btw (see for instance extraction of coinductive types in ocaml). Yes, I should add a test and a warning/error in case of use of __ someday. NB: this commit belongs proudly to the quick'n'dirty category git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@13173 85f007b7-540e-0410-9357-904b9bb8a0f7
-rw-r--r--lib/util.ml20
-rw-r--r--lib/util.mli1
-rw-r--r--plugins/extraction/common.ml8
3 files changed, 28 insertions, 1 deletions
diff --git a/lib/util.ml b/lib/util.ml
index ea5da9e3c..ad48e7981 100644
--- a/lib/util.ml
+++ b/lib/util.ml
@@ -363,6 +363,26 @@ let lowercase_first_char_utf8 s =
let j, n = next_utf8 s 0 in
utf8_of_unicode (lowercase_unicode n)
+(** For extraction, we need to encode unicode character into ascii ones *)
+
+let ascii_of_ident s =
+ let check_ascii s =
+ let ok = ref true in
+ String.iter (fun c -> if Char.code c >= 128 then ok := false) s;
+ !ok
+ in
+ if check_ascii s then s else
+ let i = ref 0 and out = ref "" in
+ begin try while true do
+ let j, n = next_utf8 s !i in
+ out :=
+ if n >= 128
+ then Printf.sprintf "%s__U%04x_" !out n
+ else Printf.sprintf "%s%c" !out s.[!i];
+ i := !i + j
+ done with End_of_input -> () end;
+ !out
+
(* Lists *)
let rec list_compare cmp l1 l2 =
diff --git a/lib/util.mli b/lib/util.mli
index 8c811d439..0976d742c 100644
--- a/lib/util.mli
+++ b/lib/util.mli
@@ -107,6 +107,7 @@ val classify_unicode : int -> utf8_status
val check_ident : string -> unit
val check_ident_soft : string -> unit
val lowercase_first_char_utf8 : string -> string
+val ascii_of_ident : string -> string
(** {6 Lists. } *)
diff --git a/plugins/extraction/common.ml b/plugins/extraction/common.ml
index 18b3642e2..bde54e1d8 100644
--- a/plugins/extraction/common.ml
+++ b/plugins/extraction/common.ml
@@ -20,6 +20,8 @@ open Mlutil
open Modutil
open Mod_subst
+let string_of_id id = ascii_of_ident (Names.string_of_id id)
+
(*s Some pretty-print utility functions. *)
let pp_par par st = if par then str "(" ++ st ++ str ")" else st
@@ -72,7 +74,11 @@ let is_upper s = match s.[0] with 'A' .. 'Z' -> true | _ -> false
let is_lower s = match s.[0] with 'a' .. 'z' | '_' -> true | _ -> false
let lowercase_id id = id_of_string (String.uncapitalize (string_of_id id))
-let uppercase_id id = id_of_string (String.capitalize (string_of_id id))
+let uppercase_id id =
+ let s = string_of_id id in
+ assert (s<>"");
+ if s.[0] = '_' then id_of_string ("Coq_"^s)
+ else id_of_string (String.capitalize s)
type kind = Term | Type | Cons | Mod