diff options
author | letouzey <letouzey@85f007b7-540e-0410-9357-904b9bb8a0f7> | 2010-06-21 16:50:10 +0000 |
---|---|---|
committer | letouzey <letouzey@85f007b7-540e-0410-9357-904b9bb8a0f7> | 2010-06-21 16:50:10 +0000 |
commit | 629048d0bc2a7210eed268ee6484deb2cc11141c (patch) | |
tree | acc30e162f22f5413eecfa389b05a90b55b37ac9 | |
parent | a45efe87f26cc5b22ee586b58344ca3854e80e84 (diff) |
Extraction: replace unicode characters in ident by ascii encodings (fix #2158,#2179)
Any unicode character above 128 is replaced by __Uxxxx_ where xxxx is
the hexa code for the unicode index of this character. For instance
<alpha> is turned into __U03b1_. I know, this is ugly. Better
solutions are welcome, but I'm afraid we can't do much better as long
as ocaml and haskell don't accept unicode letters in idents. At
least, this way we're pretty sure this translating won't create name
conflit, as long as extraction users avoid __ in their names,
something that they should already do btw (see for instance
extraction of coinductive types in ocaml). Yes, I should add a test
and a warning/error in case of use of __ someday.
NB: this commit belongs proudly to the quick'n'dirty category
git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@13173 85f007b7-540e-0410-9357-904b9bb8a0f7
-rw-r--r-- | lib/util.ml | 20 | ||||
-rw-r--r-- | lib/util.mli | 1 | ||||
-rw-r--r-- | plugins/extraction/common.ml | 8 |
3 files changed, 28 insertions, 1 deletions
diff --git a/lib/util.ml b/lib/util.ml index ea5da9e3c..ad48e7981 100644 --- a/lib/util.ml +++ b/lib/util.ml @@ -363,6 +363,26 @@ let lowercase_first_char_utf8 s = let j, n = next_utf8 s 0 in utf8_of_unicode (lowercase_unicode n) +(** For extraction, we need to encode unicode character into ascii ones *) + +let ascii_of_ident s = + let check_ascii s = + let ok = ref true in + String.iter (fun c -> if Char.code c >= 128 then ok := false) s; + !ok + in + if check_ascii s then s else + let i = ref 0 and out = ref "" in + begin try while true do + let j, n = next_utf8 s !i in + out := + if n >= 128 + then Printf.sprintf "%s__U%04x_" !out n + else Printf.sprintf "%s%c" !out s.[!i]; + i := !i + j + done with End_of_input -> () end; + !out + (* Lists *) let rec list_compare cmp l1 l2 = diff --git a/lib/util.mli b/lib/util.mli index 8c811d439..0976d742c 100644 --- a/lib/util.mli +++ b/lib/util.mli @@ -107,6 +107,7 @@ val classify_unicode : int -> utf8_status val check_ident : string -> unit val check_ident_soft : string -> unit val lowercase_first_char_utf8 : string -> string +val ascii_of_ident : string -> string (** {6 Lists. } *) diff --git a/plugins/extraction/common.ml b/plugins/extraction/common.ml index 18b3642e2..bde54e1d8 100644 --- a/plugins/extraction/common.ml +++ b/plugins/extraction/common.ml @@ -20,6 +20,8 @@ open Mlutil open Modutil open Mod_subst +let string_of_id id = ascii_of_ident (Names.string_of_id id) + (*s Some pretty-print utility functions. *) let pp_par par st = if par then str "(" ++ st ++ str ")" else st @@ -72,7 +74,11 @@ let is_upper s = match s.[0] with 'A' .. 'Z' -> true | _ -> false let is_lower s = match s.[0] with 'a' .. 'z' | '_' -> true | _ -> false let lowercase_id id = id_of_string (String.uncapitalize (string_of_id id)) -let uppercase_id id = id_of_string (String.capitalize (string_of_id id)) +let uppercase_id id = + let s = string_of_id id in + assert (s<>""); + if s.[0] = '_' then id_of_string ("Coq_"^s) + else id_of_string (String.capitalize s) type kind = Term | Type | Cons | Mod |