aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib
diff options
context:
space:
mode:
authorGravatar regisgia <regisgia@85f007b7-540e-0410-9357-904b9bb8a0f7>2013-09-02 09:20:59 +0000
committerGravatar regisgia <regisgia@85f007b7-540e-0410-9357-904b9bb8a0f7>2013-09-02 09:20:59 +0000
commit16328ec755874a9b704dde4ec22a75123e2f9f96 (patch)
treea897f40877b2343315245eb403d4a2f708cfd53d /lib
parenta318e3c86bf8e9e849817a96069268bcf0a9d2e2 (diff)
* lib/Unicode:
Workaround. Some characters seems to be missing in Camomile's category tables. We add them manually. git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@16755 85f007b7-540e-0410-9357-904b9bb8a0f7
Diffstat (limited to 'lib')
-rw-r--r--lib/unicode.ml94
1 files changed, 53 insertions, 41 deletions
diff --git a/lib/unicode.ml b/lib/unicode.ml
index 75b3b5899..febf805ae 100644
--- a/lib/unicode.ml
+++ b/lib/unicode.ml
@@ -65,44 +65,54 @@ let classify =
(* General tables. *)
mk_lookup_table_from_unicode_tables_for Symbol
[
- Unicodetable.sm; (* Symbol, maths. *)
- Unicodetable.sc; (* Symbol, currency. *)
- Unicodetable.so; (* Symbol, modifier. *)
- Unicodetable.pd; (* Punctation, dash. *)
- Unicodetable.pc; (* Punctation, connector. *)
- Unicodetable.pe; (* Punctation, open. *)
- Unicodetable.ps; (* Punctation, close. *)
- Unicodetable.pi; (* Punctation, initial quote. *)
- Unicodetable.pf; (* Punctation, final quote. *)
- Unicodetable.po; (* Punctation, other. *)
+ Unicodetable.sm; (* Symbol, maths. *)
+ Unicodetable.sc; (* Symbol, currency. *)
+ Unicodetable.so; (* Symbol, modifier. *)
+ Unicodetable.pd; (* Punctation, dash. *)
+ Unicodetable.pc; (* Punctation, connector. *)
+ Unicodetable.pe; (* Punctation, open. *)
+ Unicodetable.ps; (* Punctation, close. *)
+ Unicodetable.pi; (* Punctation, initial quote. *)
+ Unicodetable.pf; (* Punctation, final quote. *)
+ Unicodetable.po; (* Punctation, other. *)
];
mk_lookup_table_from_unicode_tables_for Letter
[
- Unicodetable.lu; (* Letter, uppercase. *)
- Unicodetable.ll; (* Letter, lowercase. *)
- Unicodetable.lt; (* Letter, titlecase. *)
- Unicodetable.lo; (* Letter, others. *)
+ Unicodetable.lu; (* Letter, uppercase. *)
+ Unicodetable.ll; (* Letter, lowercase. *)
+ Unicodetable.lt; (* Letter, titlecase. *)
+ Unicodetable.lo; (* Letter, others. *)
];
mk_lookup_table_from_unicode_tables_for IdentPart
[
- Unicodetable.nd; (* Number, decimal digits. *)
- Unicodetable.nl; (* Number, letter. *)
- Unicodetable.no; (* Number, other. *)
+ Unicodetable.nd; (* Number, decimal digits. *)
+ Unicodetable.nl; (* Number, letter. *)
+ Unicodetable.no; (* Number, other. *)
];
- (* Exceptions (from a previous version of this function). *)
+
+ (* Workaround. Some characters seems to be missing in
+ Camomile's category tables. We add them manually. *)
+ mk_lookup_table_from_unicode_tables_for Letter
+ [
+ [(0x01D00, 0x01D7F)]; (* Phonetic Extensions. *)
+ [(0x01D80, 0x01DBF)]; (* Phonetic Extensions Suppl. *)
+ [(0x01DC0, 0x01DFF)]; (* Combining Diacritical Marks Suppl.*)
+ ];
+
+ (* Exceptions (from a previous version of this function). *)
mk_lookup_table_from_unicode_tables_for Symbol
[
- single 0x000B2; (* Squared. *)
- single 0x0002E; (* Dot. *)
+ single 0x000B2; (* Squared. *)
+ single 0x0002E; (* Dot. *)
];
mk_lookup_table_from_unicode_tables_for Letter
[
- single 0x005F; (* Underscore. *)
- single 0x00A0; (* Non breaking space. *)
+ single 0x005F; (* Underscore. *)
+ single 0x00A0; (* Non breaking space. *)
];
mk_lookup_table_from_unicode_tables_for IdentPart
[
- single 0x0027; (* Special space. *)
+ single 0x0027; (* Special space. *)
];
(* Lookup *)
lookup
@@ -162,30 +172,32 @@ let initial_refutation j n s =
| Letter -> None
| _ ->
let c = String.sub s 0 j in
- Some (false,"Invalid character '"^c^"' at beginning of identifier \""^s^"\".")
+ Some (false,
+ "Invalid character '"^c^"' at beginning of identifier \""^s^"\".")
let trailing_refutation i j n s =
match classify n with
| Letter | IdentPart -> None
| _ ->
let c = String.sub s i j in
- Some (false,"Invalid character '"^c^"' in identifier \""^s^"\".")
+ Some (false,
+ "Invalid character '"^c^"' in identifier \""^s^"\".")
let ident_refutation s =
if s = ".." then None else try
let j, n = next_utf8 s 0 in
match initial_refutation j n s with
- |None ->
- begin try
- let rec aux i =
- let j, n = next_utf8 s i in
- match trailing_refutation i j n s with
- |None -> aux (i + j)
- |x -> x
- in aux j
- with End_of_input -> None
- end
- |x -> x
+ |None ->
+ begin try
+ let rec aux i =
+ let j, n = next_utf8 s i in
+ match trailing_refutation i j n s with
+ |None -> aux (i + j)
+ |x -> x
+ in aux j
+ with End_of_input -> None
+ end
+ |x -> x
with
| End_of_input -> Some (true,"The empty string is not an identifier.")
| Unsupported -> Some (true,s^": unsupported character in utf8 sequence.")
@@ -196,8 +208,8 @@ let lowercase_unicode =
fun unicode ->
try
match Segmenttree.lookup unicode tree with
- | `Abs c -> c
- | `Delta d -> unicode + d
+ | `Abs c -> c
+ | `Delta d -> unicode + d
with Not_found -> unicode
let lowercase_first_char s =
@@ -218,9 +230,9 @@ let ascii_of_ident s =
begin try while true do
let j, n = next_utf8 s !i in
out :=
- if n >= 128
- then Printf.sprintf "%s__U%04x_" !out n
- else Printf.sprintf "%s%c" !out s.[!i];
+ if n >= 128
+ then Printf.sprintf "%s__U%04x_" !out n
+ else Printf.sprintf "%s%c" !out s.[!i];
i := !i + j
done with End_of_input -> () end;
!out