diff options
author | regisgia <regisgia@85f007b7-540e-0410-9357-904b9bb8a0f7> | 2013-09-02 09:20:59 +0000 |
---|---|---|
committer | regisgia <regisgia@85f007b7-540e-0410-9357-904b9bb8a0f7> | 2013-09-02 09:20:59 +0000 |
commit | 16328ec755874a9b704dde4ec22a75123e2f9f96 (patch) | |
tree | a897f40877b2343315245eb403d4a2f708cfd53d /lib | |
parent | a318e3c86bf8e9e849817a96069268bcf0a9d2e2 (diff) |
* lib/Unicode:
Workaround. Some characters seems to be missing in
Camomile's category tables. We add them manually.
git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@16755 85f007b7-540e-0410-9357-904b9bb8a0f7
Diffstat (limited to 'lib')
-rw-r--r-- | lib/unicode.ml | 94 |
1 files changed, 53 insertions, 41 deletions
diff --git a/lib/unicode.ml b/lib/unicode.ml index 75b3b5899..febf805ae 100644 --- a/lib/unicode.ml +++ b/lib/unicode.ml @@ -65,44 +65,54 @@ let classify = (* General tables. *) mk_lookup_table_from_unicode_tables_for Symbol [ - Unicodetable.sm; (* Symbol, maths. *) - Unicodetable.sc; (* Symbol, currency. *) - Unicodetable.so; (* Symbol, modifier. *) - Unicodetable.pd; (* Punctation, dash. *) - Unicodetable.pc; (* Punctation, connector. *) - Unicodetable.pe; (* Punctation, open. *) - Unicodetable.ps; (* Punctation, close. *) - Unicodetable.pi; (* Punctation, initial quote. *) - Unicodetable.pf; (* Punctation, final quote. *) - Unicodetable.po; (* Punctation, other. *) + Unicodetable.sm; (* Symbol, maths. *) + Unicodetable.sc; (* Symbol, currency. *) + Unicodetable.so; (* Symbol, modifier. *) + Unicodetable.pd; (* Punctation, dash. *) + Unicodetable.pc; (* Punctation, connector. *) + Unicodetable.pe; (* Punctation, open. *) + Unicodetable.ps; (* Punctation, close. *) + Unicodetable.pi; (* Punctation, initial quote. *) + Unicodetable.pf; (* Punctation, final quote. *) + Unicodetable.po; (* Punctation, other. *) ]; mk_lookup_table_from_unicode_tables_for Letter [ - Unicodetable.lu; (* Letter, uppercase. *) - Unicodetable.ll; (* Letter, lowercase. *) - Unicodetable.lt; (* Letter, titlecase. *) - Unicodetable.lo; (* Letter, others. *) + Unicodetable.lu; (* Letter, uppercase. *) + Unicodetable.ll; (* Letter, lowercase. *) + Unicodetable.lt; (* Letter, titlecase. *) + Unicodetable.lo; (* Letter, others. *) ]; mk_lookup_table_from_unicode_tables_for IdentPart [ - Unicodetable.nd; (* Number, decimal digits. *) - Unicodetable.nl; (* Number, letter. *) - Unicodetable.no; (* Number, other. *) + Unicodetable.nd; (* Number, decimal digits. *) + Unicodetable.nl; (* Number, letter. *) + Unicodetable.no; (* Number, other. *) ]; - (* Exceptions (from a previous version of this function). *) + + (* Workaround. Some characters seems to be missing in + Camomile's category tables. We add them manually. *) + mk_lookup_table_from_unicode_tables_for Letter + [ + [(0x01D00, 0x01D7F)]; (* Phonetic Extensions. *) + [(0x01D80, 0x01DBF)]; (* Phonetic Extensions Suppl. *) + [(0x01DC0, 0x01DFF)]; (* Combining Diacritical Marks Suppl.*) + ]; + + (* Exceptions (from a previous version of this function). *) mk_lookup_table_from_unicode_tables_for Symbol [ - single 0x000B2; (* Squared. *) - single 0x0002E; (* Dot. *) + single 0x000B2; (* Squared. *) + single 0x0002E; (* Dot. *) ]; mk_lookup_table_from_unicode_tables_for Letter [ - single 0x005F; (* Underscore. *) - single 0x00A0; (* Non breaking space. *) + single 0x005F; (* Underscore. *) + single 0x00A0; (* Non breaking space. *) ]; mk_lookup_table_from_unicode_tables_for IdentPart [ - single 0x0027; (* Special space. *) + single 0x0027; (* Special space. *) ]; (* Lookup *) lookup @@ -162,30 +172,32 @@ let initial_refutation j n s = | Letter -> None | _ -> let c = String.sub s 0 j in - Some (false,"Invalid character '"^c^"' at beginning of identifier \""^s^"\".") + Some (false, + "Invalid character '"^c^"' at beginning of identifier \""^s^"\".") let trailing_refutation i j n s = match classify n with | Letter | IdentPart -> None | _ -> let c = String.sub s i j in - Some (false,"Invalid character '"^c^"' in identifier \""^s^"\".") + Some (false, + "Invalid character '"^c^"' in identifier \""^s^"\".") let ident_refutation s = if s = ".." then None else try let j, n = next_utf8 s 0 in match initial_refutation j n s with - |None -> - begin try - let rec aux i = - let j, n = next_utf8 s i in - match trailing_refutation i j n s with - |None -> aux (i + j) - |x -> x - in aux j - with End_of_input -> None - end - |x -> x + |None -> + begin try + let rec aux i = + let j, n = next_utf8 s i in + match trailing_refutation i j n s with + |None -> aux (i + j) + |x -> x + in aux j + with End_of_input -> None + end + |x -> x with | End_of_input -> Some (true,"The empty string is not an identifier.") | Unsupported -> Some (true,s^": unsupported character in utf8 sequence.") @@ -196,8 +208,8 @@ let lowercase_unicode = fun unicode -> try match Segmenttree.lookup unicode tree with - | `Abs c -> c - | `Delta d -> unicode + d + | `Abs c -> c + | `Delta d -> unicode + d with Not_found -> unicode let lowercase_first_char s = @@ -218,9 +230,9 @@ let ascii_of_ident s = begin try while true do let j, n = next_utf8 s !i in out := - if n >= 128 - then Printf.sprintf "%s__U%04x_" !out n - else Printf.sprintf "%s%c" !out s.[!i]; + if n >= 128 + then Printf.sprintf "%s__U%04x_" !out n + else Printf.sprintf "%s%c" !out s.[!i]; i := !i + j done with End_of_input -> () end; !out |