about summary refs log tree commit diff homepage
path: root/lib
diff options
context:
space:
mode:
author Hugo Herbelin <Hugo.Herbelin@inria.fr> 2016-10-13 15:57:14 +0200
committer Hugo Herbelin <Hugo.Herbelin@inria.fr> 2016-10-17 20:22:17 +0200
commit 57c6ffd23836364168ffd1c66dbddbecf830c7c6 (patch)
tree 051f302085ef8a33f90bd0533053d158e1b29b6d /lib
parent 4204581ccb8bdf0f6c4298029c010c6deb643594 (diff)
Stopping warning on unrecognized unicode character in notation (fixing #5136).
The warning was pointless since the notation was accepted and parsed anyway. We now treat unrecognized unicode characters like ordinary undefined tokens (e.g. "#" in a bare Coq). For instance, "aₚ", or ".ₚ", or "?ₚ" now fail with "Undefined token" rather than "Unsupported Unicode character".
Diffstat (limited to 'lib')
-rw-r--r-- lib/unicode.ml 8
-rw-r--r-- lib/unicode.mli 12
2 files changed, 6 insertions, 14 deletions
diff --git a/lib/unicode.ml b/lib/unicode.ml
index dc852d981..ced5e258c 100644
--- a/lib/unicode.ml
+++ b/lib/unicode.ml
@@ -8,9 +8,7 @@
(** Unicode utilities *)
-type status = Letter | IdentPart | Symbol
-
-exception Unsupported
+type status = Letter | IdentPart | Symbol | Unknown
(* The following table stores classes of Unicode characters that
are used by the lexer. There are 3 different classes so 2 bits are
@@ -29,6 +27,7 @@ let mask i = function
| Letter -> 1 lsl ((i land 7) lsl 1) (* 01 *)
| IdentPart -> 2 lsl ((i land 7) lsl 1) (* 10 *)
| Symbol -> 3 lsl ((i land 7) lsl 1) (* 11 *)
+ | Unknown -> 0 lsl ((i land 7) lsl 1) (* 00 *)
(* Helper to reset 2 bits in a word. *)
let reset_mask i =
@@ -55,7 +54,7 @@ let lookup x =
if v = 1 then Letter
else if v = 2 then IdentPart
else if v = 3 then Symbol
- else raise Unsupported
+ else Unknown
(* [classify] discriminates between 3 different kinds of
symbols based on the standard unicode classification (extracted from
@@ -215,7 +214,6 @@ let ident_refutation s =
|x -> x
with
| End_of_input -> Some (true,"The empty string is not an identifier.")
- | Unsupported -> Some (true,s^": unsupported character in utf8 sequence.")
| Invalid_argument _ -> Some (true,s^": invalid utf8 sequence.")
let lowercase_unicode =
diff --git a/lib/unicode.mli b/lib/unicode.mli
index 1f8bd44ee..2609e1968 100644
--- a/lib/unicode.mli
+++ b/lib/unicode.mli
@@ -8,22 +8,16 @@
(** Unicode utilities *)
-type status = Letter | IdentPart | Symbol
+type status = Letter | IdentPart | Symbol | Unknown
-(** This exception is raised when UTF-8 the input string contains unsupported UTF-8 characters. *)
-exception Unsupported
-
-(** Classify a unicode char into 3 classes.
- @raise Unsupported if the input string contains unsupported UTF-8 characters. *)
+(** Classify a unicode char into 3 classes or unknown. *)
val classify : int -> status
(** Return [None] if a given string can be used as a (Coq) identifier.
- Return [Some (b,s)] otherwise, where [s] is an explanation and [b] is severity.
- @raise Unsupported if the input string contains unsupported UTF-8 characters. *)
+ Return [Some (b,s)] otherwise, where [s] is an explanation and [b] is severity. *)
val ident_refutation : string -> (bool * string) option
(** First char of a string, converted to lowercase
- @raise Unsupported if the input string contains unsupported UTF-8 characters.
@raise Assert_failure if the input string is empty. *)
val lowercase_first_char : string -> string