aboutsummaryrefslogtreecommitdiffhomepage
path: root/parsing
diff options
context:
space:
mode:
authorGravatar herbelin <herbelin@85f007b7-540e-0410-9357-904b9bb8a0f7>2010-10-17 19:59:39 +0000
committerGravatar herbelin <herbelin@85f007b7-540e-0410-9357-904b9bb8a0f7>2010-10-17 19:59:39 +0000
commit2521bbc7e9805fd57d2852c1e9631250def11d57 (patch)
tree6cdc089a8a07a033bc5089edb5bbaa3adb193412 /parsing
parentd25dfc1a30ab227178f8cd4304f28a35f4d7e19d (diff)
About "unsupported" unicode characters in notations.
- 8.2 (bug-fix): reverted check for unicode early at notation definition time (an unsupported "cadratin" space, 0x2003, was used in CoRN!) [by the way, what to do with unicode spacing characters in general?] - trunk: improved error message, removed redundant code git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@13561 85f007b7-540e-0410-9357-904b9bb8a0f7
Diffstat (limited to 'parsing')
-rw-r--r--parsing/lexer.ml438
1 files changed, 25 insertions, 13 deletions
diff --git a/parsing/lexer.ml4 b/parsing/lexer.ml4
index 3c392acb6..5e2b10a7a 100644
--- a/parsing/lexer.ml4
+++ b/parsing/lexer.ml4
@@ -79,6 +79,7 @@ module Error = struct
| Unterminated_string
| Undefined_token
| Bad_token of string
+ | UnsupportedUnicode of int
exception E of t
@@ -89,7 +90,9 @@ module Error = struct
| Unterminated_comment -> "Unterminated comment"
| Unterminated_string -> "Unterminated string"
| Undefined_token -> "Undefined token"
- | Bad_token tok -> Format.sprintf "Bad token %S" tok)
+ | Bad_token tok -> Format.sprintf "Bad token %S" tok
+ | UnsupportedUnicode x ->
+ Printf.sprintf "Unsupported Unicode character (0x%x)" x)
let print ppf x = Format.fprintf ppf "%s@." (to_string x)
@@ -107,9 +110,9 @@ type token_kind =
| AsciiChar
| EmptyStream
-let error_unsupported_unicode_character n cs =
+let error_unsupported_unicode_character n unicode cs =
let bp = Stream.count cs in
- err (bp,bp+n) (Bad_token "Unsupported Unicode character")
+ err (bp,bp+n) (UnsupportedUnicode unicode)
let error_utf8 cs =
let bp = Stream.count cs in
@@ -158,7 +161,8 @@ let lookup_utf8_tail c cs =
| _ -> error_utf8 cs
in
try classify_unicode unicode, n
- with UnsupportedUtf8 -> njunk n cs; error_unsupported_unicode_character n cs
+ with UnsupportedUtf8 ->
+ njunk n cs; error_unsupported_unicode_character n unicode cs
let lookup_utf8 cs =
match Stream.peek cs with
@@ -166,17 +170,26 @@ let lookup_utf8 cs =
| Some ('\x80'..'\xFF' as c) -> Utf8Token (lookup_utf8_tail c cs)
| None -> EmptyStream
-let check_special_token str =
+let unlocated f x =
+ try f x with Loc.Exc_located (_,exc) -> raise exc
+
+let check_keyword str =
let rec loop_symb = parser
| [< ' (' ' | '\n' | '\r' | '\t' | '"') >] -> bad_token str
| [< s >] ->
- match lookup_utf8 s with
+ match unlocated lookup_utf8 s with
| Utf8Token (_,n) -> njunk n s; loop_symb s
| AsciiChar -> Stream.junk s; loop_symb s
| EmptyStream -> ()
in
loop_symb (Stream.of_string str)
+let check_keyword_to_add s =
+ try check_keyword s
+ with Error.E (UnsupportedUnicode unicode) ->
+ Flags.if_verbose msg_warning
+ (strbrk (Printf.sprintf "Token '%s' contains unicode character 0x%x which will not be parsable." s unicode))
+
let check_ident str =
let rec loop_id intail = parser
| [< ' ('a'..'z' | 'A'..'Z' | '_'); s >] ->
@@ -184,7 +197,7 @@ let check_ident str =
| [< ' ('0'..'9' | ''') when intail; s >] ->
loop_id true s
| [< s >] ->
- match lookup_utf8 s with
+ match unlocated lookup_utf8 s with
| Utf8Token (UnicodeLetter, n) -> njunk n s; loop_id true s
| Utf8Token (UnicodeIdentPart, n) when intail -> njunk n s; loop_id true s
| EmptyStream -> ()
@@ -192,10 +205,6 @@ let check_ident str =
in
loop_id false (Stream.of_string str)
-let check_keyword str =
- try check_special_token str
- with Error.E _ -> check_ident str
-
(* Keyword and symbol dictionary *)
let token_tree = ref empty_ttree
@@ -204,8 +213,11 @@ let is_keyword s =
with Not_found -> false
let add_keyword str =
- check_keyword str;
- token_tree := ttree_add !token_tree str
+ if not (is_keyword str) then
+ begin
+ check_keyword_to_add str;
+ token_tree := ttree_add !token_tree str
+ end
let remove_keyword str =
token_tree := ttree_remove !token_tree str