aboutsummaryrefslogtreecommitdiffhomepage
path: root/parsing/cLexer.ml4
diff options
context:
space:
mode:
authorGravatar Hugo Herbelin <Hugo.Herbelin@inria.fr>2016-10-13 18:05:12 +0200
committerGravatar Hugo Herbelin <Hugo.Herbelin@inria.fr>2016-10-17 20:22:17 +0200
commit561349466556f02b8d2e1cb8f2b846c188243bf9 (patch)
treeadad932b0aafc320f8acde9f5664c589cbb109d7 /parsing/cLexer.ml4
parent81ee9f1cb152a82cc4c116dd47294f2ae6eee0ed (diff)
Extra warning about unicode character of unknown status following an ident.
This covers the case e.g. of "xₚ" (until the table of unicode characters is upgraded!).
Diffstat (limited to 'parsing/cLexer.ml4')
-rw-r--r--parsing/cLexer.ml410
1 files changed, 10 insertions, 0 deletions
diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4
index 740578aad..31025075c 100644
--- a/parsing/cLexer.ml4
+++ b/parsing/cLexer.ml4
@@ -244,6 +244,12 @@ let get_buff len = String.sub !buff 0 len
(* The classical lexer: idents, numbers, quoted strings, comments *)
+let warn_unrecognized_unicode =
+ CWarnings.create ~name:"unrecognized-unicode" ~category:"parsing"
+ (fun (u,id) ->
+ strbrk (Printf.sprintf "Not considering unicode character \"%s\" of unknown \
+ lexical status as part of identifier \"%s\"." u id))
+
let rec ident_tail loc len = parser
| [< ' ('a'..'z' | 'A'..'Z' | '0'..'9' | ''' | '_' as c); s >] ->
ident_tail loc (store len c) s
@@ -251,6 +257,10 @@ let rec ident_tail loc len = parser
match lookup_utf8 loc s with
| Utf8Token ((Unicode.IdentPart | Unicode.Letter), n) ->
ident_tail loc (nstore n len s) s
+ | Utf8Token (Unicode.Unknown, n) ->
+ let id = get_buff len in
+ let u = String.concat "" (List.map (String.make 1) (Stream.npeek n s)) in
+ warn_unrecognized_unicode ~loc:!@loc (u,id); len
| _ -> len
let rec number len = parser