diff options
author | Hugo Herbelin <Hugo.Herbelin@inria.fr> | 2016-10-13 18:05:12 +0200 |
---|---|---|
committer | Hugo Herbelin <Hugo.Herbelin@inria.fr> | 2016-10-17 20:22:17 +0200 |
commit | 561349466556f02b8d2e1cb8f2b846c188243bf9 (patch) | |
tree | adad932b0aafc320f8acde9f5664c589cbb109d7 /parsing/cLexer.ml4 | |
parent | 81ee9f1cb152a82cc4c116dd47294f2ae6eee0ed (diff) |
Extra warning about unicode character of unknown status following an ident.
This covers, e.g., the case of "xₚ" (until the table of Unicode
characters is upgraded!).
Diffstat (limited to 'parsing/cLexer.ml4')
-rw-r--r-- | parsing/cLexer.ml4 | 10 |
1 file changed, 10 insertions, 0 deletions
diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4
index 740578aad..31025075c 100644
--- a/parsing/cLexer.ml4
+++ b/parsing/cLexer.ml4
@@ -244,6 +244,12 @@ let get_buff len = String.sub !buff 0 len
 
 (* The classical lexer: idents, numbers, quoted strings, comments *)
 
+let warn_unrecognized_unicode =
+  CWarnings.create ~name:"unrecognized-unicode" ~category:"parsing"
+    (fun (u,id) ->
+      strbrk (Printf.sprintf "Not considering unicode character \"%s\" of unknown \
+                              lexical status as part of identifier \"%s\"." u id))
+
 let rec ident_tail loc len = parser
   | [< ' ('a'..'z' | 'A'..'Z' | '0'..'9' | ''' | '_' as c); s >] ->
       ident_tail loc (store len c) s
@@ -251,6 +257,10 @@ let rec ident_tail loc len = parser
       match lookup_utf8 loc s with
       | Utf8Token ((Unicode.IdentPart | Unicode.Letter), n) ->
           ident_tail loc (nstore n len s) s
+      | Utf8Token (Unicode.Unknown, n) ->
+          let id = get_buff len in
+          let u = String.concat "" (List.map (String.make 1) (Stream.npeek n s)) in
+          warn_unrecognized_unicode ~loc:!@loc (u,id); len
       | _ -> len
 
 let rec number len = parser