diff options
author | Hugo Herbelin <Hugo.Herbelin@inria.fr> | 2017-10-04 22:00:19 +0200 |
---|---|---|
committer | Hugo Herbelin <Hugo.Herbelin@inria.fr> | 2017-10-05 08:36:50 +0200 |
commit | 526791d917f9b0804376eae02a462a3b32dd7cba (patch) | |
tree | d9ad17bf4c1e287627acc3183bdbabf88b49438c /parsing | |
parent | 9a2bb3a6d12a082c61dfda62be53c195fe3cb57c (diff) |
Distinguishing pseudo-letters from the set of Unicode letters.
This includes _ and the non-breaking space, which can be used in identifiers,
and it allows more precise heuristics.
Diffstat (limited to 'parsing')
-rw-r--r-- | parsing/cLexer.ml4 | 16 |
1 files changed, 8 insertions, 8 deletions
diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4 index 9c9189ffe..f26398fa9 100644 --- a/parsing/cLexer.ml4 +++ b/parsing/cLexer.ml4 @@ -245,8 +245,8 @@ let check_ident str = loop_id true s | [< s >] -> match unlocated lookup_utf8 Ploc.dummy s with - | Utf8Token (Unicode.Letter, n) -> njunk n s; loop_id true s - | Utf8Token (Unicode.IdentPart, n) when intail -> + | Utf8Token (st, n) when not intail && Unicode.is_valid_ident_initial st -> njunk n s; loop_id true s + | Utf8Token (st, n) when intail && Unicode.is_valid_ident_trailing st -> njunk n s; loop_id true s | EmptyStream -> () @@ -311,9 +311,9 @@ let rec ident_tail loc len = parser ident_tail loc (store len c) s | [< s >] -> match lookup_utf8 loc s with - | Utf8Token ((Unicode.IdentPart | Unicode.Letter), n) -> + | Utf8Token (st, n) when Unicode.is_valid_ident_trailing st -> ident_tail loc (nstore n len s) s - | Utf8Token (Unicode.Unknown, n) -> + | Utf8Token (st, n) when Unicode.is_unknown st -> let id = get_buff len in let u = String.concat "" (List.map (String.make 1) (Stream.npeek n s)) in warn_unrecognized_unicode ~loc:!@loc (u,id); len @@ -539,7 +539,7 @@ let parse_after_dot loc c bp = (try find_keyword loc ("."^field) s with Not_found -> FIELD field) | [< s >] -> match lookup_utf8 loc s with - | Utf8Token (Unicode.Letter, n) -> + | Utf8Token (st, n) when Unicode.is_valid_ident_initial st -> let len = ident_tail loc (nstore n 0 s) s in let field = get_buff len in (try find_keyword loc ("."^field) s with Not_found -> FIELD field) @@ -553,7 +553,7 @@ let parse_after_qmark loc bp s = | None -> KEYWORD "?" | _ -> match lookup_utf8 loc s with - | Utf8Token (Unicode.Letter, _) -> LEFTQMARK + | Utf8Token (st, _) when Unicode.is_valid_ident_initial st -> LEFTQMARK | AsciiChar | Utf8Token _ | EmptyStream -> fst (process_chars loc bp '?' 
s) @@ -618,13 +618,13 @@ let rec next_token loc = parser bp comment_stop bp; between_commands := new_between_commands; t | [< s >] -> match lookup_utf8 loc s with - | Utf8Token (Unicode.Letter, n) -> + | Utf8Token (st, n) when Unicode.is_valid_ident_initial st -> let len = ident_tail loc (nstore n 0 s) s in let id = get_buff len in let ep = Stream.count s in comment_stop bp; (try find_keyword loc id s with Not_found -> IDENT id), set_loc_pos loc bp ep - | AsciiChar | Utf8Token ((Unicode.Symbol | Unicode.IdentPart | Unicode.Unknown), _) -> + | AsciiChar | Utf8Token _ -> let t = process_chars loc bp (Stream.next s) s in comment_stop bp; t | EmptyStream -> |