aboutsummaryrefslogtreecommitdiffhomepage
path: root/parsing
diff options
context:
space:
mode:
authorGravatar Hugo Herbelin <Hugo.Herbelin@inria.fr>2017-10-04 22:00:19 +0200
committerGravatar Hugo Herbelin <Hugo.Herbelin@inria.fr>2017-10-05 08:36:50 +0200
commit526791d917f9b0804376eae02a462a3b32dd7cba (patch)
treed9ad17bf4c1e287627acc3183bdbabf88b49438c /parsing
parent9a2bb3a6d12a082c61dfda62be53c195fe3cb57c (diff)
Distinguishing pseudo-letters out of the set of unicode letters.
This includes _ and insecable space which can be used in idents and this allows more precise heuristics.
Diffstat (limited to 'parsing')
-rw-r--r--parsing/cLexer.ml416
1 files changed, 8 insertions, 8 deletions
diff --git a/parsing/cLexer.ml4 b/parsing/cLexer.ml4
index 9c9189ffe..f26398fa9 100644
--- a/parsing/cLexer.ml4
+++ b/parsing/cLexer.ml4
@@ -245,8 +245,8 @@ let check_ident str =
loop_id true s
| [< s >] ->
match unlocated lookup_utf8 Ploc.dummy s with
- | Utf8Token (Unicode.Letter, n) -> njunk n s; loop_id true s
- | Utf8Token (Unicode.IdentPart, n) when intail ->
+ | Utf8Token (st, n) when not intail && Unicode.is_valid_ident_initial st -> njunk n s; loop_id true s
+ | Utf8Token (st, n) when intail && Unicode.is_valid_ident_trailing st ->
njunk n s;
loop_id true s
| EmptyStream -> ()
@@ -311,9 +311,9 @@ let rec ident_tail loc len = parser
ident_tail loc (store len c) s
| [< s >] ->
match lookup_utf8 loc s with
- | Utf8Token ((Unicode.IdentPart | Unicode.Letter), n) ->
+ | Utf8Token (st, n) when Unicode.is_valid_ident_trailing st ->
ident_tail loc (nstore n len s) s
- | Utf8Token (Unicode.Unknown, n) ->
+ | Utf8Token (st, n) when Unicode.is_unknown st ->
let id = get_buff len in
let u = String.concat "" (List.map (String.make 1) (Stream.npeek n s)) in
warn_unrecognized_unicode ~loc:!@loc (u,id); len
@@ -539,7 +539,7 @@ let parse_after_dot loc c bp =
(try find_keyword loc ("."^field) s with Not_found -> FIELD field)
| [< s >] ->
match lookup_utf8 loc s with
- | Utf8Token (Unicode.Letter, n) ->
+ | Utf8Token (st, n) when Unicode.is_valid_ident_initial st ->
let len = ident_tail loc (nstore n 0 s) s in
let field = get_buff len in
(try find_keyword loc ("."^field) s with Not_found -> FIELD field)
@@ -553,7 +553,7 @@ let parse_after_qmark loc bp s =
| None -> KEYWORD "?"
| _ ->
match lookup_utf8 loc s with
- | Utf8Token (Unicode.Letter, _) -> LEFTQMARK
+ | Utf8Token (st, _) when Unicode.is_valid_ident_initial st -> LEFTQMARK
| AsciiChar | Utf8Token _ | EmptyStream ->
fst (process_chars loc bp '?' s)
@@ -618,13 +618,13 @@ let rec next_token loc = parser bp
comment_stop bp; between_commands := new_between_commands; t
| [< s >] ->
match lookup_utf8 loc s with
- | Utf8Token (Unicode.Letter, n) ->
+ | Utf8Token (st, n) when Unicode.is_valid_ident_initial st ->
let len = ident_tail loc (nstore n 0 s) s in
let id = get_buff len in
let ep = Stream.count s in
comment_stop bp;
(try find_keyword loc id s with Not_found -> IDENT id), set_loc_pos loc bp ep
- | AsciiChar | Utf8Token ((Unicode.Symbol | Unicode.IdentPart | Unicode.Unknown), _) ->
+ | AsciiChar | Utf8Token _ ->
let t = process_chars loc bp (Stream.next s) s in
comment_stop bp; t
| EmptyStream ->