aboutsummaryrefslogtreecommitdiffhomepage
path: root/parsing
diff options
context:
space:
mode:
authorGravatar barras <barras@85f007b7-540e-0410-9357-904b9bb8a0f7>2003-11-25 17:19:44 +0000
committerGravatar barras <barras@85f007b7-540e-0410-9357-904b9bb8a0f7>2003-11-25 17:19:44 +0000
commit801b4eea7475660813dac697d375a5847af3e7b3 (patch)
tree5c49fa39137f1d77cf3eb9f6dc2c5879310bb5bc /parsing
parent2f115b8d422829e6c4640624f5053c82b1e336d1 (diff)
modif lexer: ident peut commencer par _
git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@4991 85f007b7-540e-0410-9357-904b9bb8a0f7
Diffstat (limited to 'parsing')
-rw-r--r--parsing/lexer.ml471
1 files changed, 37 insertions, 34 deletions
diff --git a/parsing/lexer.ml4 b/parsing/lexer.ml4
index a96237e5b..5ffd29984 100644
--- a/parsing/lexer.ml4
+++ b/parsing/lexer.ml4
@@ -110,8 +110,7 @@ let is_normal_token str =
if String.length str > 0 then
match str.[0] with
| ' ' | '\n' | '\r' | '\t' | '"' -> bad_token str
- | '$' | 'a'..'z' | 'A'..'Z' -> true
- | '_' -> !Options.v7
+ | '$' | 'a'..'z' | 'A'..'Z' | '_' -> true
(* utf-8 symbols of the form "E2 xx xx" [E2=226] *)
| '\226' when String.length str > 2 ->
(match str.[1], str.[2] with
@@ -179,16 +178,27 @@ let get_buff len = String.sub !buff 0 len
(* The classical lexer: idents, numbers, quoted strings, comments *)
-let rec ident len = parser
- | [< ' ('a'..'z' | 'A'..'Z' | '0'..'9' | ''' | '_' | '@' as c); s >] ->
- ident (store len c) s
- (* Greek utf-8 letters [CE80-CEBF and CF80-CFBF] (CE=206; BF=191) *)
- | [< ' ('\206' | '\207' as c1); ' ('\128'..'\191' as c2) ; s >] ->
- ident (store (store len c1) c2) s
- (* iso 8859-1 accentuated letters *)
- | [< ' ('\192'..'\214' | '\216'..'\246' | '\248'..'\255' as c); s >] ->
- ident (store len c) s
- | [< >] -> len
+let rec ident_tail len strm =
+ if !Options.v7 then
+ match strm with parser
+ | [< ' ('a'..'z' | 'A'..'Z' | '0'..'9' | ''' | '_' | '@' as c); s >] ->
+ ident_tail (store len c) s
+ (* Greek utf-8 letters [CE80-CEBF and CF80-CFBF] (CE=206; BF=191) *)
+ | [< ' ('\206' | '\207' as c1); ' ('\128'..'\191' as c2) ; s >] ->
+ ident_tail (store (store len c1) c2) s
+ (* iso 8859-1 accentuated letters *)
+ | [< ' ('\192'..'\214' | '\216'..'\246' | '\248'..'\255' as c); s >] ->
+ ident_tail (store len c) s
+ | [< >] -> len
+ else
+ match strm with parser
+ | [< ' ('a'..'z' | 'A'..'Z' | '0'..'9' | ''' | '_' as c); s >] ->
+ ident_tail (store len c) s
+ (* Greek utf-8 letters [CE80-CEBF and CF80-CFBF] (CE=206; BF=191) *)
+ | [< ' ('\206' | '\207' as c1); ' ('\128'..'\191' as c2) ; s >] ->
+ ident_tail (store (store len c1) c2) s
+ | [< >] -> len
+
let rec number len = parser
| [< ' ('0'..'9' as c); s >] -> number (store len c) s
@@ -325,11 +335,11 @@ type token_226_tail =
let parse_226_tail tk = parser
| [< ''\132' as c2; ' ('\128'..'\191' as c3);
(* utf8 letter-like unicode 2100-214F *)
- len = ident (store (store (store 0 '\226') c2) c3) >] ->
+ len = ident_tail (store (store (store 0 '\226') c2) c3) >] ->
TokIdent (get_buff len)
| [< ''\133' as c2; ' ('\128'..'\143' as c3);
(* utf8 letter-like unicode 2100-214F *)
- len = ident (store (store (store 0 '\226') c2) c3) >] ->
+ len = ident_tail (store (store (store 0 '\226') c2) c3) >] ->
TokIdent (get_buff len)
| [< ' ('\134'..'\143' | '\152'..'\155'
| '\164'..'\165' | '\168'..'\171' as c2); 'c3;
@@ -342,7 +352,7 @@ let parse_226_tail tk = parser
t = special (progress_special c3 (progress_special c2
(progress_special '\226' tk))) >] ->
TokSymbol t
- | [< len = ident (store 0 '\226') >] ->
+ | [< len = ident_tail (store 0 '\226') >] ->
TokIdent (get_buff len)
@@ -350,11 +360,12 @@ let parse_226_tail tk = parser
let parse_after_dot bp c strm =
if !Options.v7 then
match strm with parser
- | [< ' ('_' | 'a'..'z' | 'A'..'Z' as c); len = ident (store 0 c) >] ->
+ | [< ' ('_' | 'a'..'z' | 'A'..'Z' as c);
+ len = ident_tail (store 0 c) >] ->
("FIELD", get_buff len)
(* Greek utf-8 letters [CE80-CEBF and CF80-CFBF] (CE=206; BF=191) *)
| [< ' ('\206' | '\207' as c1); ' ('\128'..'\191' as c2);
- len = ident (store (store 0 c1) c2) >] ->
+ len = ident_tail (store (store 0 c1) c2) >] ->
("FIELD", get_buff len)
(* utf-8 mathematical symbols have format E2 xx xx [E2=226] *)
| [< ''\226' as c1; t = parse_226_tail
@@ -365,16 +376,17 @@ let parse_after_dot bp c strm =
| TokIdent t -> ("FIELD", t))
(* iso 8859-1 accentuated letters *)
| [< ' ('\192'..'\214' | '\216'..'\246' | '\248'..'\255' as c);
- len = ident (store 0 c) >] ->
+ len = ident_tail (store 0 c) >] ->
("FIELD", get_buff len)
| [< (t,_) = process_chars bp c >] -> t
else
match strm with parser
- | [< ' ('a'..'z' | 'A'..'Z' as c); len = ident (store 0 c) >] ->
+ | [< ' ('a'..'z' | 'A'..'Z' | '_' as c);
+ len = ident_tail (store 0 c) >] ->
("FIELD", get_buff len)
(* Greek utf-8 letters [CE80-CEBF and CF80-CFBF] (CE=206; BF=191) *)
| [< ' ('\206' | '\207' as c1); ' ('\128'..'\191' as c2);
- len = ident (store (store 0 c1) c2) >] ->
+ len = ident_tail (store (store 0 c1) c2) >] ->
("FIELD", get_buff len)
(* utf-8 mathematical symbols have format E2 xx xx [E2=226] *)
| [< ''\226' as c1; t = parse_226_tail
@@ -391,30 +403,21 @@ let parse_after_dot bp c strm =
let rec next_token = parser bp
| [< '' ' | '\t' | '\n' |'\r' as c; s >] ep ->
comm_loc bp; push_char c; next_token s
- | [< ''$'; len = ident (store 0 '$') >] ep ->
+ | [< ''$'; len = ident_tail (store 0 '$') >] ep ->
comment_stop bp;
(("METAIDENT", get_buff len), (bp,ep))
| [< ''.' as c; t = parse_after_dot bp c >] ep ->
comment_stop bp;
if !Options.v7 & t=("",".") then between_com := true;
(t, (bp,ep))
- | [< ' ('a'..'z' | 'A'..'Z' as c); len = ident (store 0 c) >] ep ->
+ | [< ' ('a'..'z' | 'A'..'Z' | '_' as c);
+ len = ident_tail (store 0 c) >] ep ->
let id = get_buff len in
comment_stop bp;
(try ("", find_keyword id) with Not_found -> ("IDENT", id)), (bp, ep)
- | [< ''_' as c; s >] ->
- if !Options.v7 then
- (match s with parser [< len = ident (store 0 c) >] ep ->
- let id = get_buff len in
- comment_stop bp;
- (try ("", find_keyword id) with Not_found -> ("IDENT", id)),
- (bp, ep))
- else
- (match s with parser [< t = process_chars bp c >] ->
- comment_stop bp; t)
(* Greek utf-8 letters [CE80-CEBF and CF80-CFBF] (CE=206; BF=191) *)
| [< ' ('\206' | '\207' as c1); ' ('\128'..'\191' as c2);
- len = ident (store (store 0 c1) c2) >] ep ->
+ len = ident_tail (store (store 0 c1) c2) >] ep ->
let id = get_buff len in
comment_stop bp;
(try ("", find_keyword id) with Not_found -> ("IDENT", id)), (bp, ep)
@@ -429,7 +432,7 @@ let rec next_token = parser bp
(bp, ep))
(* iso 8859-1 accentuated letters *)
| [< ' ('\192'..'\214' | '\216'..'\246' | '\248'..'\255' as c);
- len = ident (store 0 c) >] ep ->
+ len = ident_tail (store 0 c) >] ep ->
let id = get_buff len in
comment_stop bp;
(try ("", find_keyword id) with Not_found -> ("IDENT", id)), (bp, ep)