aboutsummaryrefslogtreecommitdiffhomepage
path: root/parsing/lexer.ml4
diff options
context:
space:
mode:
authorGravatar barras <barras@85f007b7-540e-0410-9357-904b9bb8a0f7>2003-10-10 15:42:22 +0000
committerGravatar barras <barras@85f007b7-540e-0410-9357-904b9bb8a0f7>2003-10-10 15:42:22 +0000
commitcc1b83979b9978fb2979ae8cda86daddaa62badb (patch)
treea13cc08f374cff641aea74a027cf6b7a85ffeb06 /parsing/lexer.ml4
parentdb1658f0837918e27885c827cc29392068775fa6 (diff)
changement nouvelle syntaxe (pt fixes)
git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@4559 85f007b7-540e-0410-9357-904b9bb8a0f7
Diffstat (limited to 'parsing/lexer.ml4')
-rw-r--r--parsing/lexer.ml434
1 files changed, 29 insertions, 5 deletions
diff --git a/parsing/lexer.ml4 b/parsing/lexer.ml4
index 843bd068f..a96237e5b 100644
--- a/parsing/lexer.ml4
+++ b/parsing/lexer.ml4
@@ -110,7 +110,8 @@ let is_normal_token str =
if String.length str > 0 then
match str.[0] with
| ' ' | '\n' | '\r' | '\t' | '"' -> bad_token str
- | '_' | '$' | 'a'..'z' | 'A'..'Z' -> true
+ | '$' | 'a'..'z' | 'A'..'Z' -> true
+ | '_' -> !Options.v7
(* utf-8 symbols of the form "E2 xx xx" [E2=226] *)
| '\226' when String.length str > 2 ->
(match str.[1], str.[2] with
@@ -122,9 +123,9 @@ let is_normal_token str =
false
| _ ->
(* default to iso 8859-1 "â" *)
- true)
+ !Options.v7)
(* iso 8859-1 accentuated letters *)
- | '\192'..'\214' | '\216'..'\246' | '\248'..'\255' -> true
+ | '\192'..'\214' | '\216'..'\246' | '\248'..'\255' -> !Options.v7
| _ -> false
else
bad_token str
@@ -347,7 +348,7 @@ let parse_226_tail tk = parser
(* Parse what follows a dot *)
let parse_after_dot bp c strm =
- if (* !Options.v7 *) true then
+ if !Options.v7 then
match strm with parser
| [< ' ('_' | 'a'..'z' | 'A'..'Z' as c); len = ident (store 0 c) >] ->
("FIELD", get_buff len)
@@ -369,6 +370,19 @@ let parse_after_dot bp c strm =
| [< (t,_) = process_chars bp c >] -> t
else
match strm with parser
+ | [< ' ('a'..'z' | 'A'..'Z' as c); len = ident (store 0 c) >] ->
+ ("FIELD", get_buff len)
+ (* Greek utf-8 letters [CE80-CEBF and CF80-CFBF] (CE=206; BF=191) *)
+ | [< ' ('\206' | '\207' as c1); ' ('\128'..'\191' as c2);
+ len = ident (store (store 0 c1) c2) >] ->
+ ("FIELD", get_buff len)
+ (* utf-8 mathematical symbols have format E2 xx xx [E2=226] *)
+ | [< ''\226' as c1; t = parse_226_tail
+ (progress_special '.' (Some !token_tree)) >] ep ->
+ (match t with
+ | TokSymbol (Some t) -> ("", t)
+ | TokSymbol None -> err (bp, ep) Undefined_token
+ | TokIdent t -> ("FIELD", t))
| [< (t,_) = process_chars bp c >] -> t
@@ -384,10 +398,20 @@ let rec next_token = parser bp
comment_stop bp;
if !Options.v7 & t=("",".") then between_com := true;
(t, (bp,ep))
- | [< ' ('_' | 'a'..'z' | 'A'..'Z' as c); len = ident (store 0 c) >] ep ->
+ | [< ' ('a'..'z' | 'A'..'Z' as c); len = ident (store 0 c) >] ep ->
let id = get_buff len in
comment_stop bp;
(try ("", find_keyword id) with Not_found -> ("IDENT", id)), (bp, ep)
+ | [< ''_' as c; s >] ->
+ if !Options.v7 then
+ (match s with parser [< len = ident (store 0 c) >] ep ->
+ let id = get_buff len in
+ comment_stop bp;
+ (try ("", find_keyword id) with Not_found -> ("IDENT", id)),
+ (bp, ep))
+ else
+ (match s with parser [< t = process_chars bp c >] ->
+ comment_stop bp; t)
(* Greek utf-8 letters [CE80-CEBF and CF80-CFBF] (CE=206; BF=191) *)
| [< ' ('\206' | '\207' as c1); ' ('\128'..'\191' as c2);
len = ident (store (store 0 c1) c2) >] ep ->