summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2014-06-05 06:50:10 +0000
committerGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2014-06-05 06:50:10 +0000
commit16fc8b07cb8dbbabad0b665b9114925f4349cf38 (patch)
tree3ed603afd2e87c5ed70fe6e86d6691d876129024
parentc8a892a09e9f61c3af7dae30d39509558f77462a (diff)
Cleaner, more resilient parsing of pragmas.
git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2507 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
-rw-r--r--cfrontend/CPragmas.ml80
-rw-r--r--lib/Tokenize.mli33
-rw-r--r--lib/Tokenize.mll45
3 files changed, 103 insertions, 55 deletions
diff --git a/cfrontend/CPragmas.ml b/cfrontend/CPragmas.ml
index 3c0c9f1..c074632 100644
--- a/cfrontend/CPragmas.ml
+++ b/cfrontend/CPragmas.ml
@@ -41,10 +41,12 @@ let process_section_pragma classname istring ustring addrmode accmode =
let re_c_ident = Str.regexp "[A-Za-z_][A-Za-z_0-9]*$"
let process_use_section_pragma classname id =
- if not (Str.string_match re_c_ident id 0) then
- C2C.error (sprintf "bad identifier `%s' in #pragma use_section" id);
- if not (Sections.use_section_for (intern_string id) classname) then
- C2C.error (sprintf "unknown section name `%s'" classname)
+ if id = "," || id = ";" then () else begin
+ if not (Str.string_match re_c_ident id 0) then
+ C2C.error (sprintf "bad identifier `%s' in #pragma use_section" id);
+ if not (Sections.use_section_for (intern_string id) classname) then
+ C2C.error (sprintf "unknown section name `%s'" classname)
+ end
(* #pragma reserve_register *)
@@ -58,59 +60,27 @@ let process_reserve_register_pragma name =
else
C2C.error "cannot reserve this register (not a callee-save)"
-(* Parsing of pragmas using regexps *)
-
-let re_start_pragma_section = Str.regexp "section\\b"
-
-let re_pragma_section = Str.regexp(
- "section[ \t]+"
-^ "\\([A-Za-z_][A-Za-z_0-9]*\\)[ \t]+" (* class_name *)
-^ "\"\\([^\"]*\\)\"?[ \t]*" (* istring *)
-^ "\"\\([^\"]*\\)\"?[ \t]*" (* ustring *)
-^ "\\([a-zA-Z-]+\\)?[ \t]*" (* addressing mode *)
-^ "\\([RWXON]*\\)" (* access mode *)
-)
-
-let re_start_pragma_use_section = Str.regexp "use_section\\b"
-
-let re_pragma_use_section = Str.regexp
- "use_section[ \t]+\
- \\([A-Za-z_][A-Za-z_0-9]*\\)[ \t]+\
- \\(.*\\)$"
-
-let re_split_idents = Str.regexp "[ \t,]+"
-
-let re_start_pragma_reserve_register = Str.regexp "reserve_register\\b"
-
-let re_pragma_reserve_register = Str.regexp
- "reserve_register[ \t]+\\([A-Za-z0-9]+\\)"
+(* Parsing of pragmas *)
let process_pragma name =
- if Str.string_match re_pragma_section name 0 then begin
- process_section_pragma
- (Str.matched_group 1 name) (* classname *)
- (Str.matched_group 2 name) (* istring *)
- (Str.matched_group 3 name) (* ustring *)
- (Str.matched_group 4 name) (* addrmode *)
- (Str.matched_group 5 name); (* accmode *)
- true
- end else if Str.string_match re_start_pragma_section name 0 then
- (C2C.error "ill-formed `section' pragma"; true)
- else if Str.string_match re_pragma_use_section name 0 then begin
- let classname = Str.matched_group 1 name
- and idents = Str.matched_group 2 name in
- let identlist = Str.split re_split_idents idents in
- if identlist = [] then C2C.warning "vacuous `use_section' pragma";
- List.iter (process_use_section_pragma classname) identlist;
- true
- end else if Str.string_match re_start_pragma_use_section name 0 then begin
- C2C.error "ill-formed `use_section' pragma"; true
- end else if Str.string_match re_pragma_reserve_register name 0 then begin
- process_reserve_register_pragma (Str.matched_group 1 name); true
- end else if Str.string_match re_start_pragma_reserve_register name 0 then begin
- C2C.error "ill-formed `reserve_register' pragma"; true
- end else
- false
+ match Tokenize.string name with
+ | ["section"; classname; istring; ustring; addrmode; accmode] ->
+ process_section_pragma classname istring ustring addrmode accmode;
+ true
+ | "section" :: _ ->
+ C2C.error "ill-formed `section' pragma"; true
+ | "use_section" :: classname :: identlist ->
+ if identlist = [] then C2C.warning "vacuous `use_section' pragma";
+ List.iter (process_use_section_pragma classname) identlist;
+ true
+ | "use_section" :: _ ->
+ C2C.error "ill-formed `use_section' pragma"; true
+ | ["reserve_register"; reg] ->
+ process_reserve_register_pragma reg; true
+ | "reserve_register" :: _ ->
+ C2C.error "ill-formed `reserve_register' pragma"; true
+ | _ ->
+ false
let initialize () =
C2C.process_pragma_hook := process_pragma
diff --git a/lib/Tokenize.mli b/lib/Tokenize.mli
new file mode 100644
index 0000000..a9f22c4
--- /dev/null
+++ b/lib/Tokenize.mli
@@ -0,0 +1,33 @@
+(* *********************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Xavier Leroy, INRIA Paris-Rocquencourt *)
+(* *)
+(* Copyright Institut National de Recherche en Informatique et en *)
+(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU General Public License as published by *)
+(* the Free Software Foundation, either version 2 of the License, or *)
+(* (at your option) any later version. This file is also distributed *)
+(* under the terms of the INRIA Non-Commercial License Agreement. *)
+(* *)
+(* *********************************************************************)
+
+(* Parse a string as a list of tokens *)
+
+val string: string -> string list
+ (** [Tokenize.string s] decomposes [s] into a list of tokens.
+ Whitespace separates tokens. The following substrings
+ constitute tokens:
+ - A string enclosed in double quotes. Within the string,
+ the escape sequences '\t' '\n' '\"' and '\\' are recognized.
+ The token value is the contents of the string without the
+ enclosing double quotes.
+ - A string enclosed in single quotes. No escape sequences are
+ recognized. The token value is the contents of the string without the
+ enclosing single quotes.
+ - A sequence of letters, digits, or the [_], [$], [-] and [.]
+ characters. [-] and [.] cannot appear as the first character.
+ - Any other non-whitespace character is treated as a separate token
+ of length 1.
+ *)
diff --git a/lib/Tokenize.mll b/lib/Tokenize.mll
new file mode 100644
index 0000000..422068b
--- /dev/null
+++ b/lib/Tokenize.mll
@@ -0,0 +1,45 @@
+(* *********************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Xavier Leroy, INRIA Paris-Rocquencourt *)
+(* *)
+(* Copyright Institut National de Recherche en Informatique et en *)
+(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU General Public License as published by *)
+(* the Free Software Foundation, either version 2 of the License, or *)
+(* (at your option) any later version. This file is also distributed *)
+(* under the terms of the INRIA Non-Commercial License Agreement. *)
+(* *)
+(* *********************************************************************)
+
+(* Parse a string as a list of tokens *)
+
+let identstart = [ '0'-'9' 'A'-'Z' 'a'-'z' '$' '_' ]
+let identcont = [ '0'-'9' 'A'-'Z' 'a'-'z' '$' '_' '-' '.' ]
+
+rule tokenize acc = parse
+ | eof { List.rev acc }
+ | [' ' '\t' '\n'] + { tokenize acc lexbuf }
+ | "\"" { tok_dquote acc (Buffer.create 16) lexbuf }
+ | "'" { tok_squote acc (Buffer.create 16) lexbuf }
+ | (identstart identcont*) as s
+ { tokenize (s :: acc) lexbuf }
+ | _ as c { tokenize (String.make 1 c :: acc) lexbuf }
+
+and tok_dquote acc buf = parse
+ | "\"" | eof { tokenize (Buffer.contents buf :: acc) lexbuf }
+ | "\\t" { Buffer.add_char buf '\t'; tok_dquote acc buf lexbuf }
+ | "\\n" { Buffer.add_char buf '\n'; tok_dquote acc buf lexbuf }
+ | "\\" ([ '\\' '\"' ] as c)
+ { Buffer.add_char buf c; tok_dquote acc buf lexbuf }
+ | _ as c { Buffer.add_char buf c; tok_dquote acc buf lexbuf }
+
+and tok_squote acc buf = parse
+ | "\'" | eof { tokenize (Buffer.contents buf :: acc) lexbuf }
+ | _ as c { Buffer.add_char buf c; tok_squote acc buf lexbuf }
+
+{
+let string s =
+ tokenize [] (Lexing.from_string s)
+}