summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2014-06-05 06:50:10 +0000
committer: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2014-06-05 06:50:10 +0000
commit: 16fc8b07cb8dbbabad0b665b9114925f4349cf38 (patch)
tree: 3ed603afd2e87c5ed70fe6e86d6691d876129024 /lib
parent: c8a892a09e9f61c3af7dae30d39509558f77462a (diff)
Cleaner, more resilient parsing of pragmas.
git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2507 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
Diffstat (limited to 'lib')
-rw-r--r--  lib/Tokenize.mli  33
-rw-r--r--  lib/Tokenize.mll  45
2 files changed, 78 insertions, 0 deletions
diff --git a/lib/Tokenize.mli b/lib/Tokenize.mli
new file mode 100644
index 0000000..a9f22c4
--- /dev/null
+++ b/lib/Tokenize.mli
@@ -0,0 +1,33 @@
+(* *********************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Xavier Leroy, INRIA Paris-Rocquencourt *)
+(* *)
+(* Copyright Institut National de Recherche en Informatique et en *)
+(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU General Public License as published by *)
+(* the Free Software Foundation, either version 2 of the License, or *)
+(* (at your option) any later version. This file is also distributed *)
+(* under the terms of the INRIA Non-Commercial License Agreement. *)
+(* *)
+(* *********************************************************************)
+
+(* Parse a string as a list of tokens *)
+
+val string: string -> string list
+ (** [Tokenize.string s] decomposes [s] into a list of tokens.
+ Whitespace separates tokens. The following substrings
+ constitute tokens:
+ - A string enclosed in double quotes. Within the string,
+ the escape sequences '\t' '\n' '\"' and '\\' are recognized.
+ The token value is the contents of the string without the
+ enclosing double quotes.
+ - A string enclosed in single quotes. No escape sequences are
+ recognized. The token value is the contents of the string without the
+ enclosing single quotes.
+ - A sequence of letters, digits, or the [_], [$], [-] and [.]
+ characters. [-] and [.] cannot appear as the first character.
+ - Any other non-whitespace character is treated as a separate token
+ of length 1.
+ *)
diff --git a/lib/Tokenize.mll b/lib/Tokenize.mll
new file mode 100644
index 0000000..422068b
--- /dev/null
+++ b/lib/Tokenize.mll
@@ -0,0 +1,45 @@
+(* *********************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Xavier Leroy, INRIA Paris-Rocquencourt *)
+(* *)
+(* Copyright Institut National de Recherche en Informatique et en *)
+(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU General Public License as published by *)
+(* the Free Software Foundation, either version 2 of the License, or *)
+(* (at your option) any later version. This file is also distributed *)
+(* under the terms of the INRIA Non-Commercial License Agreement. *)
+(* *)
+(* *********************************************************************)
+
+(* Parse a string as a list of tokens *)
+
+let identstart = [ '0'-'9' 'A'-'Z' 'a'-'z' '$' '_' ]
+let identcont = [ '0'-'9' 'A'-'Z' 'a'-'z' '$' '_' '-' '.' ]
+
+rule tokenize acc = parse
+ | eof { List.rev acc }
+ | [' ' '\t' '\n'] + { tokenize acc lexbuf }
+ | "\"" { tok_dquote acc (Buffer.create 16) lexbuf }
+ | "'" { tok_squote acc (Buffer.create 16) lexbuf }
+ | (identstart identcont*) as s
+ { tokenize (s :: acc) lexbuf }
+ | _ as c { tokenize (String.make 1 c :: acc) lexbuf }
+
+and tok_dquote acc buf = parse
+ | "\"" | eof { tokenize (Buffer.contents buf :: acc) lexbuf }
+ | "\\t" { Buffer.add_char buf '\t'; tok_dquote acc buf lexbuf }
+ | "\\n" { Buffer.add_char buf '\n'; tok_dquote acc buf lexbuf }
+ | "\\" ([ '\\' '\"' ] as c)
+ { Buffer.add_char buf c; tok_dquote acc buf lexbuf }
+ | _ as c { Buffer.add_char buf c; tok_dquote acc buf lexbuf }
+
+and tok_squote acc buf = parse
+ | "\'" | eof { tokenize (Buffer.contents buf :: acc) lexbuf }
+ | _ as c { Buffer.add_char buf c; tok_squote acc buf lexbuf }
+
+{
+let string s =
+ tokenize [] (Lexing.from_string s)
+}