summaryrefslogtreecommitdiff
path: root/cparser/Lexer.mll
diff options
context:
space:
mode:
authorGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2014-04-29 13:58:18 +0000
committerGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2014-04-29 13:58:18 +0000
commitf1d236b83003eda71e12840732d159fd23b1b771 (patch)
tree0edad805ea24f7b626d2c6fee9fc50da23acfc47 /cparser/Lexer.mll
parent39df8fb19bacb38f317abf06de432b83296dfdd1 (diff)
Integration of Jacques-Henri Jourdan's verified parser.
(Merge of branch newparser.) git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2469 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
Diffstat (limited to 'cparser/Lexer.mll')
-rw-r--r--cparser/Lexer.mll1055
1 files changed, 462 insertions, 593 deletions
diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll
index 90e4d3c..4fa0552 100644
--- a/cparser/Lexer.mll
+++ b/cparser/Lexer.mll
@@ -1,622 +1,491 @@
-(*
- *
- * Copyright (c) 2001-2003,
- * George C. Necula <necula@cs.berkeley.edu>
- * Scott McPeak <smcpeak@cs.berkeley.edu>
- * Wes Weimer <weimer@cs.berkeley.edu>
- * Ben Liblit <liblit@cs.berkeley.edu>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * 3. The names of the contributors may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
- * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
- * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *)
-(* FrontC -- lexical analyzer
-**
-** 1.0 3.22.99 Hugues Cassé First version.
-** 2.0 George Necula 12/12/00: Many extensions
-*)
+(* *********************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Jacques-Henri Jourdan, INRIA Paris-Rocquencourt *)
+(* *)
+(* Copyright Institut National de Recherche en Informatique et en *)
+(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU General Public License as published by *)
+(* the Free Software Foundation, either version 2 of the License, or *)
+(* (at your option) any later version. This file is also distributed *)
+(* under the terms of the INRIA Non-Commercial License Agreement. *)
+(* *)
+(* *********************************************************************)
+
{
open Lexing
-open Parse_aux
-open Parser
-
-exception Eof
-
-module H = Hashtbl
-
-let newline lb =
- let cp = lb.lex_curr_p in
- lb.lex_curr_p <- { cp with pos_lnum = 1 + cp.pos_lnum }
-
-let setCurrentLine lb lineno =
- let cp = lb.lex_curr_p in
- lb.lex_curr_p <- { cp with pos_lnum = lineno }
-
-let setCurrentFile lb file =
- let cp = lb.lex_curr_p in
- lb.lex_curr_p <- { cp with pos_fname = file }
-
-let matchingParsOpen = ref 0
-
-let currentLoc = Cabshelper.currentLoc_lexbuf
-
-let int64_to_char value =
- assert (value <= 255L && value >= 0L);
- Char.chr (Int64.to_int value)
-
-(* takes a not-nul-terminated list, and converts it to a string. *)
-let rec intlist_to_string (str: int64 list):string =
- match str with
- [] -> "" (* add nul-termination *)
- | value::rest ->
- let this_char = int64_to_char value in
- (String.make 1 this_char) ^ (intlist_to_string rest)
-
-(*
-** Keyword hashtable
-*)
-let lexicon = H.create 211
-let init_lexicon _ =
- H.clear lexicon;
- List.iter
- (fun (key, builder) -> H.add lexicon key builder)
- [ ("_Bool", fun loc -> UNDERSCORE_BOOL loc);
- ("auto", fun loc -> AUTO loc);
+open Pre_parser
+open Pre_parser_aux
+open Cabshelper
+open Camlcoq
+
+module SMap = Map.Make(String)
+
+let contexts : string list list ref = ref []
+let lexicon : (string, Cabs.cabsloc -> token) Hashtbl.t = Hashtbl.create 0
+
+let init filename channel : Lexing.lexbuf =
+ assert (!contexts = []);
+ Hashtbl.clear lexicon;
+ List.iter
+ (fun (key, builder) -> Hashtbl.add lexicon key builder)
+ [ ("auto", fun loc -> AUTO loc);
+ ("break", fun loc -> BREAK loc);
+ ("case", fun loc -> CASE loc);
+ ("char", fun loc -> CHAR loc);
("const", fun loc -> CONST loc);
- ("__const", fun loc -> CONST loc);
- ("__const__", fun loc -> CONST loc);
- ("static", fun loc -> STATIC loc);
+ ("continue", fun loc -> CONTINUE loc);
+ ("default", fun loc -> DEFAULT loc);
+ ("do", fun loc -> DO loc);
+ ("double", fun loc -> DOUBLE loc);
+ ("else", fun loc -> ELSE loc);
+ ("enum", fun loc -> ENUM loc);
("extern", fun loc -> EXTERN loc);
+ ("float", fun loc -> FLOAT loc);
+ ("for", fun loc -> FOR loc);
+ ("goto", fun loc -> GOTO loc);
+ ("if", fun loc -> IF loc);
+ ("inline", fun loc -> INLINE loc);
+ ("int", fun loc -> INT loc);
("long", fun loc -> LONG loc);
- ("short", fun loc -> SHORT loc);
("register", fun loc -> REGISTER loc);
+ ("restrict", fun loc -> RESTRICT loc);
+ ("return", fun loc -> RETURN loc);
+ ("short", fun loc -> SHORT loc);
("signed", fun loc -> SIGNED loc);
- ("__signed", fun loc -> SIGNED loc);
- ("unsigned", fun loc -> UNSIGNED loc);
- ("volatile", fun loc -> VOLATILE loc);
- ("__volatile", fun loc -> VOLATILE loc);
- (* WW: see /usr/include/sys/cdefs.h for why __signed and __volatile
- * are accepted GCC-isms *)
- ("char", fun loc -> CHAR loc);
- ("int", fun loc -> INT loc);
- ("float", fun loc -> FLOAT loc);
- ("double", fun loc -> DOUBLE loc);
- ("void", fun loc -> VOID loc);
- ("enum", fun loc -> ENUM loc);
+ ("sizeof", fun loc -> SIZEOF loc);
+ ("static", fun loc -> STATIC loc);
("struct", fun loc -> STRUCT loc);
+ ("switch", fun loc -> SWITCH loc);
("typedef", fun loc -> TYPEDEF loc);
("union", fun loc -> UNION loc);
- ("break", fun loc -> BREAK loc);
- ("continue", fun loc -> CONTINUE loc);
- ("goto", fun loc -> GOTO loc);
- ("return", fun loc -> RETURN loc);
- ("switch", fun loc -> SWITCH loc);
- ("case", fun loc -> CASE loc);
- ("default", fun loc -> DEFAULT loc);
- ("while", fun loc -> WHILE loc);
- ("do", fun loc -> DO loc);
- ("for", fun loc -> FOR loc);
- ("if", fun loc -> IF loc);
- ("else", fun _ -> ELSE);
- ("sizeof", fun loc -> SIZEOF loc);
- (*** Implementation specific keywords ***)
- ("__signed__", fun loc -> SIGNED loc);
- ("__inline__", fun loc -> INLINE loc);
- ("inline", fun loc -> INLINE loc);
- ("__inline", fun loc -> INLINE loc);
- ("_inline", fun loc ->
- if !msvcMode then
- INLINE loc
- else
- IDENT ("_inline", loc));
- ("__attribute__", fun loc -> ATTRIBUTE loc);
- ("__attribute", fun loc -> ATTRIBUTE loc);
-(*
- ("__attribute_used__", fun loc -> ATTRIBUTE_USED loc);
-*)
- ("__blockattribute__", fun _ -> BLOCKATTRIBUTE);
- ("__blockattribute", fun _ -> BLOCKATTRIBUTE);
- ("__asm__", fun loc -> ASM loc);
- ("asm", fun loc -> ASM loc);
- ("__typeof__", fun loc -> TYPEOF loc);
- ("__typeof", fun loc -> TYPEOF loc);
-(*
- ("typeof", fun loc -> TYPEOF loc);
-*)
+ ("unsigned", fun loc -> UNSIGNED loc);
+ ("void", fun loc -> VOID loc);
+ ("volatile", fun loc -> VOLATILE loc);
+ ("while", fun loc -> WHILE loc);
+ ("_Alignas", fun loc -> ALIGNAS loc);
("_Alignof", fun loc -> ALIGNOF loc);
("__alignof", fun loc -> ALIGNOF loc);
("__alignof__", fun loc -> ALIGNOF loc);
- ("_Alignas", fun loc -> ALIGNAS loc);
- ("__volatile__", fun loc -> VOLATILE loc);
- ("__volatile", fun loc -> VOLATILE loc);
-
- ("__FUNCTION__", fun loc -> FUNCTION__ loc);
- ("__func__", fun loc -> FUNCTION__ loc); (* ISO 6.4.2.2 *)
- ("__PRETTY_FUNCTION__", fun loc -> PRETTY_FUNCTION__ loc);
- ("__label__", fun _ -> LABEL__);
- (*** weimer: GCC arcana ***)
- ("__restrict", fun loc -> RESTRICT loc);
- ("restrict", fun loc -> RESTRICT loc);
- ("__packed__", fun loc -> PACKED loc);
-(* ("__extension__", EXTENSION); *)
- (**** MS VC ***)
- ("__int64", fun loc -> INT64 loc);
- ("__int32", fun loc -> INT loc);
- ("_cdecl", fun loc -> MSATTR ("_cdecl", loc));
- ("__cdecl", fun loc -> MSATTR ("__cdecl", loc));
- ("_stdcall", fun loc -> MSATTR ("_stdcall", loc));
- ("__stdcall", fun loc -> MSATTR ("__stdcall", loc));
- ("_fastcall", fun loc -> MSATTR ("_fastcall", loc));
- ("__fastcall", fun loc -> MSATTR ("__fastcall", loc));
- ("__w64", fun loc -> MSATTR("__w64", loc));
- ("__declspec", fun loc -> DECLSPEC loc);
- ("__forceinline", fun loc -> INLINE loc); (* !! we turn forceinline
- * into inline *)
- ("__try", fun loc -> TRY loc);
- ("__except", fun loc -> EXCEPT loc);
- ("__finally", fun loc -> FINALLY loc);
- (* weimer: some files produced by 'GCC -E' expect this type to be
- * defined *)
- ("__builtin_va_list", fun loc -> NAMED_TYPE ("__builtin_va_list", loc));
+ ("__attribute", fun loc -> ATTRIBUTE loc);
+ ("__attribute__", fun loc -> ATTRIBUTE loc);
+ ("_Bool", fun loc -> UNDERSCORE_BOOL loc);
("__builtin_va_arg", fun loc -> BUILTIN_VA_ARG loc);
- ("__builtin_types_compatible_p", fun loc -> BUILTIN_TYPES_COMPAT loc);
- ("__builtin_offsetof", fun loc -> BUILTIN_OFFSETOF loc);
- (* On some versions of GCC __thread is a regular identifier *)
- ("__thread", fun loc -> THREAD loc)
- ]
-
-(* Mark an identifier as a type name. The old mapping is preserved and will
- * be reinstated when we exit this context *)
-let add_type name =
- (* ignore (print_string ("adding type name " ^ name ^ "\n")); *)
- H.add lexicon name (fun loc -> NAMED_TYPE (name, loc))
-
-let context : string list list ref = ref []
-
-let push_context _ = context := []::!context
-
-let pop_context _ =
- match !context with
- [] -> assert false
- | con::sub ->
- (context := sub;
- List.iter (fun name ->
- (* ignore (print_string ("removing lexicon for " ^ name ^ "\n")); *)
- H.remove lexicon name) con)
-
-(* Mark an identifier as a variable name. The old mapping is preserved and
- * will be reinstated when we exit this context *)
-let add_identifier name =
- match !context with
- [] -> () (* Just ignore raise (InternalError "Empty context stack") *)
- | con::sub ->
- context := (name::con)::sub;
- H.add lexicon name (fun loc -> IDENT (name, loc))
-
-
-(*
-** Useful primitives
-*)
-let scan_ident lb id =
- let here = currentLoc lb in
- try (H.find lexicon id) here
- (* default to variable name, as opposed to type *)
- with Not_found -> IDENT (id, here)
-
-
-(*
-** Buffer processor
-*)
-
-
-let init ~(filename: string) ic : Lexing.lexbuf =
- init_lexicon ();
- (* Inititialize the pointer in Errormsg *)
- Parse_aux.add_type := add_type;
- Parse_aux.push_context := push_context;
- Parse_aux.pop_context := pop_context;
- Parse_aux.add_identifier := add_identifier;
- (* Build lexbuf *)
- let lb = Lexing.from_channel ic in
- let cp = lb.lex_curr_p in
- lb.lex_curr_p <- {cp with pos_fname = filename; pos_lnum = 1};
- lb
-
-let finish () =
- ()
-
-(*** Error handling ***)
-let error = parse_error
-
-
-(*** escape character management ***)
-let scan_escape (char: char) : int64 =
- let result = match char with
- 'n' -> '\n'
- | 'r' -> '\r'
- | 't' -> '\t'
- | 'b' -> '\b'
- | 'f' -> '\012' (* ASCII code 12 *)
- | 'v' -> '\011' (* ASCII code 11 *)
- | 'a' -> '\007' (* ASCII code 7 *)
- | 'e' | 'E' -> '\027' (* ASCII code 27. This is a GCC extension *)
- | '\'' -> '\''
- | '"'-> '"' (* '"' *)
- | '?' -> '?'
- | '(' when not !msvcMode -> '('
- | '{' when not !msvcMode -> '{'
- | '[' when not !msvcMode -> '['
- | '%' when not !msvcMode -> '%'
- | '\\' -> '\\'
- | other -> error ("Unrecognized escape sequence: \\" ^ (String.make 1 other)); raise Parsing.Parse_error
- in
- Int64.of_int (Char.code result)
-
-let scan_hex_escape str =
- let radix = Int64.of_int 16 in
- let the_value = ref Int64.zero in
- (* start at character 2 to skip the \x *)
- for i = 2 to (String.length str) - 1 do
- let thisDigit = Cabshelper.valueOfDigit (String.get str i) in
- (* the_value := !the_value * 16 + thisDigit *)
- the_value := Int64.add (Int64.mul !the_value radix) thisDigit
- done;
- !the_value
-
-let scan_oct_escape str =
- let radix = Int64.of_int 8 in
- let the_value = ref Int64.zero in
- (* start at character 1 to skip the \x *)
- for i = 1 to (String.length str) - 1 do
- let thisDigit = Cabshelper.valueOfDigit (String.get str i) in
- (* the_value := !the_value * 8 + thisDigit *)
- the_value := Int64.add (Int64.mul !the_value radix) thisDigit
- done;
- !the_value
-
-let lex_hex_escape remainder lexbuf =
- let prefix = scan_hex_escape (Lexing.lexeme lexbuf) in
- prefix :: remainder lexbuf
-
-let lex_oct_escape remainder lexbuf =
- let prefix = scan_oct_escape (Lexing.lexeme lexbuf) in
- prefix :: remainder lexbuf
-
-let lex_simple_escape remainder lexbuf =
- let lexchar = Lexing.lexeme_char lexbuf 1 in
- let prefix = scan_escape lexchar in
- prefix :: remainder lexbuf
-
-let lex_unescaped remainder lexbuf =
- let prefix = Int64.of_int (Char.code (Lexing.lexeme_char lexbuf 0)) in
- prefix :: remainder lexbuf
-
-let lex_comment remainder lexbuf =
- let ch = Lexing.lexeme_char lexbuf 0 in
- let prefix = Int64.of_int (Char.code ch) in
- if ch = '\n' then newline lexbuf;
- prefix :: remainder lexbuf
-
-let make_char (i:int64):char =
- let min_val = Int64.zero in
- let max_val = Int64.of_int 255 in
- (* if i < 0 || i > 255 then error*)
- if compare i min_val < 0 || compare i max_val > 0 then begin
- let msg = Printf.sprintf "clexer:make_char: character 0x%Lx too big" i in
- error msg
+ ("__packed__", fun loc -> PACKED loc);
+ ("__asm__", fun loc -> ASM loc);
+ ("__asm", fun loc -> ASM loc);
+ ("asm", fun loc -> ASM loc);
+ ];
+
+ push_context := begin fun () -> contexts := []::!contexts end;
+ pop_context := begin fun () ->
+ match !contexts with
+ | [] -> assert false
+ | t::q -> List.iter (Hashtbl.remove lexicon) t;
+ contexts := q
end;
- Char.chr (Int64.to_int i)
-
-
-(* ISO standard locale-specific function to convert a wide character
- * into a sequence of normal characters. Here we work on strings.
- * We convert L"Hi" to "H\000i\000"
- matth: this seems unused.
-let wbtowc wstr =
- let len = String.length wstr in
- let dest = String.make (len * 2) '\000' in
- for i = 0 to len-1 do
- dest.[i*2] <- wstr.[i] ;
- done ;
- dest
-*)
-
-(* This function converst the "Hi" in L"Hi" to { L'H', L'i', L'\0' }
- matth: this seems unused.
-let wstr_to_warray wstr =
- let len = String.length wstr in
- let res = ref "{ " in
- for i = 0 to len-1 do
- res := !res ^ (Printf.sprintf "L'%c', " wstr.[i])
- done ;
- res := !res ^ "}" ;
- !res
-*)
-}
+ declare_varname := begin fun id ->
+ Hashtbl.add lexicon id (fun loc -> VAR_NAME (id, ref VarId, loc));
+ match !contexts with
+ | [] -> ()
+ | t::q -> contexts := (id::t)::q
+ end;
-let decdigit = ['0'-'9']
-let octdigit = ['0'-'7']
-let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
-let letter = ['a'- 'z' 'A'-'Z']
+ declare_typename := begin fun id ->
+ Hashtbl.add lexicon id (fun loc -> TYPEDEF_NAME (id, ref TypedefId, loc));
+ match !contexts with
+ | [] -> ()
+ | t::q -> contexts := (id::t)::q
+ end;
+ !declare_typename "__builtin_va_list";
-let usuffix = ['u' 'U']
-let lsuffix = "l"|"L"|"ll"|"LL"
-let intsuffix = lsuffix | usuffix | usuffix lsuffix | lsuffix usuffix
- | usuffix ? "i64"
+ let lb = Lexing.from_channel channel in
+ lb.lex_curr_p <-
+ {lb.lex_curr_p with pos_fname = filename; pos_lnum = 1};
+ lb
-let hexprefix = '0' ['x' 'X']
+let currentLoc =
+ let nextident = ref 0 in
+ let getident () =
+ nextident := !nextident + 1;
+ !nextident
+ in
+ fun lb ->
+ let p = Lexing.lexeme_start_p lb in
+ Cabs.({ lineno = p.Lexing.pos_lnum;
+ filename = p.Lexing.pos_fname;
+ byteno = p.Lexing.pos_cnum;
+ ident = getident ();})
-let intnum = decdigit+ intsuffix?
-let octnum = '0' octdigit+ intsuffix?
-let hexnum = hexprefix hexdigit+ intsuffix?
+}
-let floating_suffix = ['f' 'F' 'l' 'L'] as suffix
-let exponent_part = ['e' 'E']((['+' '-']? decdigit+) as expo)
-let fractional_constant = ((decdigit+ as intpart)? '.' (decdigit+ as frac))
- |((decdigit+ as intpart) '.')
+(* Identifiers *)
+let digit = ['0'-'9']
+let hexadecimal_digit = ['0'-'9' 'A'-'F' 'a'-'f']
+let nondigit = ['_' 'a'-'z' 'A'-'Z']
+
+let hex_quad = hexadecimal_digit hexadecimal_digit
+ hexadecimal_digit hexadecimal_digit
+let universal_character_name =
+ "\\u" hex_quad
+ | "\\U" hex_quad hex_quad
+
+let identifier_nondigit =
+ nondigit
+(*| universal_character_name*)
+ | '$'
+
+let identifier = identifier_nondigit (identifier_nondigit|digit)*
+
+(* Whitespaces *)
+let whitespace_char_no_newline = [' ' '\t' '\012' '\r']
+
+(* Integer constants *)
+let nonzero_digit = ['1'-'9']
+let decimal_constant = nonzero_digit digit*
+
+let octal_digit = ['0'-'7']
+let octal_constant = '0' octal_digit*
+
+let hexadecimal_prefix = "0x" | "0X"
+let hexadecimal_constant =
+ hexadecimal_prefix hexadecimal_digit+
+
+let unsigned_suffix = ['u' 'U']
+let long_suffix = ['l' 'L']
+let long_long_suffix = "ll" | "LL"
+let integer_suffix =
+ unsigned_suffix long_suffix?
+ | unsigned_suffix long_long_suffix
+ | long_suffix unsigned_suffix?
+ | long_long_suffix unsigned_suffix?
+
+let integer_constant =
+ decimal_constant integer_suffix?
+ | octal_constant integer_suffix?
+ | hexadecimal_constant integer_suffix?
+
+(* Floating constants *)
+let sign = ['-' '+']
+let digit_sequence = digit+
+let floating_suffix = ['f' 'l' 'F' 'L'] as suffix
+
+let fractional_constant =
+ (digit_sequence as intpart)? '.' (digit_sequence as frac)
+ | (digit_sequence as intpart) '.'
+let exponent_part =
+ 'e' ((sign? digit_sequence) as expo)
+ | 'E' ((sign? digit_sequence) as expo)
let decimal_floating_constant =
- (fractional_constant exponent_part? floating_suffix?)
- |((decdigit+ as intpart) exponent_part floating_suffix?)
-let binary_exponent_part = ['p' 'P']((['+' '-']? decdigit+) as expo)
-let hexadecimal_fractional_constant = ((hexdigit+ as intpart)? '.' (hexdigit+ as frac))
- |((hexdigit+ as intpart) '.')
+ fractional_constant exponent_part? floating_suffix?
+ | (digit_sequence as intpart) exponent_part floating_suffix?
+
+let hexadecimal_digit_sequence = hexadecimal_digit+
+let hexadecimal_fractional_constant =
+ (hexadecimal_digit_sequence as intpart)? '.' (hexadecimal_digit_sequence as frac)
+ | (hexadecimal_digit_sequence as intpart) '.'
+let binary_exponent_part =
+ 'p' ((sign? digit_sequence) as expo)
+ | 'P' ((sign? digit_sequence) as expo)
let hexadecimal_floating_constant =
- (hexprefix hexadecimal_fractional_constant binary_exponent_part floating_suffix?)
- |(hexprefix (hexdigit+ as intpart) binary_exponent_part floating_suffix?)
-
-let ident = (letter|'_'|'$')(letter|decdigit|'_'|'$')*
-let blank = [' ' '\t' '\012' '\r']+
-let escape = '\\' _
-let hex_escape = '\\' ['x' 'X'] hexdigit+
-let oct_escape = '\\' octdigit octdigit? octdigit?
-
-rule initial =
- parse "/*" { comment lexbuf;
- initial lexbuf}
-| "//" { onelinecomment lexbuf;
- newline lexbuf;
- initial lexbuf
- }
-| blank { initial lexbuf}
-| '\n' { newline lexbuf;
- initial lexbuf }
-| '\\' '\r' * '\n' { newline lexbuf;
- initial lexbuf
- }
-| '#' { hash lexbuf}
-(*
-| "_Pragma" { PRAGMA (currentLoc lexbuf) }
-*)
-| '\'' { CST_CHAR (chr lexbuf, currentLoc lexbuf)}
-| "L'" { CST_WCHAR (chr lexbuf, currentLoc lexbuf) }
-| '"' { (* '"' *)
-(* matth: BUG: this could be either a regular string or a wide string.
- * e.g. if it's the "world" in
- * L"Hello, " "world"
- * then it should be treated as wide even though there's no L immediately
- * preceding it. See test/small1/wchar5.c for a failure case. *)
- CST_STRING (str lexbuf, currentLoc lexbuf) }
-| "L\"" { (* weimer: wchar_t string literal *)
- CST_WSTRING(str lexbuf, currentLoc lexbuf) }
-| decimal_floating_constant
- {CST_FLOAT ({Cabs.isHex_FI = false;
- Cabs.integer_FI = intpart;
- Cabs.fraction_FI = frac;
- Cabs.exponent_FI = expo;
- Cabs.suffix_FI = suffix},
- currentLoc lexbuf)}
-| hexadecimal_floating_constant
- {CST_FLOAT ({Cabs.isHex_FI = true;
- Cabs.integer_FI = intpart;
- Cabs.fraction_FI = frac;
- Cabs.exponent_FI = Some expo;
- Cabs.suffix_FI = suffix},
- currentLoc lexbuf)}
-| hexnum {CST_INT (Lexing.lexeme lexbuf, currentLoc lexbuf)}
-| octnum {CST_INT (Lexing.lexeme lexbuf, currentLoc lexbuf)}
-| intnum {CST_INT (Lexing.lexeme lexbuf, currentLoc lexbuf)}
-| "!quit!" {EOF}
-| "..." {ELLIPSIS}
-| "+=" {PLUS_EQ}
-| "-=" {MINUS_EQ}
-| "*=" {STAR_EQ}
-| "/=" {SLASH_EQ}
-| "%=" {PERCENT_EQ}
-| "|=" {PIPE_EQ}
-| "&=" {AND_EQ}
-| "^=" {CIRC_EQ}
-| "<<=" {INF_INF_EQ}
-| ">>=" {SUP_SUP_EQ}
-| "<<" {INF_INF}
-| ">>" {SUP_SUP}
-| "==" {EQ_EQ}
-| "!=" {EXCLAM_EQ}
-| "<=" {INF_EQ}
-| ">=" {SUP_EQ}
-| "=" {EQ}
-| "<" {INF}
-| ">" {SUP}
-| "++" {PLUS_PLUS (currentLoc lexbuf)}
-| "--" {MINUS_MINUS (currentLoc lexbuf)}
-| "->" {ARROW}
-| '+' {PLUS (currentLoc lexbuf)}
-| '-' {MINUS (currentLoc lexbuf)}
-| '*' {STAR (currentLoc lexbuf)}
-| '/' {SLASH}
-| '%' {PERCENT}
-| '!' {EXCLAM (currentLoc lexbuf)}
-| "&&" {AND_AND (currentLoc lexbuf)}
-| "||" {PIPE_PIPE}
-| '&' {AND (currentLoc lexbuf)}
-| '|' {PIPE}
-| '^' {CIRC}
-| '?' {QUEST}
-| ':' {COLON}
-| '~' {TILDE (currentLoc lexbuf)}
-
-| '{' {LBRACE (currentLoc lexbuf)}
-| '}' {RBRACE (currentLoc lexbuf)}
-| '[' {LBRACKET}
-| ']' {RBRACKET}
-| '(' { (LPAREN (currentLoc lexbuf)) }
-| ')' {RPAREN}
-| ';' { (SEMICOLON (currentLoc lexbuf)) }
-| ',' {COMMA}
-| '.' {DOT}
-(* XL: redundant?
-| "sizeof" {SIZEOF (currentLoc lexbuf)}
-*)
-| "__asm" { if !msvcMode then
- MSASM (msasm lexbuf, currentLoc lexbuf)
- else (ASM (currentLoc lexbuf)) }
-
-(* If we see __pragma we eat it and the matching parentheses as well *)
-| "__pragma" { matchingParsOpen := 0;
- let _ = matchingpars lexbuf in
- initial lexbuf
- }
-
-(* __extension__ is a black. The parser runs into some conflicts if we let it
- * pass *)
-| "__extension__" {initial lexbuf }
-| ident {scan_ident lexbuf (Lexing.lexeme lexbuf)}
-| eof {EOF}
-| _ {parse_error "Invalid symbol"; raise Parsing.Parse_error }
-and comment =
- parse
- "*/" { () }
-| eof { () }
-| '\n' { newline lexbuf; comment lexbuf }
-| _ { comment lexbuf }
-
-
-and onelinecomment = parse
- '\n'|eof { () }
-| _ { onelinecomment lexbuf }
-
-and matchingpars = parse
- '\n' { newline lexbuf; matchingpars lexbuf }
-| blank { matchingpars lexbuf }
-| '(' { incr matchingParsOpen; matchingpars lexbuf }
-| ')' { decr matchingParsOpen;
- if !matchingParsOpen = 0 then
- ()
- else
- matchingpars lexbuf
- }
-| "/*" { comment lexbuf; matchingpars lexbuf}
-| '"' { (* '"' *)
- let _ = str lexbuf in
- matchingpars lexbuf
- }
-| _ { matchingpars lexbuf }
-
-(* # <line number> <file name> ... *)
+ hexadecimal_prefix hexadecimal_fractional_constant
+ binary_exponent_part floating_suffix?
+ | hexadecimal_prefix (hexadecimal_digit_sequence as intpart)
+ binary_exponent_part floating_suffix?
+
+(* Charater constants *)
+let simple_escape_sequence =
+ "\\'" | "\\\"" | "\\?" | "\\\\" | "\\a" | "\\b" | "\\f" | "\\n"
+ | "\\r" | "\\t" | "\\v"
+let octal_escape_sequence =
+ '\\' octal_digit
+ | '\\' octal_digit octal_digit
+ | '\\' octal_digit octal_digit octal_digit
+let hexadecimal_escape_sequence = "\\x" hexadecimal_digit+
+let escape_sequence =
+ simple_escape_sequence
+ | octal_escape_sequence
+ | hexadecimal_escape_sequence
+ | universal_character_name
+let c_char =
+ [^ '\'' '\\' '\n']
+ | escape_sequence
+let c_char_sequence = c_char+
+let character_constant =
+ "'" c_char_sequence "'"
+ | "L'" c_char_sequence "'"
+
+(* String literals *)
+let s_char =
+ [^ '"' '\\' '\n']
+ | escape_sequence
+let s_char_sequence = s_char+
+let string_literal =
+ '"' s_char_sequence? '"'
+ | 'L' '"' s_char_sequence? '"'
+
+(* We assume comments are removed by the preprocessor. *)
+rule initial = parse
+ | '\n' { new_line lexbuf; initial_linebegin lexbuf }
+ | whitespace_char_no_newline { initial lexbuf }
+ | integer_constant as s { CONSTANT (Cabs.CONST_INT s, currentLoc lexbuf) }
+ | decimal_floating_constant { CONSTANT (Cabs.CONST_FLOAT
+ {Cabs.isHex_FI = false;
+ Cabs.integer_FI = intpart;
+ Cabs.fraction_FI = frac;
+ Cabs.exponent_FI = expo;
+ Cabs.suffix_FI =
+ match suffix with
+ | None -> None
+ | Some c -> Some (String.make 1 c) },
+ currentLoc lexbuf)}
+ | hexadecimal_floating_constant { CONSTANT (Cabs.CONST_FLOAT
+ {Cabs.isHex_FI = true;
+ Cabs.integer_FI = intpart;
+ Cabs.fraction_FI = frac;
+ Cabs.exponent_FI = Some expo;
+ Cabs.suffix_FI =
+ match suffix with
+ | None -> None
+ | Some c -> Some (String.make 1 c) },
+ currentLoc lexbuf)}
+ | character_constant as s { CONSTANT (Cabs.CONST_CHAR s, currentLoc lexbuf) }
+ | string_literal as s { STRING_LITERAL (s, currentLoc lexbuf) }
+ | "..." { ELLIPSIS(currentLoc lexbuf) }
+ | "+=" { ADD_ASSIGN(currentLoc lexbuf) }
+ | "-=" { SUB_ASSIGN(currentLoc lexbuf) }
+ | "*=" { MUL_ASSIGN(currentLoc lexbuf) }
+ | "/=" { DIV_ASSIGN(currentLoc lexbuf) }
+ | "%=" { MOD_ASSIGN(currentLoc lexbuf) }
+ | "|=" { OR_ASSIGN(currentLoc lexbuf) }
+ | "&=" { AND_ASSIGN(currentLoc lexbuf) }
+ | "^=" { XOR_ASSIGN(currentLoc lexbuf) }
+ | "<<=" { LEFT_ASSIGN(currentLoc lexbuf) }
+ | ">>=" { RIGHT_ASSIGN(currentLoc lexbuf) }
+ | "<<" { LEFT(currentLoc lexbuf) }
+ | ">>" { RIGHT(currentLoc lexbuf) }
+ | "==" { EQEQ(currentLoc lexbuf) }
+ | "!=" { NEQ(currentLoc lexbuf) }
+ | "<=" { LEQ(currentLoc lexbuf) }
+ | ">=" { GEQ(currentLoc lexbuf) }
+ | "=" { EQ(currentLoc lexbuf) }
+ | "<" { LT(currentLoc lexbuf) }
+ | ">" { GT(currentLoc lexbuf) }
+ | "++" { INC(currentLoc lexbuf) }
+ | "--" { DEC(currentLoc lexbuf) }
+ | "->" { PTR(currentLoc lexbuf) }
+ | "+" { PLUS(currentLoc lexbuf) }
+ | "-" { MINUS(currentLoc lexbuf) }
+ | "*" { STAR(currentLoc lexbuf) }
+ | "/" { SLASH(currentLoc lexbuf) }
+ | "%" { PERCENT(currentLoc lexbuf) }
+ | "!" { BANG(currentLoc lexbuf) }
+ | "&&" { ANDAND(currentLoc lexbuf) }
+ | "||" { BARBAR(currentLoc lexbuf) }
+ | "&" { AND(currentLoc lexbuf) }
+ | "|" { BAR(currentLoc lexbuf) }
+ | "^" { HAT(currentLoc lexbuf) }
+ | "?" { QUESTION(currentLoc lexbuf) }
+ | ":" { COLON(currentLoc lexbuf) }
+ | "~" { TILDE(currentLoc lexbuf) }
+ | "{"|"<%" { LBRACE(currentLoc lexbuf) }
+ | "}"|"%>" { RBRACE(currentLoc lexbuf) }
+ | "["|"<:" { LBRACK(currentLoc lexbuf) }
+ | "]"|":>" { RBRACK(currentLoc lexbuf) }
+ | "(" { LPAREN(currentLoc lexbuf) }
+ | ")" { RPAREN(currentLoc lexbuf) }
+ | ";" { SEMICOLON(currentLoc lexbuf) }
+ | "," { COMMA(currentLoc lexbuf) }
+ | "." { DOT(currentLoc lexbuf) }
+ | identifier as id {
+ try Hashtbl.find lexicon id (currentLoc lexbuf)
+ with Not_found ->
+ let pref = "__builtin_" in
+ if String.length id > String.length pref &&
+ String.sub id 0 (String.length pref) = pref then
+ VAR_NAME (id, ref VarId, currentLoc lexbuf)
+ else
+ UNKNOWN_NAME(id, ref OtherId, currentLoc lexbuf) }
+ | eof { EOF }
+ | _ {
+ Cerrors.fatal_error "%s:%d Error:@ invalid symbol"
+ lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum }
+
+and initial_linebegin = parse
+ | '\n' { new_line lexbuf; initial_linebegin lexbuf }
+ | whitespace_char_no_newline { initial_linebegin lexbuf }
+ | '#' { hash lexbuf }
+ | "" { initial lexbuf }
+
+(* We assume gcc -E syntax. **)
and hash = parse
- '\n' { newline lexbuf; initial lexbuf}
-| blank { hash lexbuf}
-| intnum { (* We are seeing a line number. This is the number for the
- * next line *)
- let s = Lexing.lexeme lexbuf in
- begin try
- setCurrentLine lexbuf (int_of_string s - 1)
- with Failure ("int_of_string") ->
- (* the int is too big. *)
- ()
- end;
- (* A file name may follow *)
- file lexbuf }
-| "line" { hash lexbuf } (* MSVC line number info *)
-| "pragma" blank
- { let here = currentLoc lexbuf in
- PRAGMA_LINE (pragma lexbuf, here)
- }
-| _ { endline lexbuf}
-
-and file = parse
- '\n' { newline lexbuf; initial lexbuf}
-| blank { file lexbuf}
-| '"' [^ '\012' '\t' '"']* '"' { (* '"' *)
- let n = Lexing.lexeme lexbuf in
- let n1 = String.sub n 1
- ((String.length n) - 2) in
- setCurrentFile lexbuf n1;
- endline lexbuf}
-
-| _ { endline lexbuf}
-
-and endline = parse
- '\n' { newline lexbuf; initial lexbuf}
-| eof { EOF }
-| _ { endline lexbuf}
-
-and pragma = parse
- '\n' { newline lexbuf; "" }
-| _ { let cur = Lexing.lexeme lexbuf in
- cur ^ (pragma lexbuf) }
-
-and str = parse
- '"' {[]} (* no nul terminiation in CST_STRING '"' *)
-| hex_escape { lex_hex_escape str lexbuf}
-| oct_escape { lex_oct_escape str lexbuf}
-| escape { lex_simple_escape str lexbuf}
-| _ { lex_unescaped str lexbuf}
-
-and chr = parse
- '\'' {[]}
-| hex_escape {lex_hex_escape chr lexbuf}
-| oct_escape {lex_oct_escape chr lexbuf}
-| escape {lex_simple_escape chr lexbuf}
-| _ {lex_unescaped chr lexbuf}
-
-and msasm = parse
- blank { msasm lexbuf }
-| '{' { msasminbrace lexbuf }
-| _ { let cur = Lexing.lexeme lexbuf in
- cur ^ (msasmnobrace lexbuf) }
-
-and msasminbrace = parse
- '}' { "" }
-| _ { let cur = Lexing.lexeme lexbuf in
- cur ^ (msasminbrace lexbuf) }
-and msasmnobrace = parse
- ['}' ';' '\n'] { lexbuf.Lexing.lex_curr_pos <-
- lexbuf.Lexing.lex_curr_pos - 1;
- "" }
-| "__asm" { lexbuf.Lexing.lex_curr_pos <-
- lexbuf.Lexing.lex_curr_pos - 5;
- "" }
-| _ { let cur = Lexing.lexeme lexbuf in
-
- cur ^ (msasmnobrace lexbuf) }
+ | ' ' (decimal_constant as n) " \"" (([^ '\n']#whitespace_char_no_newline)* as file) "\"" [^ '\n']* '\n'
+ { let n =
+ try int_of_string n
+ with Failure "int_of_string" ->
+ Cerrors.fatal_error "%s:%d Error:@ invalid line number"
+ lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum
+ in
+ lexbuf.lex_curr_p <- {
+ lexbuf.lex_curr_p with
+ pos_fname = file;
+ pos_lnum = n;
+ pos_bol = lexbuf.lex_curr_p.pos_cnum
+ };
+ initial_linebegin lexbuf }
+ | "pragma" whitespace_char_no_newline ([^ '\n']* as s) '\n'
+ { new_line lexbuf; PRAGMA (s, currentLoc lexbuf) }
+ | [^ '\n']* eof
+ { Cerrors.fatal_error "%s:%d Error:@ unexpected end of file"
+ lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum }
+ | _
+ { Cerrors.fatal_error "%s:%d Error:@ invalid symbol"
+ lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum }
{
+ open Streams
+ open Specif
+ open Parser
+ open Aut.GramDefs
+
+ let tokens_stream lexbuf : token coq_Stream =
+ let tokens = Queue.create () in
+ let lexer_wraper lexbuf : Pre_parser.token =
+ let res =
+ if lexbuf.lex_curr_p.pos_cnum = lexbuf.lex_curr_p.pos_bol then
+ initial_linebegin lexbuf
+ else
+ initial lexbuf
+ in
+ Queue.push res tokens;
+ res
+ in
+ Pre_parser.translation_unit_file lexer_wraper lexbuf;
+ assert (!contexts = []);
+ let rec compute_token_stream () =
+ let loop t v =
+ Cons (Coq_existT (t, Obj.magic v), Lazy.from_fun compute_token_stream)
+ in
+ match Queue.pop tokens with
+ | ADD_ASSIGN loc -> loop ADD_ASSIGN't loc
+ | AND loc -> loop AND't loc
+ | ANDAND loc -> loop ANDAND't loc
+ | AND_ASSIGN loc -> loop AND_ASSIGN't loc
+ | AUTO loc -> loop AUTO't loc
+ | BANG loc -> loop BANG't loc
+ | BAR loc -> loop BAR't loc
+ | BARBAR loc -> loop BARBAR't loc
+ | UNDERSCORE_BOOL loc -> loop UNDERSCORE_BOOL't loc
+ | BREAK loc -> loop BREAK't loc
+ | BUILTIN_VA_ARG loc -> loop BUILTIN_VA_ARG't loc
+ | CASE loc -> loop CASE't loc
+ | CHAR loc -> loop CHAR't loc
+ | COLON loc -> loop COLON't loc
+ | COMMA loc -> loop COMMA't loc
+ | CONST loc -> loop CONST't loc
+ | CONSTANT (cst, loc) -> loop CONSTANT't (cst, loc)
+ | CONTINUE loc -> loop CONTINUE't loc
+ | DEC loc -> loop DEC't loc
+ | DEFAULT loc -> loop DEFAULT't loc
+ | DIV_ASSIGN loc -> loop DIV_ASSIGN't loc
+ | DO loc -> loop DO't loc
+ | DOT loc -> loop DOT't loc
+ | DOUBLE loc -> loop DOUBLE't loc
+ | ELLIPSIS loc -> loop ELLIPSIS't loc
+ | ELSE loc -> loop ELSE't loc
+ | ENUM loc -> loop ENUM't loc
+ | EOF -> loop EOF't ()
+ | EQ loc -> loop EQ't loc
+ | EQEQ loc -> loop EQEQ't loc
+ | EXTERN loc -> loop EXTERN't loc
+ | FLOAT loc -> loop FLOAT't loc
+ | FOR loc -> loop FOR't loc
+ | GEQ loc -> loop GEQ't loc
+ | GOTO loc -> loop GOTO't loc
+ | GT loc -> loop GT't loc
+ | HAT loc -> loop HAT't loc
+ | IF loc -> loop IF't loc
+ | INC loc -> loop INC't loc
+ | INLINE loc -> loop INLINE't loc
+ | INT loc -> loop INT't loc
+ | LBRACE loc -> loop LBRACE't loc
+ | LBRACK loc -> loop LBRACK't loc
+ | LEFT loc -> loop LEFT't loc
+ | LEFT_ASSIGN loc -> loop LEFT_ASSIGN't loc
+ | LEQ loc -> loop LEQ't loc
+ | LONG loc -> loop LONG't loc
+ | LPAREN loc -> loop LPAREN't loc
+ | LT loc -> loop LT't loc
+ | MINUS loc -> loop MINUS't loc
+ | MOD_ASSIGN loc -> loop MOD_ASSIGN't loc
+ | MUL_ASSIGN loc -> loop MUL_ASSIGN't loc
+ | NEQ loc -> loop NEQ't loc
+ | OR_ASSIGN loc -> loop OR_ASSIGN't loc
+ | PACKED loc -> loop PACKED't loc
+ | PERCENT loc -> loop PERCENT't loc
+ | PLUS loc -> loop PLUS't loc
+ | PTR loc -> loop PTR't loc
+ | QUESTION loc -> loop QUESTION't loc
+ | RBRACE loc -> loop RBRACE't loc
+ | RBRACK loc -> loop RBRACK't loc
+ | REGISTER loc -> loop REGISTER't loc
+ | RESTRICT loc -> loop RESTRICT't loc
+ | RETURN loc -> loop RETURN't loc
+ | RIGHT loc -> loop RIGHT't loc
+ | RIGHT_ASSIGN loc -> loop RIGHT_ASSIGN't loc
+ | RPAREN loc -> loop RPAREN't loc
+ | SEMICOLON loc -> loop SEMICOLON't loc
+ | SHORT loc -> loop SHORT't loc
+ | SIGNED loc -> loop SIGNED't loc
+ | SIZEOF loc -> loop SIZEOF't loc
+ | SLASH loc -> loop SLASH't loc
+ | STAR loc -> loop STAR't loc
+ | STATIC loc -> loop STATIC't loc
+ | STRING_LITERAL (str, loc) ->
+ let buf = Buffer.create (String.length str) in
+ Buffer.add_string buf str;
+ (* Merge consecutive string literals *)
+ let rec doConcat () =
+ try
+ match Queue.peek tokens with
+ | STRING_LITERAL (str, loc) ->
+ ignore (Queue.pop tokens);
+ Buffer.add_string buf str;
+ doConcat ()
+ | _ -> ()
+ with Queue.Empty -> ()
+ in
+ doConcat ();
+ loop CONSTANT't (Cabs.CONST_STRING (Buffer.contents buf), loc)
+ | STRUCT loc -> loop STRUCT't loc
+ | SUB_ASSIGN loc -> loop SUB_ASSIGN't loc
+ | SWITCH loc -> loop SWITCH't loc
+ | TILDE loc -> loop TILDE't loc
+ | TYPEDEF loc -> loop TYPEDEF't loc
+ | TYPEDEF_NAME (id, typ, loc)
+ | UNKNOWN_NAME (id, typ, loc)
+ | VAR_NAME (id, typ, loc) ->
+ let terminal = match !typ with
+ | VarId -> VAR_NAME't
+ | TypedefId -> TYPEDEF_NAME't
+ | OtherId -> OTHER_NAME't
+ in
+ loop terminal (id, loc)
+ | UNION loc -> loop UNION't loc
+ | UNSIGNED loc -> loop UNSIGNED't loc
+ | VOID loc -> loop VOID't loc
+ | VOLATILE loc -> loop VOLATILE't loc
+ | WHILE loc -> loop WHILE't loc
+ | XOR_ASSIGN loc -> loop XOR_ASSIGN't loc
+ | ALIGNAS loc -> loop ALIGNAS't loc
+ | ALIGNOF loc -> loop ALIGNOF't loc
+ | ATTRIBUTE loc -> loop ATTRIBUTE't loc
+ | ASM loc -> loop ASM't loc
+ | PRAGMA (s, loc) -> loop PRAGMA't (s, loc)
+ in
+ Lazy.from_fun compute_token_stream
}