summaryrefslogtreecommitdiff
path: root/cparser/Lexer.mll
diff options
context:
space:
mode:
authorGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2014-05-12 15:52:42 +0000
committerGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2014-05-12 15:52:42 +0000
commitedc00e0c90a5598f653add89f42a095d8ee1b629 (patch)
tree2d2539335cc7e916a8964847b2ed7489f9340d00 /cparser/Lexer.mll
parent951bf7bdb208f500c86e8d45c45247cd25adb4ab (diff)
Assorted fixes to fix parsing issues and be more GCC-like:
- Moved scanning of char constants and string literals entirely to Lexer - Parser: separate STRING_LITERAL from CONSTANT to be closer to ISO C99 grammar - pre_parser: adapted + "asm" takes string_literal, not CONSTANT - Revised errors "inline doesnt belong here" git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2492 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
Diffstat (limited to 'cparser/Lexer.mll')
-rw-r--r--cparser/Lexer.mll163
1 files changed, 101 insertions, 62 deletions
diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll
index e4cb9a6..276aead 100644
--- a/cparser/Lexer.mll
+++ b/cparser/Lexer.mll
@@ -20,8 +20,6 @@ open Pre_parser_aux
open Cabshelper
open Camlcoq
-module SMap = Map.Make(String)
-
let contexts : string list list ref = ref []
let lexicon : (string, Cabs.cabsloc -> token) Hashtbl.t = Hashtbl.create 0
@@ -128,6 +126,32 @@ let currentLoc =
byteno = p.Lexing.pos_cnum;
ident = getident ();})
+(* Error reporting *)
+
+let fatal_error lb fmt =
+ Cerrors.fatal_error ("%s:%d: Error:@ " ^^ fmt)
+ lb.lex_curr_p.pos_fname lb.lex_curr_p.pos_lnum
+
+let error lb fmt =
+ Cerrors.error ("%s:%d: Error:@ " ^^ fmt)
+ lb.lex_curr_p.pos_fname lb.lex_curr_p.pos_lnum
+
+let warning lb fmt =
+ Cerrors.warning ("%s:%d: Warning:@ " ^^ fmt)
+ lb.lex_curr_p.pos_fname lb.lex_curr_p.pos_lnum
+
+(* Simple character escapes *)
+
+let convert_escape = function
+ | 'a' -> 7L (* bell *)
+ | 'b' -> 8L (* backspace *)
+ | 'e' -> 27L (* escape (GCC extension) *)
+ | 'f' -> 12L (* form feed *)
+ | 'n' -> 10L (* new line *)
+ | 'r' -> 13L (* carriage return *)
+ | 't' -> 9L (* horizontal tab *)
+ | 'v' -> 11L (* vertical tab *)
+ | c -> Int64.of_int (Char.code c)
}
(* Identifiers *)
@@ -138,8 +162,8 @@ let nondigit = ['_' 'a'-'z' 'A'-'Z']
let hex_quad = hexadecimal_digit hexadecimal_digit
hexadecimal_digit hexadecimal_digit
let universal_character_name =
- "\\u" hex_quad
- | "\\U" hex_quad hex_quad
+ "\\u" (hex_quad as n)
+ | "\\U" (hex_quad hex_quad as n)
let identifier_nondigit =
nondigit
@@ -204,36 +228,19 @@ let hexadecimal_floating_constant =
| hexadecimal_prefix (hexadecimal_digit_sequence as intpart)
binary_exponent_part floating_suffix?
-(* Charater constants *)
+(* Character and string constants *)
let simple_escape_sequence =
- "\\'" | "\\\"" | "\\?" | "\\\\" | "\\a" | "\\b" | "\\f" | "\\n"
- | "\\r" | "\\t" | "\\v"
+ '\\' ( ['\'' '\"' '?' '\\' 'a' 'b' 'e' 'f' 'n' 'r' 't' 'v'] as c)
let octal_escape_sequence =
- '\\' octal_digit
- | '\\' octal_digit octal_digit
- | '\\' octal_digit octal_digit octal_digit
-let hexadecimal_escape_sequence = "\\x" hexadecimal_digit+
+ '\\' ((octal_digit
+ | octal_digit octal_digit
+ | octal_digit octal_digit octal_digit) as n)
+let hexadecimal_escape_sequence = "\\x" (hexadecimal_digit+ as n)
let escape_sequence =
simple_escape_sequence
| octal_escape_sequence
| hexadecimal_escape_sequence
| universal_character_name
-let c_char =
- [^ '\'' '\\' '\n']
- | escape_sequence
-let c_char_sequence = c_char+
-let character_constant =
- "'" c_char_sequence "'"
- | "L'" c_char_sequence "'"
-
-(* String literals *)
-let s_char =
- [^ '"' '\\' '\n']
- | escape_sequence
-let s_char_sequence = s_char+
-let string_literal =
- '"' s_char_sequence? '"'
- | 'L' '"' s_char_sequence? '"'
rule initial = parse
| '\n' { new_line lexbuf; initial_linebegin lexbuf }
@@ -261,8 +268,16 @@ rule initial = parse
| None -> None
| Some c -> Some (String.make 1 c) },
currentLoc lexbuf)}
- | character_constant as s { CONSTANT (Cabs.CONST_CHAR s, currentLoc lexbuf) }
- | string_literal as s { STRING_LITERAL (s, currentLoc lexbuf) }
+ | "'" { let l = char_literal [] lexbuf in
+ CONSTANT (Cabs.CONST_CHAR(false, l),
+ currentLoc lexbuf) }
+ | "L'" { let l = char_literal [] lexbuf in
+ CONSTANT (Cabs.CONST_CHAR(true, l),
+ currentLoc lexbuf) }
+ | "\"" { let l = string_literal [] lexbuf in
+ STRING_LITERAL(false, l, currentLoc lexbuf) }
+ | "L\"" { let l = string_literal [] lexbuf in
+ STRING_LITERAL(true, l, currentLoc lexbuf) }
| "..." { ELLIPSIS(currentLoc lexbuf) }
| "+=" { ADD_ASSIGN(currentLoc lexbuf) }
| "-=" { SUB_ASSIGN(currentLoc lexbuf) }
@@ -313,12 +328,7 @@ rule initial = parse
try Hashtbl.find lexicon id (currentLoc lexbuf)
with Not_found -> VAR_NAME (id, ref VarId, currentLoc lexbuf) }
| eof { EOF }
- | '"' ("" | 'L') s_char* '\\' (_ as c) {
- Cerrors.fatal_error "%s:%d Error:@ invalid escape sequence in string litteral %S"
- lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum (Printf.sprintf "\\%c" c) }
- | _ as c {
- Cerrors.fatal_error "%s:%d Error:@ invalid symbol %C"
- lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum c }
+ | _ as c { fatal_error lexbuf "invalid symbol %C" c }
and initial_linebegin = parse
| '\n' { new_line lexbuf; initial_linebegin lexbuf }
@@ -326,6 +336,41 @@ and initial_linebegin = parse
| '#' { hash lexbuf }
| "" { initial lexbuf }
+and char = parse
+ | universal_character_name
+ { try
+ Int64.of_string ("0x" ^ n)
+ with Failure _ ->
+ error lexbuf "overflow in universal character name";
+ 0L
+ }
+ | hexadecimal_escape_sequence
+ { try
+ Int64.of_string ("0x" ^ n)
+ with Failure _ ->
+ error lexbuf "overflow in hexadecimal escape sequence";
+ 0L
+ }
+ | octal_escape_sequence
+ { Int64.of_string ("0o" ^ n) }
+ | simple_escape_sequence
+ { convert_escape c }
+ | '\\' (_ as c)
+ { warning lexbuf "incorrect escape sequence '\\%c', treating as '%c'" c c;
+ Int64.of_int (Char.code c) }
+ | _ as c
+ { Int64.of_int (Char.code c) }
+
+and char_literal accu = parse
+ | '\'' { List.rev accu }
+ | '\n' | eof { fatal_error lexbuf "missing terminating \"'\" character" }
+ | "" { let c = char lexbuf in char_literal (c :: accu) lexbuf }
+
+and string_literal accu = parse
+ | '\"' { List.rev accu }
+ | '\n' | eof { fatal_error lexbuf "missing terminating '\"' character" }
+ | "" { let c = char lexbuf in string_literal (c :: accu) lexbuf }
+
(* We assume gcc -E syntax but try to tolerate variations. *)
and hash = parse
| whitespace_char_no_newline +
@@ -334,11 +379,10 @@ and hash = parse
"\"" ([^ '\n' '\"']* as file) "\""
[^ '\n']* '\n'
{ let n =
- try int_of_string n
- with Failure "int_of_string" ->
- Cerrors.warning "%s:%d Warning:@ invalid line number"
- lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum;
- lexbuf.lex_curr_p.pos_lnum
+ try
+ int_of_string n
+ with Failure _ ->
+ warning lexbuf "invalid line number"; lexbuf.lex_curr_p.pos_lnum
in
lexbuf.lex_curr_p <- {
lexbuf.lex_curr_p with
@@ -353,29 +397,24 @@ and hash = parse
([^ '\n']* as s) '\n'
{ new_line lexbuf; PRAGMA (s, currentLoc lexbuf) }
| [^ '\n']* '\n'
- { Cerrors.warning "%s:%d Warning:@ unrecognized '#' line"
- lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum;
+ { warning lexbuf "unrecognized '#' line";
new_line lexbuf; initial_linebegin lexbuf }
| [^ '\n']* eof
- { Cerrors.fatal_error "%s:%d Error:@ unexpected end of file"
- lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum }
+ { fatal_error lexbuf "unexpected end of file" }
| _ as c
- { Cerrors.fatal_error "%s:%d Error:@ invalid symbol %C"
- lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum c }
+ { fatal_error lexbuf "invalid symbol %C" c }
(* Multi-line comment terminated by "*/" *)
and multiline_comment = parse
| "*/" { () }
- | eof { Cerrors.error "%s:%d Error: unterminated comment"
- lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum }
+ | eof { error lexbuf "unterminated comment" }
| '\n' { new_line lexbuf; multiline_comment lexbuf }
| _ { multiline_comment lexbuf }
(* Single-line comment terminated by a newline *)
and singleline_comment = parse
| '\n' { new_line lexbuf }
- | eof { Cerrors.error "%s:%d Error: unterminated comment"
- lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum }
+ | eof { () }
| _ { singleline_comment lexbuf }
{
@@ -477,22 +516,22 @@ and singleline_comment = parse
| SLASH loc -> loop SLASH't loc
| STAR loc -> loop STAR't loc
| STATIC loc -> loop STATIC't loc
- | STRING_LITERAL (str, loc) ->
- let buf = Buffer.create (String.length str) in
- Buffer.add_string buf str;
+ | STRING_LITERAL (wide, str, loc) ->
(* Merge consecutive string literals *)
- let rec doConcat () =
+ let rec doConcat wide str =
try
match Queue.peek tokens with
- | STRING_LITERAL (str, loc) ->
+ | STRING_LITERAL (wide', str', loc) ->
ignore (Queue.pop tokens);
- Buffer.add_string buf str;
- doConcat ()
- | _ -> ()
- with Queue.Empty -> ()
- in
- doConcat ();
- loop CONSTANT't (Cabs.CONST_STRING (Buffer.contents buf), loc)
+ let (wide'', str'') = doConcat wide' str' in
+ if str'' <> []
+ then (wide || wide'', str @ str'')
+ else (wide, str)
+ | _ ->
+ (wide, str)
+ with Queue.Empty -> (wide, str) in
+ let (wide', str') = doConcat wide str in
+ loop STRING_LITERAL't ((wide', str'), loc)
| STRUCT loc -> loop STRUCT't loc
| SUB_ASSIGN loc -> loop SUB_ASSIGN't loc
| SWITCH loc -> loop SWITCH't loc