From edc00e0c90a5598f653add89f42a095d8ee1b629 Mon Sep 17 00:00:00 2001 From: xleroy Date: Mon, 12 May 2014 15:52:42 +0000 Subject: Assorted fixes to fix parsing issues and be more GCC-like: - Moved scanning of char constants and string literals entirely to Lexer - Parser: separate STRING_LITERAL from CONSTANT to be closer to ISO C99 grammar - pre_parser: adapted + "asm" takes string_literal, not CONSTANT - Revised errors "inline doesnt belong here" git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2492 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- cparser/Lexer.mll | 163 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 101 insertions(+), 62 deletions(-) (limited to 'cparser/Lexer.mll') diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll index e4cb9a6..276aead 100644 --- a/cparser/Lexer.mll +++ b/cparser/Lexer.mll @@ -20,8 +20,6 @@ open Pre_parser_aux open Cabshelper open Camlcoq -module SMap = Map.Make(String) - let contexts : string list list ref = ref [] let lexicon : (string, Cabs.cabsloc -> token) Hashtbl.t = Hashtbl.create 0 @@ -128,6 +126,32 @@ let currentLoc = byteno = p.Lexing.pos_cnum; ident = getident ();}) +(* Error reporting *) + +let fatal_error lb fmt = + Cerrors.fatal_error ("%s:%d: Error:@ " ^^ fmt) + lb.lex_curr_p.pos_fname lb.lex_curr_p.pos_lnum + +let error lb fmt = + Cerrors.error ("%s:%d: Error:@ " ^^ fmt) + lb.lex_curr_p.pos_fname lb.lex_curr_p.pos_lnum + +let warning lb fmt = + Cerrors.warning ("%s:%d: Warning:@ " ^^ fmt) + lb.lex_curr_p.pos_fname lb.lex_curr_p.pos_lnum + +(* Simple character escapes *) + +let convert_escape = function + | 'a' -> 7L (* bell *) + | 'b' -> 8L (* backspace *) + | 'e' -> 27L (* escape (GCC extension) *) + | 'f' -> 12L (* form feed *) + | 'n' -> 10L (* new line *) + | 'r' -> 13L (* carriage return *) + | 't' -> 9L (* horizontal tab *) + | 'v' -> 11L (* vertical tab *) + | c -> Int64.of_int (Char.code c) } (* Identifiers *) @@ -138,8 +162,8 @@ let nondigit = ['_' 'a'-'z' 'A'-'Z'] let hex_quad = hexadecimal_digit hexadecimal_digit hexadecimal_digit hexadecimal_digit let universal_character_name = - "\\u" hex_quad - | "\\U" hex_quad hex_quad + "\\u" (hex_quad as n) + | "\\U" (hex_quad hex_quad as n) let identifier_nondigit = nondigit @@ -204,36 +228,19 @@ let hexadecimal_floating_constant = | hexadecimal_prefix (hexadecimal_digit_sequence as intpart) binary_exponent_part floating_suffix? -(* Charater constants *) +(* Character and string constants *) let simple_escape_sequence = - "\\'" | "\\\"" | "\\?" | "\\\\" | "\\a" | "\\b" | "\\f" | "\\n" - | "\\r" | "\\t" | "\\v" + '\\' ( ['\'' '\"' '?' '\\' 'a' 'b' 'e' 'f' 'n' 'r' 't' 'v'] as c) let octal_escape_sequence = - '\\' octal_digit - | '\\' octal_digit octal_digit - | '\\' octal_digit octal_digit octal_digit -let hexadecimal_escape_sequence = "\\x" hexadecimal_digit+ + '\\' ((octal_digit + | octal_digit octal_digit + | octal_digit octal_digit octal_digit) as n) +let hexadecimal_escape_sequence = "\\x" (hexadecimal_digit+ as n) let escape_sequence = simple_escape_sequence | octal_escape_sequence | hexadecimal_escape_sequence | universal_character_name -let c_char = - [^ '\'' '\\' '\n'] - | escape_sequence -let c_char_sequence = c_char+ -let character_constant = - "'" c_char_sequence "'" - | "L'" c_char_sequence "'" - -(* String literals *) -let s_char = - [^ '"' '\\' '\n'] - | escape_sequence -let s_char_sequence = s_char+ -let string_literal = - '"' s_char_sequence? '"' - | 'L' '"' s_char_sequence? '"' rule initial = parse | '\n' { new_line lexbuf; initial_linebegin lexbuf } @@ -261,8 +268,16 @@ rule initial = parse | None -> None | Some c -> Some (String.make 1 c) }, currentLoc lexbuf)} - | character_constant as s { CONSTANT (Cabs.CONST_CHAR s, currentLoc lexbuf) } - | string_literal as s { STRING_LITERAL (s, currentLoc lexbuf) } + | "'" { let l = char_literal [] lexbuf in + CONSTANT (Cabs.CONST_CHAR(false, l), + currentLoc lexbuf) } + | "L'" { let l = char_literal [] lexbuf in + CONSTANT (Cabs.CONST_CHAR(true, l), + currentLoc lexbuf) } + | "\"" { let l = string_literal [] lexbuf in + STRING_LITERAL(false, l, currentLoc lexbuf) } + | "L\"" { let l = string_literal [] lexbuf in + STRING_LITERAL(true, l, currentLoc lexbuf) } | "..." { ELLIPSIS(currentLoc lexbuf) } | "+=" { ADD_ASSIGN(currentLoc lexbuf) } | "-=" { SUB_ASSIGN(currentLoc lexbuf) } @@ -313,12 +328,7 @@ rule initial = parse try Hashtbl.find lexicon id (currentLoc lexbuf) with Not_found -> VAR_NAME (id, ref VarId, currentLoc lexbuf) } | eof { EOF } - | '"' ("" | 'L') s_char* '\\' (_ as c) { - Cerrors.fatal_error "%s:%d Error:@ invalid escape sequence in string litteral %S" - lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum (Printf.sprintf "\\%c" c) } - | _ as c { - Cerrors.fatal_error "%s:%d Error:@ invalid symbol %C" - lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum c } + | _ as c { fatal_error lexbuf "invalid symbol %C" c } and initial_linebegin = parse | '\n' { new_line lexbuf; initial_linebegin lexbuf } @@ -326,6 +336,41 @@ and initial_linebegin = parse | '#' { hash lexbuf } | "" { initial lexbuf } +and char = parse + | universal_character_name + { try + Int64.of_string ("0x" ^ n) + with Failure _ -> + error lexbuf "overflow in universal character name"; + 0L + } + | hexadecimal_escape_sequence + { try + Int64.of_string ("0x" ^ n) + with Failure _ -> + error lexbuf "overflow in hexadecimal escape sequence"; + 0L + } + | octal_escape_sequence + { Int64.of_string ("0o" ^ n) } + | simple_escape_sequence + { convert_escape c } + | '\\' (_ as c) + { warning lexbuf "incorrect escape sequence '\\%c', treating as '%c'" c c; + Int64.of_int (Char.code c) } + | _ as c + { Int64.of_int (Char.code c) } + +and char_literal accu = parse + | '\'' { List.rev accu } + | '\n' | eof { fatal_error lexbuf "missing terminating \"'\" character" } + | "" { let c = char lexbuf in char_literal (c :: accu) lexbuf } + +and string_literal accu = parse + | '\"' { List.rev accu } + | '\n' | eof { fatal_error lexbuf "missing terminating '\"' character" } + | "" { let c = char lexbuf in string_literal (c :: accu) lexbuf } + (* We assume gcc -E syntax but try to tolerate variations. *) and hash = parse | whitespace_char_no_newline + @@ -334,11 +379,10 @@ and hash = parse "\"" ([^ '\n' '\"']* as file) "\"" [^ '\n']* '\n' { let n = - try int_of_string n - with Failure "int_of_string" -> - Cerrors.warning "%s:%d Warning:@ invalid line number" - lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum; - lexbuf.lex_curr_p.pos_lnum + try + int_of_string n + with Failure _ -> + warning lexbuf "invalid line number"; lexbuf.lex_curr_p.pos_lnum in lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with @@ -353,29 +397,24 @@ and hash = parse ([^ '\n']* as s) '\n' { new_line lexbuf; PRAGMA (s, currentLoc lexbuf) } | [^ '\n']* '\n' - { Cerrors.warning "%s:%d Warning:@ unrecognized '#' line" - lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum; + { warning lexbuf "unrecognized '#' line"; new_line lexbuf; initial_linebegin lexbuf } | [^ '\n']* eof - { Cerrors.fatal_error "%s:%d Error:@ unexpected end of file" - lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum } + { fatal_error lexbuf "unexpected end of file" } | _ as c - { Cerrors.fatal_error "%s:%d Error:@ invalid symbol %C" - lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum c } + { fatal_error lexbuf "invalid symbol %C" c } (* Multi-line comment terminated by "*/" *) and multiline_comment = parse | "*/" { () } - | eof { Cerrors.error "%s:%d Error: unterminated comment" - lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum } + | eof { error lexbuf "unterminated comment" } | '\n' { new_line lexbuf; multiline_comment lexbuf } | _ { multiline_comment lexbuf } (* Single-line comment terminated by a newline *) and singleline_comment = parse | '\n' { new_line lexbuf } - | eof { Cerrors.error "%s:%d Error: unterminated comment" - lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum } + | eof { () } | _ { singleline_comment lexbuf } { @@ -477,22 +516,22 @@ and singleline_comment = parse | SLASH loc -> loop SLASH't loc | STAR loc -> loop STAR't loc | STATIC loc -> loop STATIC't loc - | STRING_LITERAL (str, loc) -> - let buf = Buffer.create (String.length str) in - Buffer.add_string buf str; + | STRING_LITERAL (wide, str, loc) -> (* Merge consecutive string literals *) - let rec doConcat () = + let rec doConcat wide str = try match Queue.peek tokens with - | STRING_LITERAL (str, loc) -> + | STRING_LITERAL (wide', str', loc) -> ignore (Queue.pop tokens); - Buffer.add_string buf str; - doConcat () - | _ -> () - with Queue.Empty -> () - in - doConcat (); - loop CONSTANT't (Cabs.CONST_STRING (Buffer.contents buf), loc) + let (wide'', str'') = doConcat wide' str' in + if str'' <> [] + then (wide || wide'', str @ str'') + else (wide, str) + | _ -> + (wide, str) + with Queue.Empty -> (wide, str) in + let (wide', str') = doConcat wide str in + loop STRING_LITERAL't ((wide', str'), loc) | STRUCT loc -> loop STRUCT't loc | SUB_ASSIGN loc -> loop SUB_ASSIGN't loc | SWITCH loc -> loop SWITCH't loc -- cgit v1.2.3