aboutsummaryrefslogtreecommitdiffhomepage
path: root/parsing/lexer.mll
blob: adafc2c842af029b8d57e5e386b50635f87e4f97 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221

(* $Id$ *)

{
open Util

type error =
  | Illegal_character
  | Unterminated_comment
  | Unterminated_string

exception BadToken of string

exception Error of error * int * int

type frozen_t = string list

let kw_table = Hashtbl.create 149

let init () =
  Hashtbl.clear kw_table;
  List.iter (fun kw -> Hashtbl.add kw_table kw ())
    [ "Grammar"; "Syntax"; "Quit"; "Load"; "Compile";
      "of"; "with"; "end"; "as"; "in"; "using";
      "Cases"; "Fixpoint"; "CoFixpoint";
      "Definition"; "Inductive"; "CoInductive"; 
      "Theorem"; "Variable"; "Axiom"; "Parameter"; "Hypothesis";
      "Orelse"; "Proof"; "Qed";
      "Prop"; "Set"; "Type" ]  

let add_keyword s = Hashtbl.add kw_table s ()

let is_keyword s = try Hashtbl.find kw_table s; true with Not_found -> false

let freeze () = 
  let l = ref [] in
  Hashtbl.iter (fun k _ -> l := k :: !l) kw_table;
  !l

let unfreeze ft =
  init ();
  List.iter add_keyword ft

let find_keyword s = if is_keyword s then s else raise Not_found
      
let char_for_backslash =
  match Sys.os_type with
  | "Unix" | "Win32" ->
      begin function
      | 'n' -> '\010'
      | 'r' -> '\013'
      | 'b' -> '\008'
      | 't' -> '\009'
      | c   -> c
      end
  | "MacOS" ->
      begin function
      | 'n' -> '\013'
      | 'r' -> '\010'
      | 'b' -> '\008'
      | 't' -> '\009'
      | c   -> c
      end
  | x -> error "Lexer: unknown system type"

let char_for_decimal_code lexbuf i =
  let c = 100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +
           10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
                (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48) in  
  Char.chr(c land 0xFF)

let string_buffer = Buffer.create 80
let string_start_pos = ref 0
			 
let comment_depth = ref 0
let comment_start_pos = ref 0

}

let blank = [' ' '\010' '\013' '\009' '\012']
let firstchar = 
  ['$' 'A'-'Z' 'a'-'z' '\192'-'\214' '\216'-'\246' '\248'-'\255']
let identchar = 
  ['$' 'A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' 
   '\'' '0'-'9']
let symbolchar =
  ['!' '$' '%' '&' '*' '+' '-' '<' '>' '/' ':' ';' '=' '?' '@' '^' '|' '~' '#']
let decimal_literal = ['0'-'9']+
let hex_literal = '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']+
let oct_literal = '0' ['o' 'O'] ['0'-'7']+
let bin_literal = '0' ['b' 'B'] ['0'-'1']+

rule token = parse
  | blank+ 
      { token lexbuf }
  | firstchar identchar* 
      { let s = Lexing.lexeme lexbuf in
	if is_keyword s then ("",s) else ("IDENT",s) }
  | decimal_literal | hex_literal | oct_literal | bin_literal
      { ("INT", Lexing.lexeme lexbuf) }
  | "(" | ")" | "[" | "]" | "{" | "}" | "." | "_" 
      { ("", Lexing.lexeme lexbuf) }
  | symbolchar+
      { ("", Lexing.lexeme lexbuf) }
  | '`' [^'`']* '`'
      { let s = Lexing.lexeme lexbuf in
	("QUOTED", String.sub s 1 (String.length s - 2)) }
  | "\""
      { Buffer.reset string_buffer;
        let string_start = Lexing.lexeme_start lexbuf in
        string_start_pos := string_start;
        string lexbuf;
        ("STRING", Buffer.contents string_buffer) }
  | "(*"
      { comment_depth := 1;
        comment_start_pos := Lexing.lexeme_start lexbuf;
        comment lexbuf;
        token lexbuf }
  | eof 
      { ("EOI","") }

and comment = parse
  | "(*"
      { comment_depth := succ !comment_depth; comment lexbuf }
  | "*)"
      { comment_depth := pred !comment_depth;
        if !comment_depth > 0 then comment lexbuf }
  | "\""
      { Buffer.reset string_buffer;
        string_start_pos := Lexing.lexeme_start lexbuf;
        string lexbuf;
        comment lexbuf }
  | "''"
      { comment lexbuf }
  | "'" [^ '\\' '\''] "'"
      { comment lexbuf }
  | "'\\" ['\\' '\'' 'n' 't' 'b' 'r'] "'"
      { comment lexbuf }
  | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
      { comment lexbuf }
  | eof
      { raise (Error (Unterminated_comment,
                      !comment_start_pos, !comment_start_pos+2)) }
  | _
      { comment lexbuf }

and string = parse
  | '"'
      { () }
  | '\\' ("\010" | "\013" | "\013\010") [' ' '\009'] *
      { string lexbuf }
  | '\\' ['\\' '"' 'n' 't' 'b' 'r']
      { let c = char_for_backslash (Lexing.lexeme_char lexbuf 1) in
	Buffer.add_char string_buffer c;
        string lexbuf }
  | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
      { Buffer.add_char string_buffer (char_for_decimal_code lexbuf 1);
        string lexbuf }
  | eof
      { raise (Error (Unterminated_string,
                      !string_start_pos, !string_start_pos+1)) }
  | _
      { Buffer.add_char string_buffer (Lexing.lexeme_char lexbuf 0);
        string lexbuf }

{

let create_loc_table () = ref (Array.create 1024 None)

let find_loc t i = 
  if i < 0 || i >= Array.length !t then invalid_arg "find_loc";
  match Array.unsafe_get !t i with 
    | None -> invalid_arg "find_loc" 
    | Some l -> l

let add_loc t i l =
  while i >= Array.length !t do
    let new_t = Array.create (2 * Array.length !t) None in
    Array.blit !t 0 new_t 0 (Array.length !t);
    t := new_t
  done;
  !t.(i) <- Some l
  
let func cs =
  let loct = create_loc_table () in
  let lexbuf = 
    Lexing.from_function 
      (fun s _ -> match cs with parser 
	 | [< 'c >] -> String.unsafe_set s 0 c; 1
	 | [< >] -> 0)
  in
  let next_token i = 
    let tok = token lexbuf in
    let loc = (Lexing.lexeme_start lexbuf, Lexing.lexeme_end lexbuf) in
    add_loc loct i loc; Some tok
  in
  let ts = Stream.from next_token in
  (ts, find_loc loct)

let add_token = function
  | ("",kw) -> add_keyword kw
  | _ -> ()
	 
let token_text = function
  | ("", t) -> "'" ^ t ^ "'"
  | ("IDENT", "") -> "identifier"
  | ("IDENT", t) -> "'" ^ t ^ "'"
  | ("INT", "") -> "integer"
  | ("INT", s) -> "'" ^ s ^ "'"
  | ("STRING", "") -> "string"
  | ("EOI", "") -> "end of input"
  | (con, "") -> con
  | (con, prm) -> con ^ " \"" ^ prm ^ "\""

let tparse (p_con, p_prm) =
  if p_prm = "" then
    parser [< '(con, prm) when con = p_con >] -> prm
  else
    parser [< '(con, prm) when con = p_con && prm = p_prm >] -> prm

}