Imported Upstream version 8.0pl1upstream/8.0pl1

author: Samuel Mimram <samuel.mimram@ens-lyon.org> 2004-07-28 21:54:47 +0000
committer: Samuel Mimram <samuel.mimram@ens-lyon.org> 2004-07-28 21:54:47 +0000
commit: 6b649aba925b6f7462da07599fe67ebb12a3460e (patch)
tree: 43656bcaa51164548f3fa14e5b10de5ef1088574 /contrib/interface/line_parser.ml4
1 files changed, 241 insertions, 0 deletions
diff --git a/contrib/interface/line_parser.ml4 b/contrib/interface/line_parser.ml4
new file mode 100755
index 00000000..b5669351
--- /dev/null
+++ b/contrib/interface/line_parser.ml4
@@ -0,0 +1,241 @@
+(* line-oriented Syntactic analyser for a Coq parser *)
+(* This parser expects a very small number of commands, each given on a complete
+line.  Some of these commands are then followed by a text fragment terminated
+by a precise keyword, which is also expected to appear alone on a line. *)
+
+(* The main parsing loop procedure is "parser_loop", given at the end of this
+file.  It read lines one by one and checks whether they can be parsed using
+a very simple parser.  This very simple parser uses a lexer, which is also given
+in this file.  
+
+The lexical analyser:
+ There are only 5 sorts of tokens *)
+type simple_tokens = Tspace | Tid of string | Tint of int | Tstring of string |
+      Tlbracket | Trbracket;;
+
+(* When recognizing identifiers or strings, the lexical analyser accumulates
+   the characters in a buffer, using the command add_in_buff.  To recuperate
+   the characters, one can use get_buff (this code was inspired by the
+   code in src/meta/lexer.ml of Coq revision 6.1) *)
+let add_in_buff,get_buff =
+  let buff = ref (String.create 80) in
+    (fun i x -> 
+       let len = String.length !buff in
+        if i >= len then (buff := !buff ^ (String.create len);());
+         String.set !buff i x;
+         succ i),
+     (fun len -> String.sub !buff 0 len);;
+
+(* Identifiers are [a-zA-Z_][.a-zA-Z0-9_]*.  When arriving here the first
+   character has already been recognized. *)
+let rec ident len = parser
+  [<''_' | '.' | 'a'..'z' | 'A'..'Z' | '0'..'9' as c; s >] ->
+    ident (add_in_buff len c) s
+| [< >] -> let str = get_buff len in Tid(str);;
+
+(* While recognizing integers, one constructs directly the integer value.
+   The ascii code of '0' is important for this. *)
+let code0 = Char.code '0';;
+
+let get_digit c =  Char.code c - code0;;
+
+(* Integers are [0-9]*
+   The variable intval is the integer value of the text that has already
+   been recognized.  As for identifiers, the first character has already been
+   recognized. *)
+
+let rec parse_int intval = parser
+  [< ''0'..'9' as c ; i=parse_int (10 * intval + get_digit c)>] -> i
+| [< >] -> Tint intval;;
+      
+(* The string lexer is borrowed from the string parser of Coq V6.1 
+   This may be a problem if convention have changed in Coq,
+   However this parser is only used to recognize file names which should
+   not contain too many special characters *)
+
+let rec spec_char = parser
+  [< ''n' >] -> '\n' 
+| [< ''t' >] -> '\t'
+| [< ''b' >] -> '\008' 
+| [< ''r' >] -> '\013'
+| [< ''0'..'9' as c; v= (spec1 (get_digit c)) >] ->
+    Char.chr v
+| [< 'x >] -> x
+
+and spec1 v = parser
+  [< ''0'..'9' as c; s >] -> spec1 (10*v+(get_digit c)) s
+| [< >] -> v
+;;
+
+(* This is the actual string lexical analyser.  Strings are
+   QUOT([^QUOT\\]|\\[0-9]*|\\[^0-9])QUOT (the word QUOT is used
+   to represents double quotation characters, that cannot be used
+   freely, even inside comments. *)
+
+let rec string len = parser
+  [< ''"' >] -> len
+| [<''\\' ;
+      len = (parser [< ''\n' >] -> len
+             | [< c=spec_char >] -> add_in_buff len c);
+      s >] -> string len s
+| [< 'x; s >] -> string (add_in_buff len x) s;;
+
+(* The lexical analyser repeats the recognized given by next_token:
+   spaces and tabulations are ignored, identifiers, integers,
+   strings, opening and closing square brackets.  Lexical errors are
+   ignored ! *)
+let rec next_token = parser count
+  [< '' ' | '\t'; tok = next_token >] -> tok
+| [< ''_' | 'a'..'z' | 'A'..'Z' as c;i = (ident (add_in_buff 0 c))>] -> i
+| [< ''0'..'9' as c ; i = (parse_int (get_digit c))>] -> i
+| [< ''"' ; len = (string 0) >] -> Tstring (get_buff len)
+| [< ''[' >] -> Tlbracket
+| [< '']' >] -> Trbracket
+| [< '_ ; x = next_token >] -> x;;
+
+(* A very simple lexical analyser to recognize a integer value behind 
+   blank characters *)
+
+let rec next_int = parser count
+  [< '' ' | '\t'; v = next_int >] -> v
+| [< ''0'..'9' as c; i = (parse_int (get_digit c))>] ->
+     (match i with
+        Tint n -> n
+     |  _ -> failwith "unexpected branch in next_int");;
+
+(* This is the actual lexical analyser, implemented as a function on a stream.
+   It will be used with the Stream.from primitive to construct a function
+   of type char Stream.t -> simple_token option Stream.t *)
+let token_stream cs _ =
+ try  let tok = next_token cs in
+      Some tok
+ with Stream.Failure -> None;;
+
+(* Two of the actions of the parser request that one reads the rest of
+   the input up to a specific string stop_string.  This is done
+   with a function that transform the input_channel into a pair of
+   char Stream.t, reading from the input_channel all the lines to
+   the stop_string first. *)
+
+
+let rec gather_strings stop_string input_channel =
+   let buff = input_line input_channel in
+   if buff = stop_string then
+     []
+   else
+     (buff::(gather_strings stop_string input_channel));;
+
+
+(* the result of this function is supposed to be used in a Stream.from
+   construction. *)
+
+let line_list_to_stream string_list =
+   let count = ref 0 in
+   let buff = ref "" in
+   let reserve = ref string_list in
+   let current_length = ref 0 in
+   (fun i -> if (i - !count) >= !current_length then
+               begin
+                  count := !count + !current_length + 1;
+                  match !reserve with
+		  | [] -> None
+                  | s1::rest -> 
+                    begin
+                      buff := s1;
+                      current_length := String.length !buff;
+		      reserve := rest;
+                      Some '\n'
+                    end
+               end
+             else
+               Some(String.get !buff (i - !count)));;
+ 
+
+(* In older revisions of this file you would find a function that
+   does line oriented breakdown of the input channel without resorting to
+   a list of lines.  However, the need for the list of line appeared when
+   we wanted to have  a channel and a list of strings describing the same
+   data, one for regular parsing and the other for error recovery. *)
+
+let channel_to_stream_and_string_list stop_string input_channel =
+   let string_list = gather_strings stop_string input_channel in
+   (line_list_to_stream string_list, string_list);;
+
+let flush_until_end_of_stream char_stream =
+ Stream.iter (function _ -> ()) char_stream;;
+
+(* There are only 5 kinds of lines recognized by our little parser.
+   Unrecognized lines are ignored. *)
+type parser_request =
+  | PRINT_VERSION
+  | PARSE_STRING of string
+      (* parse_string <int> [<ident>] then text and && END--OF--DATA *)
+  | QUIET_PARSE_STRING
+      (* quiet_parse_string then text and && END--OF--DATA *)
+  | PARSE_FILE of string
+      (* parse_file <int> <string> *)
+  | ADD_PATH of string
+      (* add_path <int> <string> *)
+  | ADD_REC_PATH of string * string
+      (* add_rec_path <int> <string> <ident> *)
+  | LOAD_SYNTAX of string
+      (* load_syntax_file <int> <ident> *)
+  | GARBAGE
+;;
+
+(* The procedure parser_loop should never terminate while the input_channel is
+   not closed. This procedure receives the functions called for each sentence
+   as arguments.  Thus the code is completely independent from the Coq sources. *)
+let parser_loop functions input_channel =
+  let print_version_action,
+      parse_string_action,
+      quiet_parse_string_action,
+      parse_file_action,
+      add_path_action,
+      add_rec_path_action,
+      load_syntax_action = functions in
+  let rec parser_loop_rec input_channel =
+  (let line = input_line input_channel in
+  let reqid, parser_request = 
+      try 
+      (match Stream.from (token_stream (Stream.of_string line)) with
+        parser
+        | [< 'Tid "print_version" >] ->
+                     0, PRINT_VERSION
+        | [< 'Tid "parse_string" ; 'Tint reqid ; 'Tlbracket ;
+             'Tid phylum ; 'Trbracket >] 
+                  -> reqid,PARSE_STRING phylum
+        | [< 'Tid "quiet_parse_string" >] ->
+                     0,QUIET_PARSE_STRING
+        | [< 'Tid "parse_file" ; 'Tint reqid ; 'Tstring fname >] ->
+                     reqid, PARSE_FILE fname
+        | [< 'Tid "add_rec_path"; 'Tint reqid ; 'Tstring directory ;  'Tid alias >]
+            -> reqid, ADD_REC_PATH(directory, alias)
+        | [< 'Tid "add_path"; 'Tint reqid ; 'Tstring directory >]
+            -> reqid, ADD_PATH directory
+        | [< 'Tid "load_syntax_file"; 'Tint reqid; 'Tid module_name >] ->
+               reqid, LOAD_SYNTAX module_name
+	| [< 'Tid "quit_parser" >] -> raise End_of_file
+        | [< >] -> 0, GARBAGE)
+       with
+         Stream.Failure | Stream.Error _ -> 0,GARBAGE in
+   match parser_request with
+     PRINT_VERSION -> print_version_action ()
+   | PARSE_STRING phylum ->
+     let regular_stream, string_list =
+        channel_to_stream_and_string_list "&& END--OF--DATA" input_channel in
+      parse_string_action reqid phylum (Stream.from regular_stream)
+        string_list;()
+   | QUIET_PARSE_STRING ->
+     let regular_stream, string_list =
+        channel_to_stream_and_string_list "&& END--OF--DATA" input_channel in
+       quiet_parse_string_action
+        (Stream.from regular_stream);()
+   | PARSE_FILE file_name ->
+        parse_file_action reqid file_name
+   | ADD_PATH path -> add_path_action reqid path
+   | ADD_REC_PATH(path, alias) -> add_rec_path_action reqid path alias
+   | LOAD_SYNTAX syn -> load_syntax_action reqid syn
+   | GARBAGE -> ());
+   parser_loop_rec input_channel in
+   parser_loop_rec input_channel;;
author	Samuel Mimram <samuel.mimram@ens-lyon.org>	2004-07-28 21:54:47 +0000
committer	Samuel Mimram <samuel.mimram@ens-lyon.org>	2004-07-28 21:54:47 +0000
commit	6b649aba925b6f7462da07599fe67ebb12a3460e (patch)
tree	43656bcaa51164548f3fa14e5b10de5ef1088574 /contrib/interface/line_parser.ml4