summaryrefslogtreecommitdiff
path: root/tools/coqdoc/pretty.mll
diff options
context:
space:
mode:
Diffstat (limited to 'tools/coqdoc/pretty.mll')
-rw-r--r--tools/coqdoc/pretty.mll586
1 files changed, 586 insertions, 0 deletions
diff --git a/tools/coqdoc/pretty.mll b/tools/coqdoc/pretty.mll
new file mode 100644
index 00000000..541939b5
--- /dev/null
+++ b/tools/coqdoc/pretty.mll
@@ -0,0 +1,586 @@
+(************************************************************************)
+(* v * The Coq Proof Assistant / The Coq Development Team *)
+(* <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud *)
+(* \VV/ **************************************************************)
+(* // * This file is distributed under the terms of the *)
+(* * GNU Lesser General Public License Version 2.1 *)
+(************************************************************************)
+
+(*i $Id: pretty.mll,v 1.7.2.1 2004/07/16 19:31:47 herbelin Exp $ i*)
+
+(*s Utility functions for the scanners *)
+
+{
+
+ open Printf
+ open Index
+ open Lexing
+ open Output
+
+ (* count the number of spaces at the beginning of a string *)
+ let count_spaces s =
+ let n = String.length s in
+ let rec count c i =
+ if i == n then c else match s.[i] with
+ | '\t' -> count (c + (8 - (c mod 8))) (i + 1)
+ | ' ' -> count (c + 1) (i + 1)
+ | _ -> c
+ in
+ count 0 0
+
+ let count_dashes s =
+ let c = ref 0 in
+ for i = 0 to String.length s - 1 do if s.[i] = '-' then incr c done;
+ !c
+
+ let cut_head_tail_spaces s =
+ let n = String.length s in
+ let rec look_up i = if i == n || s.[i] <> ' ' then i else look_up (i+1) in
+ let rec look_dn i = if i == -1 || s.[i] <> ' ' then i else look_dn (i-1) in
+ let l = look_up 0 in
+ let r = look_dn (n-1) in
+ if l <= r then String.sub s l (r-l+1) else s
+
+ let sec_title s =
+ let rec count lev i =
+ if s.[i] = '*' then
+ count (succ lev) (succ i)
+ else
+ let t = String.sub s i (String.length s - i) in
+ lev, cut_head_tail_spaces t
+ in
+ count 0 (String.index s '*')
+
+ let formatted = ref false
+ let brackets = ref 0
+
+ let backtrack lexbuf = lexbuf.lex_curr_pos <- lexbuf.lex_start_pos
+
+ (* Gallina (skipping proofs). This is a three states automaton. *)
+
+ let gallina = ref false
+
+ type gallina_state = Nothing | AfterDot | Proof
+
+ let gstate = ref AfterDot
+
+ let is_proof =
+ let t = Hashtbl.create 13 in
+ List.iter (fun s -> Hashtbl.add t s true)
+ [ "Theorem"; "Lemma"; "Fact"; "Remark"; "Goal"; "Let";
+ "Correctness"; "Definition"; "Morphism" ];
+ fun s -> try Hashtbl.find t s with Not_found -> false
+
+ let gallina_id id =
+ if !gstate = AfterDot then
+ if is_proof id then gstate := Proof else
+ if id <> "Add" then gstate := Nothing
+
+ let gallina_symbol s =
+ if !gstate = AfterDot || (!gstate = Proof && s = ":=") then
+ gstate := Nothing
+
+ let is_space = function ' ' | '\t' | '\n' | '\r' -> true | _ -> false
+
+ let gallina_char c =
+ if c = '.' then
+ (let skip = !gstate = Proof in gstate := AfterDot; skip)
+ else
+ (if !gstate = AfterDot && not (is_space c) then gstate := Nothing;
+ false)
+
+ (* saving/restoring the PP state *)
+
+ type state = {
+ st_gallina : bool;
+ st_light : bool
+ }
+
+ let state_stack = Stack.create ()
+
+ let save_state () =
+ Stack.push { st_gallina = !gallina; st_light = !light } state_stack
+
+ let restore_state () =
+ let s = Stack.pop state_stack in
+ gallina := s.st_gallina;
+ light := s.st_light
+
+ let without_ref r f x = save_state (); r := false; f x; restore_state ()
+
+ let without_gallina = without_ref gallina
+
+ let without_light = without_ref light
+
+ let show_all f = without_gallina (without_light f)
+
+ let begin_show () = save_state (); gallina := false; light := false
+ let end_show () = restore_state ()
+
+ (* Reset the globals *)
+
+ let reset () =
+ formatted := false;
+ brackets := 0;
+ gstate := AfterDot
+
+ (* erasing of Section/End *)
+
+ let section_re = Str.regexp "[ \t]*Section"
+ let end_re = Str.regexp "[ \t]*End"
+ let is_section s = Str.string_match section_re s 0
+ let is_end s = Str.string_match end_re s 0
+
+ let sections_to_close = ref 0
+
+ let section_or_end s =
+ if is_section s then begin
+ incr sections_to_close; true
+ end else if is_end s then begin
+ if !sections_to_close > 0 then begin
+ decr sections_to_close; true
+ end else
+ false
+ end else
+ true
+
+ (* tokens pretty-print *)
+
+ let token_buffer = Buffer.create 1024
+
+ let token_re =
+ Str.regexp "[ \t]*(\\*\\*[ \t]+printing[ \t]+\\([^ \t]+\\)"
+ let printing_token_re =
+ Str.regexp
+ "[ \t]*\\(\\(%\\([^%]*\\)%\\)\\|\\(\\$[^$]*\\$\\)\\)?[ \t]*\\(#\\(\\([^#]\\|&#\\)*\\)#\\)?"
+
+ let add_printing_token toks pps =
+ try
+ if Str.string_match token_re toks 0 then
+ let tok = Str.matched_group 1 toks in
+ if Str.string_match printing_token_re pps 0 then
+ let pp =
+ (try Some (Str.matched_group 3 pps) with _ ->
+ try Some (Str.matched_group 4 pps) with _ -> None),
+ (try Some (Str.matched_group 6 pps) with _ -> None)
+ in
+ Output.add_printing_token tok pp
+ with _ ->
+ ()
+
+ let remove_token_re =
+ Str.regexp
+ "[ \t]*(\\*\\*[ \t]+remove[ \t]+printing[ \t]+\\([^ \t]+\\)[ \t]*\\*)"
+
+ let remove_printing_token toks =
+ try
+ if Str.string_match remove_token_re toks 0 then
+ let tok = Str.matched_group 1 toks in
+ Output.remove_printing_token tok
+ with _ ->
+ ()
+
+ let extract_ident_re = Str.regexp "([ \t]*\\([^ \t]+\\)[ \t]*:="
+ let extract_ident s =
+ assert (String.length s >= 3);
+ if Str.string_match extract_ident_re s 0 then
+ Str.matched_group 1 s
+ else
+ String.sub s 1 (String.length s - 3)
+
+}
+
+(*s Regular expressions *)
+
+let space = [' ' '\t']
+let space_nl = [' ' '\t' '\n' '\r']
+
+let firstchar =
+ ['A'-'Z' 'a'-'z' '_'
+ (* iso 8859-1 accents *)
+ '\192'-'\214' '\216'-'\246' '\248'-'\255' ] |
+ (* utf-8 latin 1 supplement *)
+ '\195' ['\128'-'\191'] |
+ (* utf-8 letterlike symbols *)
+ '\226' ('\132' ['\128'-'\191'] | '\133' ['\128'-'\143'])
+let identchar =
+ firstchar | ['\'' '0'-'9' '@']
+let identifier = firstchar identchar*
+
+let symbolchar_no_brackets =
+ ['!' '$' '%' '&' '*' '+' ',' '@' '^' '#'
+ '\\' '/' '-' '<' '>' '|' ':' '?' '=' '~'
+ '{' '}' '(' ')'] |
+ (* utf-8 symbols *)
+ '\226' ['\134'-'\143' '\152'-'\155' '\164'-'\165' '\168'-'\171'] _
+let symbolchar = symbolchar_no_brackets | '[' | ']'
+let token = symbolchar+ | '[' [^ '[' ']' ':']* ']'
+
+(* tokens with balanced brackets *)
+let token_brackets =
+ ( symbolchar_no_brackets+ ('[' symbolchar_no_brackets* ']')*
+ | symbolchar_no_brackets* ('[' symbolchar_no_brackets* ']')+ )
+ symbolchar_no_brackets*
+
+let section = "*" | "**" | "***" | "****"
+
+let item_space = " "
+
+let begin_hide = "(*" space* "begin" space+ "hide" space* "*)" space* '\n'
+let end_hide = "(*" space* "end" space+ "hide" space* "*)" space* '\n'
+let begin_show = "(*" space* "begin" space+ "show" space* "*)" space* '\n'
+let end_show = "(*" space* "end" space+ "show" space* "*)" space* '\n'
+(*
+let begin_verb = "(*" space* "begin" space+ "verb" space* "*)"
+let end_verb = "(*" space* "end" space+ "verb" space* "*)"
+*)
+
+let coq_command_to_hide =
+ "Implicit" space |
+ "Ltac" space |
+ "Require" space |
+ "Load" space |
+ "Hint" space |
+ "Transparent" space |
+ "Opaque" space |
+ ("Declare" space+ ("Morphism" | "Step") space) |
+ "Section" space |
+ "Variable" 's'? space |
+ ("Hypothesis" | "Hypotheses") space |
+ "End" space |
+ ("Set" | "Unset") space+ "Printing" space+ "Coercions" space |
+ "Declare" space+ ("Left" | "Right") space+ "Step" space
+
+(*s Scanning Coq, at beginning of line *)
+
+rule coq_bol = parse
+ | '\n'+
+ { empty_line_of_code (); coq_bol lexbuf }
+ | space* "(**" space_nl
+ { end_coq (); start_doc ();
+ let eol = doc_bol lexbuf in
+ end_doc (); start_coq ();
+ if eol then coq_bol lexbuf else coq lexbuf }
+ | space* "Comments" space_nl
+ { end_coq (); start_doc (); comments lexbuf; end_doc ();
+ start_coq (); coq lexbuf }
+ | space* begin_hide
+ { skip_hide lexbuf; coq_bol lexbuf }
+ | space* begin_show
+ { begin_show (); coq_bol lexbuf }
+ | space* end_show
+ { end_show (); coq_bol lexbuf }
+ | space* coq_command_to_hide
+ { let s = lexeme lexbuf in
+ if !light && section_or_end s then begin
+ skip_to_dot lexbuf;
+ coq_bol lexbuf
+ end else begin
+ indentation (count_spaces s);
+ backtrack lexbuf;
+ coq lexbuf
+ end }
+ | space* "(**" space+ "printing" space+ (identifier | token) space+
+ { let tok = lexeme lexbuf in
+ let s = printing_token lexbuf in
+ add_printing_token tok s;
+ coq_bol lexbuf }
+ | space* "(**" space+ "printing" space+
+ { eprintf "warning: bad 'printing' command at character %d\n"
+ (lexeme_start lexbuf); flush stderr;
+ ignore (comment lexbuf);
+ coq_bol lexbuf }
+ | space* "(**" space+ "remove" space+ "printing" space+
+ (identifier | token) space* "*)"
+ { remove_printing_token (lexeme lexbuf);
+ coq_bol lexbuf }
+ | space* "(**" space+ "remove" space+ "printing" space+
+ { eprintf "warning: bad 'remove printing' command at character %d\n"
+ (lexeme_start lexbuf); flush stderr;
+ ignore (comment lexbuf);
+ coq_bol lexbuf }
+ | space* "(*"
+ { let eol = comment lexbuf in
+ if eol then coq_bol lexbuf else coq lexbuf }
+ | space+
+ { indentation (count_spaces (lexeme lexbuf)); coq lexbuf }
+ | eof
+ { () }
+ | _
+ { backtrack lexbuf; indentation 0; coq lexbuf }
+
+(*s Scanning Coq elsewhere *)
+
+and coq = parse
+ | "\n"
+ { line_break (); coq_bol lexbuf }
+ | "(**" space_nl
+ { end_coq (); start_doc ();
+ let eol = doc_bol lexbuf in
+ end_doc (); start_coq ();
+ if eol then coq_bol lexbuf else coq lexbuf }
+ | "(*"
+ { let eol = comment lexbuf in
+ if eol then coq_bol lexbuf else coq lexbuf }
+ | '\n'+ space* "]]"
+ { if not !formatted then begin symbol (lexeme lexbuf); coq lexbuf end }
+ | eof
+ { () }
+ | token
+ { let s = lexeme lexbuf in
+ if !gallina then gallina_symbol s;
+ symbol s;
+ coq lexbuf }
+ | "with" space+ "Module" | "Module" space+ "Type" | "Declare" space+ "Module"
+ (* hack to avoid making Type a keyword *)
+ { let s = lexeme lexbuf in
+ if !gallina then gallina_id s;
+ ident s (lexeme_start lexbuf); coq lexbuf }
+ | "(" space* identifier space* ":="
+ { let id = extract_ident (lexeme lexbuf) in
+ symbol "("; ident id (lexeme_start lexbuf); symbol ":="; coq lexbuf }
+ | (identifier '.')* identifier
+ { let id = lexeme lexbuf in
+ if !gallina then gallina_id id;
+ ident id (lexeme_start lexbuf); coq lexbuf }
+ | _
+ { let c = lexeme_char lexbuf 0 in
+ char c;
+ if !gallina && gallina_char c then skip_proof lexbuf;
+ coq lexbuf }
+
+(*s Scanning documentation, at beginning of line *)
+
+and doc_bol = parse
+ | space* "\n" '\n'*
+ { paragraph (); doc_bol lexbuf }
+ | space* section [^')'] ([^'\n' '*'] | '*' [^'\n'')'])*
+ { let lev, s = sec_title (lexeme lexbuf) in
+ section lev (fun () -> ignore (doc (from_string s)));
+ doc lexbuf }
+ | space* '-'+
+ { let n = count_dashes (lexeme lexbuf) in
+ if n >= 4 then rule () else item n;
+ doc lexbuf }
+ | "<<" space*
+ { start_verbatim (); verbatim lexbuf; doc_bol lexbuf }
+ | eof
+ { false }
+ | _
+ { backtrack lexbuf; doc lexbuf }
+
+(*s Scanning documentation elsewhere *)
+
+and doc = parse
+ | "\n"
+ { char '\n'; doc_bol lexbuf }
+ | "["
+ { brackets := 1;
+ start_inline_coq (); escaped_coq lexbuf; end_inline_coq ();
+ doc lexbuf }
+ | "[[" '\n' space*
+ { formatted := true; start_code ();
+ indentation (count_spaces (lexeme lexbuf));
+ without_gallina coq lexbuf;
+ end_code (); formatted := false;
+ doc lexbuf }
+ | '*'* "*)" space* '\n'
+ { true }
+ | '*'* "*)"
+ { false }
+ | "$"
+ { start_latex_math (); escaped_math_latex lexbuf; doc lexbuf }
+ | "$$"
+ { char '$'; doc lexbuf }
+ | "%"
+ { escaped_latex lexbuf; doc lexbuf }
+ | "%%"
+ { char '%'; doc lexbuf }
+ | "#"
+ { escaped_html lexbuf; doc lexbuf }
+ | "##"
+ { char '#'; doc lexbuf }
+ | eof
+ { false }
+ | _
+ { char (lexeme_char lexbuf 0); doc lexbuf }
+
+(*s Various escapings *)
+
+and escaped_math_latex = parse
+ | "$" { stop_latex_math () }
+ | eof { stop_latex_math () }
+ | _ { latex_char (lexeme_char lexbuf 0); escaped_math_latex lexbuf }
+
+and escaped_latex = parse
+ | "%" { () }
+ | eof { () }
+ | _ { latex_char (lexeme_char lexbuf 0); escaped_latex lexbuf }
+
+and escaped_html = parse
+ | "#" { () }
+ | "&#"
+ { html_char '&'; html_char '#'; escaped_html lexbuf }
+ | "##"
+ { html_char '#'; escaped_html lexbuf }
+ | eof { () }
+ | _ { html_char (lexeme_char lexbuf 0); escaped_html lexbuf }
+
+and verbatim = parse
+ | "\n>>" { verbatim_char '\n'; stop_verbatim () }
+ | eof { stop_verbatim () }
+ | _ { verbatim_char (lexeme_char lexbuf 0); verbatim lexbuf }
+
+(*s Coq, inside quotations *)
+
+and escaped_coq = parse
+ | "]"
+ { decr brackets;
+ if !brackets > 0 then begin char ']'; escaped_coq lexbuf end }
+ | "["
+ { incr brackets; char '['; escaped_coq lexbuf }
+ | "(*"
+ { ignore (comment lexbuf); escaped_coq lexbuf }
+ | "*)"
+ { (* likely to be a syntax error: we escape *) backtrack lexbuf }
+ | eof
+ { () }
+ | token_brackets
+ { let s = lexeme lexbuf in
+ symbol s;
+ escaped_coq lexbuf }
+ | (identifier '.')* identifier
+ { ident (lexeme lexbuf) (lexeme_start lexbuf); escaped_coq lexbuf }
+ | _
+ { char (lexeme_char lexbuf 0); escaped_coq lexbuf }
+
+(*s Coq "Comments" command. *)
+
+and comments = parse
+ | space_nl+
+ { char ' '; comments lexbuf }
+ | '"' [^ '"']* '"'
+ { let s = lexeme lexbuf in
+ let s = String.sub s 1 (String.length s - 2) in
+ ignore (doc (from_string s)); comments lexbuf }
+ | ([^ '.' '"'] | '.' [^ ' ' '\t' '\n'])+
+ { escaped_coq (from_string (lexeme lexbuf)); comments lexbuf }
+ | "." (space_nl | eof)
+ { () }
+ | eof
+ { () }
+ | _
+ { char (lexeme_char lexbuf 0); comments lexbuf }
+
+(*s Skip comments *)
+
+and comment = parse
+ | "(*" { ignore (comment lexbuf); comment lexbuf }
+ | "*)" space* '\n'+ { true }
+ | "*)" { false }
+ | eof { false }
+ | _ { comment lexbuf }
+
+(*s Skip proofs *)
+
+and skip_proof = parse
+ | "(*" { ignore (comment lexbuf); skip_proof lexbuf }
+ | "Save" | "Qed" | "Defined"
+ | "Abort" | "Proof" | "Admitted" { skip_to_dot lexbuf }
+ | "Proof" space* '.' { skip_proof lexbuf }
+ | identifier { skip_proof lexbuf } (* to avoid keywords within idents *)
+ | eof { () }
+ | _ { skip_proof lexbuf }
+
+and skip_to_dot = parse
+ | eof | '.' { if !gallina then gstate := AfterDot }
+ | "(*" { ignore (comment lexbuf); skip_to_dot lexbuf }
+ | _ { skip_to_dot lexbuf }
+
+and skip_hide = parse
+ | eof | end_hide { () }
+ | _ { skip_hide lexbuf }
+
+(*s Reading token pretty-print *)
+
+and printing_token = parse
+ | "*)" | eof
+ { let s = Buffer.contents token_buffer in
+ Buffer.clear token_buffer;
+ s }
+ | _ { Buffer.add_string token_buffer (lexeme lexbuf);
+ printing_token lexbuf }
+
+(*s Applying the scanners to files *)
+
+{
+
+ type file =
+ | Vernac_file of string * coq_module
+ | Latex_file of string
+
+ let coq_file f m =
+ reset ();
+ Index.scan_file f m;
+ start_module ();
+ let c = open_in f in
+ let lb = from_channel c in
+ start_coq (); coq_bol lb; end_coq ();
+ close_in c
+
+ (* LaTeX document *)
+
+ let latex_document l =
+ let file = function
+ | Vernac_file (f,m) -> set_module m; coq_file f m
+ | Latex_file f -> dump_file f
+ in
+ header ();
+ if !toc then make_toc ();
+ List.iter file l;
+ trailer ();
+ close ()
+
+ (* HTML document *)
+
+ let html_document l =
+ let file = function
+ | Vernac_file (f,m) ->
+ set_module m;
+ let hf = m ^ ".html" in
+ set_out_file hf;
+ header ();
+ coq_file f m;
+ trailer ();
+ close ()
+ | Latex_file _ -> ()
+ in
+ List.iter file l;
+ make_index ();
+ if !toc then make_toc ()
+
+ (* TeXmacs document *)
+
+ let texmacs_document l =
+ let file = function
+ | Vernac_file (f,m) -> set_module m; coq_file f m
+ | Latex_file f -> dump_file f
+ in
+ header ();
+ List.iter file l;
+ trailer ();
+ close ()
+
+ let index_module = function
+ | Vernac_file (_,m) -> Index.add_module m
+ | Latex_file _ -> ()
+
+ let produce_document l =
+ List.iter index_module l;
+ (match !target_language with
+ | LaTeX -> latex_document
+ | HTML -> html_document
+ | TeXmacs -> texmacs_document) l
+
+}
+