summaryrefslogtreecommitdiff
path: root/tools/coqdoc/cpretty.mll
diff options
context:
space:
mode:
Diffstat (limited to 'tools/coqdoc/cpretty.mll')
-rw-r--r--tools/coqdoc/cpretty.mll1176
1 files changed, 1176 insertions, 0 deletions
diff --git a/tools/coqdoc/cpretty.mll b/tools/coqdoc/cpretty.mll
new file mode 100644
index 00000000..be8bc85d
--- /dev/null
+++ b/tools/coqdoc/cpretty.mll
@@ -0,0 +1,1176 @@
+(* -*- compile-command: "make -C ../.. bin/coqdoc" -*- *)
+(************************************************************************)
+(* v * The Coq Proof Assistant / The Coq Development Team *)
+(* <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud *)
+(* \VV/ **************************************************************)
+(* // * This file is distributed under the terms of the *)
+(* * GNU Lesser General Public License Version 2.1 *)
+(************************************************************************)
+
+(*i $Id$ i*)
+
+(*s Utility functions for the scanners *)
+
+{
+ open Printf
+ open Lexing
+
+ (* A function that emulates Lexing.new_line (which does not exist in OCaml < 3.11.0) *)
+ let new_line lexbuf =
+ let pos = lexbuf.lex_curr_p in
+ lexbuf.lex_curr_p <- { pos with
+ pos_lnum = pos.pos_lnum + 1;
+ pos_bol = pos.pos_cnum }
+
+ (* A list function we need *)
+ let rec take n ls =
+ if n = 0 then [] else
+ match ls with
+ | [] -> []
+ | (l :: ls) -> l :: (take (n-1) ls)
+
+ (* count the number of spaces at the beginning of a string *)
+ let count_spaces s =
+ let n = String.length s in
+ let rec count c i =
+ if i == n then c,i else match s.[i] with
+ | '\t' -> count (c + (8 - (c mod 8))) (i + 1)
+ | ' ' -> count (c + 1) (i + 1)
+ | _ -> c,i
+ in
+ count 0 0
+
+ let remove_newline s =
+ let n = String.length s in
+ let rec count i = if i == n || s.[i] <> '\n' then i else count (i + 1) in
+ let i = count 0 in
+ i, String.sub s i (n - i)
+
+ let count_dashes s =
+ let c = ref 0 in
+ for i = 0 to String.length s - 1 do if s.[i] = '-' then incr c done;
+ !c
+
+ let cut_head_tail_spaces s =
+ let n = String.length s in
+ let rec look_up i = if i == n || s.[i] <> ' ' then i else look_up (i+1) in
+ let rec look_dn i = if i == -1 || s.[i] <> ' ' then i else look_dn (i-1) in
+ let l = look_up 0 in
+ let r = look_dn (n-1) in
+ if l <= r then String.sub s l (r-l+1) else s
+
+ let sec_title s =
+ let rec count lev i =
+ if s.[i] = '*' then
+ count (succ lev) (succ i)
+ else
+ let t = String.sub s i (String.length s - i) in
+ lev, cut_head_tail_spaces t
+ in
+ count 0 (String.index s '*')
+
+ let strip_eol s =
+ let eol = s.[String.length s - 1] = '\n' in
+ (eol, if eol then String.sub s 1 (String.length s - 1) else s)
+
+
+ let formatted = ref false
+ let brackets = ref 0
+ let comment_level = ref 0
+ let in_proof = ref None
+ let in_emph = ref false
+
+ let start_emph () = in_emph := true; Output.start_emph ()
+ let stop_emph () = if !in_emph then (Output.stop_emph (); in_emph := false)
+
+ let backtrack lexbuf = lexbuf.lex_curr_pos <- lexbuf.lex_start_pos;
+ lexbuf.lex_curr_p <- lexbuf.lex_start_p
+
+ let backtrack_past_newline lexbuf =
+ let buf = lexeme lexbuf in
+ let splits = Str.bounded_split_delim (Str.regexp "['\n']") buf 2 in
+ match splits with
+ | [] -> ()
+ | (_ :: []) -> ()
+ | (s1 :: rest :: _) ->
+ let length_skip = 1 + String.length s1 in
+ lexbuf.lex_curr_pos <- lexbuf.lex_start_pos + length_skip
+
+ let is_space = function ' ' | '\t' | '\n' | '\r' -> true | _ -> false
+
+ (* saving/restoring the PP state *)
+
+ type state = {
+ st_gallina : bool;
+ st_light : bool
+ }
+
+ let state_stack = Stack.create ()
+
+ let save_state () =
+ Stack.push { st_gallina = !Cdglobals.gallina; st_light = !Cdglobals.light } state_stack
+
+ let restore_state () =
+ let s = Stack.pop state_stack in
+ Cdglobals.gallina := s.st_gallina;
+ Cdglobals.light := s.st_light
+
+ let without_ref r f x = save_state (); r := false; f x; restore_state ()
+
+ let without_gallina = without_ref Cdglobals.gallina
+
+ let without_light = without_ref Cdglobals.light
+
+ let show_all f = without_gallina (without_light f)
+
+ let begin_show () = save_state (); Cdglobals.gallina := false; Cdglobals.light := false
+ let end_show () = restore_state ()
+
+ (* Reset the globals *)
+
+ let reset () =
+ formatted := false;
+ brackets := 0;
+ comment_level := 0
+
+ (* erasing of Section/End *)
+
+ let section_re = Str.regexp "[ \t]*Section"
+ let end_re = Str.regexp "[ \t]*End"
+ let is_section s = Str.string_match section_re s 0
+ let is_end s = Str.string_match end_re s 0
+
+ let sections_to_close = ref 0
+
+ let section_or_end s =
+ if is_section s then begin
+ incr sections_to_close; true
+ end else if is_end s then begin
+ if !sections_to_close > 0 then begin
+ decr sections_to_close; true
+ end else
+ false
+ end else
+ true
+
+ (* for item lists *)
+ type list_compare =
+ | Before
+ | StartLevel of int
+ | InLevel of int * bool
+
+ (* Before : we're before any levels
+ StartLevel : at the same column as the dash in a level
+ InLevel : after the dash of this level, but before any deeper dashes.
+ bool is true if this is the last level *)
+ let find_level levels cur_indent =
+ match levels with
+ | [] -> Before
+ | (l::ls) ->
+ if cur_indent < l then Before
+ else
+ (* cur_indent will never be less than the head of the list *)
+ let rec findind ls n =
+ match ls with
+ | [] -> InLevel (n,true)
+ | (l :: []) -> if cur_indent = l then StartLevel n
+ else InLevel (n,true)
+ | (l1 :: l2 :: ls) ->
+ if cur_indent = l1 then StartLevel n
+ else if cur_indent < l2 then InLevel (n,false)
+ else findind (l2 :: ls) (n+1)
+ in
+ findind (l::ls) 1
+
+ type is_start_list =
+ | Rule
+ | List of int
+ | Neither
+
+ let check_start_list str =
+ let n_dashes = count_dashes str in
+ let (n_spaces,_) = count_spaces str in
+ if n_dashes >= 4 && not !Cdglobals.plain_comments then
+ Rule
+ else
+ if n_dashes = 1 && not !Cdglobals.plain_comments then
+ List n_spaces
+ else
+ Neither
+
+ (* examine a string for subtitleness *)
+ let subtitle m s =
+ match Str.split_delim (Str.regexp ":") s with
+ | [] -> false
+ | (name::_) ->
+ if (cut_head_tail_spaces name) = m then
+ true
+ else
+ false
+
+
+ (* tokens pretty-print *)
+
+ let token_buffer = Buffer.create 1024
+
+ let token_re =
+ Str.regexp "[ \t]*(\\*\\*[ \t]+printing[ \t]+\\([^ \t]+\\)"
+ let printing_token_re =
+ Str.regexp
+ "[ \t]*\\(\\(%\\([^%]*\\)%\\)\\|\\(\\$[^$]*\\$\\)\\)?[ \t]*\\(#\\(\\(&#\\|[^#]\\)*\\)#\\)?"
+
+ let add_printing_token toks pps =
+ try
+ if Str.string_match token_re toks 0 then
+ let tok = Str.matched_group 1 toks in
+ if Str.string_match printing_token_re pps 0 then
+ let pp =
+ (try Some (Str.matched_group 3 pps) with _ ->
+ try Some (Str.matched_group 4 pps) with _ -> None),
+ (try Some (Str.matched_group 6 pps) with _ -> None)
+ in
+ Output.add_printing_token tok pp
+ with _ ->
+ ()
+
+ let remove_token_re =
+ Str.regexp
+ "[ \t]*(\\*\\*[ \t]+remove[ \t]+printing[ \t]+\\([^ \t]+\\)[ \t]*\\*)"
+
+ let remove_printing_token toks =
+ try
+ if Str.string_match remove_token_re toks 0 then
+ let tok = Str.matched_group 1 toks in
+ Output.remove_printing_token tok
+ with _ ->
+ ()
+
+ let extract_ident_re = Str.regexp "([ \t]*\\([^ \t]+\\)[ \t]*:="
+ let extract_ident s =
+ assert (String.length s >= 3);
+ if Str.string_match extract_ident_re s 0 then
+ Str.matched_group 1 s
+ else
+ String.sub s 1 (String.length s - 3)
+
+ let output_indented_keyword s lexbuf =
+ let nbsp,isp = count_spaces s in
+ Output.indentation nbsp;
+ let s = String.sub s isp (String.length s - isp) in
+ Output.ident s (lexeme_start lexbuf + isp)
+
+}
+
+(*s Regular expressions *)
+
+let space = [' ' '\t']
+let space_nl = [' ' '\t' '\n' '\r']
+let nl = "\r\n" | '\n'
+
+let firstchar =
+ ['A'-'Z' 'a'-'z' '_'] |
+ (* superscript 1 *)
+ '\194' '\185' |
+ (* utf-8 latin 1 supplement *)
+ '\195' ['\128'-'\191'] |
+ (* utf-8 letterlike symbols *)
+ (* '\206' ([ '\145' - '\183'] | '\187') | *)
+ (* '\xCF' [ '\x00' - '\xCE' ] | *)
+ (* utf-8 letterlike symbols *)
+ '\206' ('\160' | [ '\177'-'\183'] | '\187') |
+ '\226' ('\130' [ '\128'-'\137' ] (* subscripts *)
+ | '\129' [ '\176'-'\187' ] (* superscripts *)
+ | '\132' ['\128'-'\191'] | '\133' ['\128'-'\143'])
+let identchar =
+ firstchar | ['\'' '0'-'9' '@' ]
+let id = firstchar identchar*
+let pfx_id = (id '.')*
+let identifier =
+ id | pfx_id id
+
+(* This misses unicode stuff, and it adds "[" and "]". It's only an
+ approximation of idents - used for detecting whether an underscore
+ is part of an identifier or meant to indicate emphasis *)
+let nonidentchar = [^ 'A'-'Z' 'a'-'z' '_' '[' ']' '\'' '0'-'9' '@' ]
+
+let printing_token = [^ ' ' '\t']*
+
+let thm_token =
+ "Theorem"
+ | "Lemma"
+ | "Fact"
+ | "Remark"
+ | "Corollary"
+ | "Proposition"
+ | "Property"
+ | "Goal"
+
+let prf_token =
+ "Next" space+ "Obligation"
+ | "Proof" (space* "." | space+ "with")
+
+let def_token =
+ "Definition"
+ | "Let"
+ | "Class"
+ | "SubClass"
+ | "Example"
+ | "Local"
+ | "Fixpoint"
+ | "Boxed"
+ | "CoFixpoint"
+ | "Record"
+ | "Structure"
+ | "Scheme"
+ | "Inductive"
+ | "CoInductive"
+ | "Equations"
+ | "Instance"
+ | "Global" space+ "Instance"
+
+let decl_token =
+ "Hypothesis"
+ | "Hypotheses"
+ | "Parameter"
+ | "Axiom" 's'?
+ | "Conjecture"
+
+let gallina_ext =
+ "Module"
+ | "Include" space+ "Type"
+ | "Include"
+ | "Declare" space+ "Module"
+ | "Transparent"
+ | "Opaque"
+ | "Canonical"
+ | "Coercion"
+ | "Identity"
+ | "Implicit"
+ | "Tactic" space+ "Notation"
+ | "Section"
+ | "Context"
+ | "Variable" 's'?
+ | ("Hypothesis" | "Hypotheses")
+ | "End"
+
+let notation_kw =
+ "Notation"
+ | "Infix"
+ | "Reserved" space+ "Notation"
+
+let commands =
+ "Pwd"
+ | "Cd"
+ | "Drop"
+ | "ProtectedLoop"
+ | "Quit"
+ | "Load"
+ | "Add"
+ | "Remove" space+ "Loadpath"
+ | "Print"
+ | "Inspect"
+ | "About"
+ | "Search"
+ | "Eval"
+ | "Reset"
+ | "Check"
+ | "Type"
+
+ | "Section"
+ | "Chapter"
+ | "Variable" 's'?
+ | ("Hypothesis" | "Hypotheses")
+ | "End"
+
+let end_kw = "Qed" | "Defined" | "Save" | "Admitted" | "Abort"
+
+let extraction =
+ "Extraction"
+ | "Recursive" space+ "Extraction"
+ | "Extract"
+
+let gallina_kw = thm_token | def_token | decl_token | gallina_ext | commands | extraction
+
+let prog_kw =
+ "Program" space+ gallina_kw
+ | "Obligation"
+ | "Obligations"
+ | "Solve"
+
+let gallina_kw_to_hide =
+ "Implicit" space+ "Arguments"
+ | "Ltac"
+ | "Require"
+ | "Import"
+ | "Export"
+ | "Load"
+ | "Hint"
+ | "Open"
+ | "Close"
+ | "Delimit"
+ | "Transparent"
+ | "Opaque"
+ | ("Declare" space+ ("Morphism" | "Step") )
+ | ("Set" | "Unset") space+ "Printing" space+ "Coercions"
+ | "Declare" space+ ("Left" | "Right") space+ "Step"
+
+
+let section = "*" | "**" | "***" | "****"
+
+let item_space = " "
+
+let begin_hide = "(*" space* "begin" space+ "hide" space* "*)" space* nl
+let end_hide = "(*" space* "end" space+ "hide" space* "*)" space* nl
+let begin_show = "(*" space* "begin" space+ "show" space* "*)" space* nl
+let end_show = "(*" space* "end" space+ "show" space* "*)" space* nl
+(*
+let begin_verb = "(*" space* "begin" space+ "verb" space* "*)"
+let end_verb = "(*" space* "end" space+ "verb" space* "*)"
+*)
+
+(*s Scanning Coq, at beginning of line *)
+
+rule coq_bol = parse
+ | space* nl+
+ { if not (!in_proof <> None && (!Cdglobals.gallina || !Cdglobals.light))
+ then Output.empty_line_of_code ();
+ coq_bol lexbuf }
+ | space* "(**" space_nl
+ { Output.end_coq (); Output.start_doc ();
+ let eol = doc_bol lexbuf in
+ Output.end_doc (); Output.start_coq ();
+ if eol then coq_bol lexbuf else coq lexbuf }
+ | space* "Comments" space_nl
+ { Output.end_coq (); Output.start_doc (); comments lexbuf; Output.end_doc ();
+ Output.start_coq (); coq lexbuf }
+ | space* begin_hide
+ { skip_hide lexbuf; coq_bol lexbuf }
+ | space* begin_show
+ { begin_show (); coq_bol lexbuf }
+ | space* end_show
+ { end_show (); coq_bol lexbuf }
+ | space* gallina_kw_to_hide
+ { let s = lexeme lexbuf in
+ if !Cdglobals.light && section_or_end s then
+ let eol = skip_to_dot lexbuf in
+ if eol then (coq_bol lexbuf) else coq lexbuf
+ else
+ begin
+ output_indented_keyword s lexbuf;
+ let eol = body lexbuf in
+ if eol then coq_bol lexbuf else coq lexbuf
+ end }
+ | space* thm_token
+ { let s = lexeme lexbuf in
+ output_indented_keyword s lexbuf;
+ let eol = body lexbuf in
+ in_proof := Some eol;
+ if eol then coq_bol lexbuf else coq lexbuf }
+ | space* prf_token
+ { in_proof := Some true;
+ let eol =
+ if not !Cdglobals.gallina then
+ begin backtrack lexbuf; body_bol lexbuf end
+ else
+ let s = lexeme lexbuf in
+ if s.[String.length s - 1] = '.' then false
+ else skip_to_dot lexbuf
+ in if eol then coq_bol lexbuf else coq lexbuf }
+ | space* end_kw {
+ let eol =
+ if not (!in_proof <> None && !Cdglobals.gallina) then
+ begin backtrack lexbuf; body_bol lexbuf end
+ else skip_to_dot lexbuf
+ in
+ in_proof := None;
+ if eol then coq_bol lexbuf else coq lexbuf }
+ | space* gallina_kw
+ {
+ in_proof := None;
+ let s = lexeme lexbuf in
+ output_indented_keyword s lexbuf;
+ let eol= body lexbuf in
+ if eol then coq_bol lexbuf else coq lexbuf }
+ | space* prog_kw
+ {
+ in_proof := None;
+ let s = lexeme lexbuf in
+ output_indented_keyword s lexbuf;
+ let eol= body lexbuf in
+ if eol then coq_bol lexbuf else coq lexbuf }
+ | space* notation_kw space*
+ { let s = lexeme lexbuf in
+ output_indented_keyword s lexbuf;
+ let eol= start_notation_string lexbuf in
+ if eol then coq_bol lexbuf else coq lexbuf }
+
+ | space* "(**" space+ "printing" space+ printing_token space+
+ { let tok = lexeme lexbuf in
+ let s = printing_token_body lexbuf in
+ add_printing_token tok s;
+ coq_bol lexbuf }
+ | space* "(**" space+ "printing" space+
+ { eprintf "warning: bad 'printing' command at character %d\n"
+ (lexeme_start lexbuf); flush stderr;
+ comment_level := 1;
+ ignore (comment lexbuf);
+ coq_bol lexbuf }
+ | space* "(**" space+ "remove" space+ "printing" space+
+ printing_token space* "*)"
+ { remove_printing_token (lexeme lexbuf);
+ coq_bol lexbuf }
+ | space* "(**" space+ "remove" space+ "printing" space+
+ { eprintf "warning: bad 'remove printing' command at character %d\n"
+ (lexeme_start lexbuf); flush stderr;
+ comment_level := 1;
+ ignore (comment lexbuf);
+ coq_bol lexbuf }
+ | space* "(*"
+ { comment_level := 1;
+ if !Cdglobals.parse_comments then begin
+ let s = lexeme lexbuf in
+ let nbsp,isp = count_spaces s in
+ Output.indentation nbsp;
+ Output.start_comment ();
+ end;
+ let eol = comment lexbuf in
+ if eol then coq_bol lexbuf else coq lexbuf }
+ | eof
+ { () }
+ | _
+ { let eol =
+ if not !Cdglobals.gallina then
+ begin backtrack lexbuf; body_bol lexbuf end
+ else
+ skip_to_dot lexbuf
+ in
+ if eol then coq_bol lexbuf else coq lexbuf }
+
+(*s Scanning Coq elsewhere *)
+
+and coq = parse
+ | nl
+ { if not (!in_proof <> None && !Cdglobals.gallina) then Output.line_break(); coq_bol lexbuf }
+ | "(**" space_nl
+ { Output.end_coq (); Output.start_doc ();
+ let eol = doc_bol lexbuf in
+ Output.end_doc (); Output.start_coq ();
+ if eol then coq_bol lexbuf else coq lexbuf }
+ | "(*"
+ { comment_level := 1;
+ if !Cdglobals.parse_comments then begin
+ let s = lexeme lexbuf in
+ let nbsp,isp = count_spaces s in
+ Output.indentation nbsp;
+ Output.start_comment ();
+ end;
+ let eol = comment lexbuf in
+ if eol then coq_bol lexbuf
+ else coq lexbuf
+ }
+ | nl+ space* "]]"
+ { if not !formatted then
+ begin
+ (* Isn't this an anomaly *)
+ let s = lexeme lexbuf in
+ let nlsp,s = remove_newline s in
+ let nbsp,isp = count_spaces s in
+ Output.indentation nbsp;
+ let loc = lexeme_start lexbuf + isp + nlsp in
+ Output.sublexer ']' loc;
+ Output.sublexer ']' (loc+1);
+ coq lexbuf
+ end }
+ | eof
+ { () }
+ | gallina_kw_to_hide
+ { let s = lexeme lexbuf in
+ if !Cdglobals.light && section_or_end s then
+ begin
+ let eol = skip_to_dot lexbuf in
+ if eol then coq_bol lexbuf else coq lexbuf
+ end
+ else
+ begin
+ Output.ident s (lexeme_start lexbuf);
+ let eol=body lexbuf in
+ if eol then coq_bol lexbuf else coq lexbuf
+ end }
+ | prf_token
+ { let eol =
+ if not !Cdglobals.gallina then
+ begin backtrack lexbuf; body_bol lexbuf end
+ else
+ let s = lexeme lexbuf in
+ let eol =
+ if s.[String.length s - 1] = '.' then false
+ else skip_to_dot lexbuf
+ in
+ eol
+ in if eol then coq_bol lexbuf else coq lexbuf }
+ | end_kw {
+ let eol =
+ if not !Cdglobals.gallina then
+ begin backtrack lexbuf; body lexbuf end
+ else
+ let eol = skip_to_dot lexbuf in
+ if !in_proof <> Some true && eol then
+ Output.line_break ();
+ eol
+ in
+ in_proof := None;
+ if eol then coq_bol lexbuf else coq lexbuf }
+ | gallina_kw
+ { let s = lexeme lexbuf in
+ Output.ident s (lexeme_start lexbuf);
+ let eol = body lexbuf in
+ if eol then coq_bol lexbuf else coq lexbuf }
+ | notation_kw space*
+ { let s = lexeme lexbuf in
+ Output.ident s (lexeme_start lexbuf);
+ let eol= start_notation_string lexbuf in
+ if eol then coq_bol lexbuf else coq lexbuf }
+ | prog_kw
+ { let s = lexeme lexbuf in
+ Output.ident s (lexeme_start lexbuf);
+ let eol = body lexbuf in
+ if eol then coq_bol lexbuf else coq lexbuf }
+ | space+ { Output.char ' '; coq lexbuf }
+ | eof
+ { () }
+ | _ { let eol =
+ if not !Cdglobals.gallina then
+ begin backtrack lexbuf; body lexbuf end
+ else
+ skip_to_dot lexbuf
+ in
+ if eol then coq_bol lexbuf else coq lexbuf}
+
+(*s Scanning documentation, at beginning of line *)
+
+and doc_bol = parse
+ | space* nl+
+ { Output.paragraph (); doc_bol lexbuf }
+ | space* section space+ ([^'\n' '*'] | '*'+ [^'\n' ')' '*'])* ('*'+ '\n')?
+ { let eol, lex = strip_eol (lexeme lexbuf) in
+ let lev, s = sec_title lex in
+ if (!Cdglobals.lib_subtitles) &&
+ (subtitle (Output.get_module false) s) then
+ ()
+ else
+ Output.section lev (fun () -> ignore (doc None (from_string s)));
+ if eol then doc_bol lexbuf else doc None lexbuf }
+ | space* nl space* '-'+
+ { (* adding this production instead of just letting the paragraph
+ production and the begin list production fire eliminates
+ extra vertical whitespace. *)
+ let buf' = lexeme lexbuf in
+ let buf =
+ let bufs = Str.split_delim (Str.regexp "['\n']") buf' in
+ match bufs with
+ | (_ :: s :: []) -> s
+ | (_ :: _ :: s :: _) -> s
+ | _ -> eprintf "Internal error bad_split1 - please report\n";
+ exit 1
+ in
+ match check_start_list buf with
+ | Neither -> backtrack_past_newline lexbuf; doc None lexbuf
+ | List n -> Output.item 1; doc (Some [n]) lexbuf
+ | Rule -> Output.rule (); doc None lexbuf
+ }
+ | space* '-'+
+ { let buf = lexeme lexbuf in
+ match check_start_list buf with
+ | Neither -> backtrack lexbuf; doc None lexbuf
+ | List n -> Output.item 1; doc (Some [n]) lexbuf
+ | Rule -> Output.rule (); doc None lexbuf
+ }
+ | "<<" space*
+ { Output.start_verbatim (); verbatim lexbuf; doc_bol lexbuf }
+ | eof
+ { true }
+ | '_'
+ { if !Cdglobals.plain_comments then Output.char '_' else start_emph ();
+ doc None lexbuf }
+ | _
+ { backtrack lexbuf; doc None lexbuf }
+
+(*s Scanning lists - using whitespace *)
+and doc_list_bol indents = parse
+ | space* '-'
+ { let (n_spaces,_) = count_spaces (lexeme lexbuf) in
+ match find_level indents n_spaces with
+ | Before -> backtrack lexbuf; doc_bol lexbuf
+ | StartLevel n -> Output.item n; doc (Some (take n indents)) lexbuf
+ | InLevel (n,true) ->
+ let items = List.length indents in
+ Output.item (items+1);
+ doc (Some (List.append indents [n_spaces])) lexbuf
+ | InLevel (_,false) ->
+ backtrack lexbuf; doc_bol lexbuf
+ }
+ | "<<" space*
+ { Output.start_verbatim ();
+ verbatim lexbuf;
+ doc_list_bol indents lexbuf }
+ | "[[" nl
+ { formatted := true;
+ Output.start_inline_coq_block ();
+ ignore(body_bol lexbuf);
+ Output.end_inline_coq_block ();
+ formatted := false;
+ doc_list_bol indents lexbuf }
+ | space* nl space* '-'
+ { (* Like in the doc_bol production, these two productions
+ exist only to deal properly with whitespace *)
+ Output.paragraph ();
+ backtrack_past_newline lexbuf;
+ doc_list_bol indents lexbuf }
+ | space* nl space* _
+ { let buf' = lexeme lexbuf in
+ let buf =
+ let bufs = Str.split_delim (Str.regexp "['\n']") buf' in
+ match bufs with
+ | (_ :: s :: []) -> s
+ | (_ :: _ :: s :: _) -> s
+ | _ -> eprintf "Internal error bad_split2 - please report\n";
+ exit 1
+ in
+ let (n_spaces,_) = count_spaces buf in
+ match find_level indents n_spaces with
+ | InLevel _ ->
+ Output.paragraph ();
+ backtrack_past_newline lexbuf;
+ doc_list_bol indents lexbuf
+ | StartLevel n ->
+ if n = 1 then
+ begin
+ Output.stop_item ();
+ backtrack_past_newline lexbuf;
+ doc_bol lexbuf
+ end
+ else
+ begin
+ Output.paragraph ();
+ backtrack_past_newline lexbuf;
+ doc_list_bol indents lexbuf
+ end
+ | Before -> Output.stop_item ();
+ backtrack_past_newline lexbuf;
+ doc_bol lexbuf
+
+ }
+ | space* _
+ { let (n_spaces,_) = count_spaces (lexeme lexbuf) in
+ match find_level indents n_spaces with
+ | Before -> Output.stop_item (); backtrack lexbuf;
+ doc_bol lexbuf
+ | StartLevel n ->
+ Output.reach_item_level (n-1);
+ backtrack lexbuf;
+ doc (Some (take (n-1) indents)) lexbuf
+ | InLevel (n,_) ->
+ Output.reach_item_level n;
+ backtrack lexbuf;
+ doc (Some (take n indents)) lexbuf
+ }
+
+(*s Scanning documentation elsewhere *)
+and doc indents = parse
+ | nl
+ { Output.char '\n';
+ match indents with
+ | Some ls -> doc_list_bol ls lexbuf
+ | None -> doc_bol lexbuf }
+ | "[[" nl
+ { if !Cdglobals.plain_comments
+ then (Output.char '['; Output.char '['; doc indents lexbuf)
+ else (formatted := true;
+ Output.start_inline_coq_block ();
+ let eol = body_bol lexbuf in
+ Output.end_inline_coq_block (); formatted := false;
+ if eol then
+ match indents with
+ | Some ls -> doc_list_bol ls lexbuf
+ | None -> doc_bol lexbuf
+ else doc indents lexbuf)}
+ | "[]"
+ { Output.proofbox (); doc indents lexbuf }
+ | "["
+ { if !Cdglobals.plain_comments then Output.char '['
+ else (brackets := 1; Output.start_inline_coq (); escaped_coq lexbuf;
+ Output.end_inline_coq ()); doc indents lexbuf }
+ | "(*"
+ { backtrack lexbuf ;
+ let bol_parse = match indents with
+ | Some is -> doc_list_bol is
+ | None -> doc_bol
+ in
+ let eol = comment lexbuf in
+ if eol then bol_parse lexbuf else doc indents lexbuf
+ }
+ | '*'* "*)" space* nl
+ { true }
+ | '*'* "*)"
+ { false }
+ | "$"
+ { if !Cdglobals.plain_comments then Output.char '$'
+ else (Output.start_latex_math (); escaped_math_latex lexbuf);
+ doc indents lexbuf }
+ | "$$"
+ { if !Cdglobals.plain_comments then Output.char '$';
+ Output.char '$'; doc indents lexbuf }
+ | "%"
+ { if !Cdglobals.plain_comments then Output.char '%'
+ else escaped_latex lexbuf; doc indents lexbuf }
+ | "%%"
+ { if !Cdglobals.plain_comments then Output.char '%';
+ Output.char '%'; doc indents lexbuf }
+ | "#"
+ { if !Cdglobals.plain_comments then Output.char '#'
+ else escaped_html lexbuf; doc indents lexbuf }
+ | "##"
+ { if !Cdglobals.plain_comments then Output.char '#';
+ Output.char '#'; doc indents lexbuf }
+ | nonidentchar '_' nonidentchar
+ { List.iter (fun x -> Output.char (lexeme_char lexbuf x)) [0;1;2];
+ doc indents lexbuf}
+ | nonidentchar '_'
+ { Output.char (lexeme_char lexbuf 0);
+ if !Cdglobals.plain_comments then Output.char '_' else start_emph () ;
+ doc indents lexbuf }
+ | '_' nonidentchar
+ { if !Cdglobals.plain_comments then Output.char '_' else stop_emph () ;
+ Output.char (lexeme_char lexbuf 1);
+ doc indents lexbuf }
+ | eof
+ { false }
+ | _
+ { Output.char (lexeme_char lexbuf 0); doc indents lexbuf }
+
+(*s Various escapings *)
+
+and escaped_math_latex = parse
+ | "$" { Output.stop_latex_math () }
+ | eof { Output.stop_latex_math () }
+ | _ { Output.latex_char (lexeme_char lexbuf 0); escaped_math_latex lexbuf }
+
+and escaped_latex = parse
+ | "%" { () }
+ | eof { () }
+ | _ { Output.latex_char (lexeme_char lexbuf 0); escaped_latex lexbuf }
+
+and escaped_html = parse
+ | "#" { () }
+ | "&#"
+ { Output.html_char '&'; Output.html_char '#'; escaped_html lexbuf }
+ | "##"
+ { Output.html_char '#'; escaped_html lexbuf }
+ | eof { () }
+ | _ { Output.html_char (lexeme_char lexbuf 0); escaped_html lexbuf }
+
+and verbatim = parse
+ | nl ">>" space* nl { Output.verbatim_char '\n'; Output.stop_verbatim () }
+ | nl ">>" { Output.verbatim_char '\n'; Output.stop_verbatim () }
+ | eof { Output.stop_verbatim () }
+ | _ { Output.verbatim_char (lexeme_char lexbuf 0); verbatim lexbuf }
+
+(*s Coq, inside quotations *)
+
+and escaped_coq = parse
+ | "]"
+ { decr brackets;
+ if !brackets > 0 then
+ (Output.sublexer ']' (lexeme_start lexbuf); escaped_coq lexbuf)
+ else Tokens.flush_sublexer () }
+ | "["
+ { incr brackets;
+ Output.sublexer '[' (lexeme_start lexbuf); escaped_coq lexbuf }
+ | "(*"
+ { Tokens.flush_sublexer (); comment_level := 1;
+ ignore (comment lexbuf); escaped_coq lexbuf }
+ | "*)"
+ { (* likely to be a syntax error: we escape *) backtrack lexbuf }
+ | eof
+ { Tokens.flush_sublexer () }
+ | (identifier '.')* identifier
+ { Output.ident (lexeme lexbuf) (lexeme_start lexbuf); escaped_coq lexbuf }
+ | space
+ { Tokens.flush_sublexer(); Output.char (lexeme_char lexbuf 0);
+ escaped_coq lexbuf }
+ | _
+ { Output.sublexer (lexeme_char lexbuf 0) (lexeme_start lexbuf);
+ escaped_coq lexbuf }
+
+(*s Coq "Comments" command. *)
+
+and comments = parse
+ | space_nl+
+ { Output.char ' '; comments lexbuf }
+ | '"' [^ '"']* '"'
+ { let s = lexeme lexbuf in
+ let s = String.sub s 1 (String.length s - 2) in
+ ignore (doc None (from_string s)); comments lexbuf }
+ | ([^ '.' '"'] | '.' [^ ' ' '\t' '\n'])+
+ { escaped_coq (from_string (lexeme lexbuf)); comments lexbuf }
+ | "." (space_nl | eof)
+ { () }
+ | eof
+ { () }
+ | _
+ { Output.char (lexeme_char lexbuf 0); comments lexbuf }
+
+(*s Skip comments *)
+
+and comment = parse
+ | "(*" { incr comment_level;
+ if !Cdglobals.parse_comments then Output.start_comment ();
+ comment lexbuf }
+ | "*)" space* nl {
+ if !Cdglobals.parse_comments then
+ (Output.end_comment (); Output.line_break ());
+ decr comment_level; if !comment_level > 0 then comment lexbuf else true }
+ | "*)" {
+ if !Cdglobals.parse_comments then (Output.end_comment ());
+ decr comment_level; if !comment_level > 0 then comment lexbuf else false }
+ | "[" {
+ if !Cdglobals.parse_comments then
+ if !Cdglobals.plain_comments then Output.char '['
+ else (brackets := 1; Output.start_inline_coq ();
+ escaped_coq lexbuf; Output.end_inline_coq ());
+ comment lexbuf }
+ | "[[" nl {
+ if !Cdglobals.parse_comments then
+ if !Cdglobals.plain_comments then (Output.char '['; Output.char '[')
+ else (formatted := true;
+ Output.start_inline_coq_block ();
+ let _ = body_bol lexbuf in
+ Output.end_inline_coq_block (); formatted := false);
+ comment lexbuf}
+ | "$"
+ { if !Cdglobals.parse_comments then
+ if !Cdglobals.plain_comments then Output.char '$'
+ else (Output.start_latex_math (); escaped_math_latex lexbuf);
+ comment lexbuf }
+ | "$$"
+ { if !Cdglobals.parse_comments
+ then
+ (if !Cdglobals.plain_comments then Output.char '$'; Output.char '$');
+ doc None lexbuf }
+ | "%"
+ { if !Cdglobals.parse_comments
+ then
+ if !Cdglobals.plain_comments then Output.char '%'
+ else escaped_latex lexbuf; comment lexbuf }
+ | "%%"
+ { if !Cdglobals.parse_comments
+ then
+ (if !Cdglobals.plain_comments then Output.char '%'; Output.char '%');
+ comment lexbuf }
+ | "#"
+ { if !Cdglobals.parse_comments
+ then
+ if !Cdglobals.plain_comments then Output.char '$'
+ else escaped_html lexbuf; comment lexbuf }
+ | "##"
+ { if !Cdglobals.parse_comments
+ then
+ (if !Cdglobals.plain_comments then Output.char '#'; Output.char '#');
+ comment lexbuf }
+ | eof { false }
+ | space+ { if !Cdglobals.parse_comments
+ then Output.indentation (fst (count_spaces (lexeme lexbuf)));
+ comment lexbuf }
+ | nl { if !Cdglobals.parse_comments
+ then Output.line_break (); comment lexbuf }
+ | _ { if !Cdglobals.parse_comments then Output.char (lexeme_char lexbuf 0);
+ comment lexbuf }
+
+and skip_to_dot = parse
+ | '.' space* nl { true }
+ | eof | '.' space+ { false }
+ | "(*" { comment_level := 1; ignore (comment lexbuf); skip_to_dot lexbuf }
+ | _ { skip_to_dot lexbuf }
+
+and body_bol = parse
+ | space+
+ { Output.indentation (fst (count_spaces (lexeme lexbuf))); body lexbuf }
+ | _ { backtrack lexbuf; Output.indentation 0; body lexbuf }
+
+and body = parse
+ | nl {Tokens.flush_sublexer(); Output.line_break(); new_line lexbuf; body_bol lexbuf}
+ | nl+ space* "]]" space* nl
+ { Tokens.flush_sublexer();
+ if not !formatted then
+ begin
+ let s = lexeme lexbuf in
+ let nlsp,s = remove_newline s in
+ let _,isp = count_spaces s in
+ let loc = lexeme_start lexbuf + nlsp + isp in
+ Output.sublexer ']' loc;
+ Output.sublexer ']' (loc+1);
+ Tokens.flush_sublexer();
+ body lexbuf
+ end
+ else
+ begin
+ Output.paragraph ();
+ true
+ end }
+ | "]]" space* nl
+ { Tokens.flush_sublexer();
+ if not !formatted then
+ begin
+ let loc = lexeme_start lexbuf in
+ Output.sublexer ']' loc;
+ Output.sublexer ']' (loc+1);
+ Tokens.flush_sublexer();
+ Output.line_break();
+ body lexbuf
+ end
+ else
+ begin
+ Output.paragraph ();
+ true
+ end }
+ | eof { Tokens.flush_sublexer(); false }
+ | '.' space* nl | '.' space* eof
+ { Tokens.flush_sublexer(); Output.char '.'; Output.line_break();
+ if not !formatted then true else body_bol lexbuf }
+ | '.' space* nl "]]" space* nl
+ { Tokens.flush_sublexer(); Output.char '.';
+ if not !formatted then
+ begin
+ eprintf "Error: stray ]] at %d\n" (lexeme_start lexbuf);
+ flush stderr;
+ exit 1
+ end
+ else
+ begin
+ Output.paragraph ();
+ true
+ end
+ }
+ | '.' space+
+ { Tokens.flush_sublexer(); Output.char '.'; Output.char ' ';
+ if not !formatted then false else body lexbuf }
+ | "(**" space_nl
+ { Tokens.flush_sublexer(); Output.end_coq (); Output.start_doc ();
+ let eol = doc_bol lexbuf in
+ Output.end_doc (); Output.start_coq ();
+ if eol then body_bol lexbuf else body lexbuf }
+ | "(*" { Tokens.flush_sublexer(); comment_level := 1;
+ if !Cdglobals.parse_comments then Output.start_comment ();
+ let eol = comment lexbuf in
+ if eol
+ then begin if not !Cdglobals.parse_comments then Output.line_break(); body_bol lexbuf end
+ else body lexbuf }
+ | "where" space*
+ { Tokens.flush_sublexer();
+ Output.ident (lexeme lexbuf) (lexeme_start lexbuf);
+ start_notation_string lexbuf }
+ | identifier
+ { Tokens.flush_sublexer();
+ Output.ident (lexeme lexbuf) (lexeme_start lexbuf);
+ body lexbuf }
+ | ".."
+ { Tokens.flush_sublexer(); Output.char '.'; Output.char '.';
+ body lexbuf }
+ | '"'
+ { Tokens.flush_sublexer(); Output.char '"';
+ string lexbuf;
+ body lexbuf }
+ | space
+ { Tokens.flush_sublexer(); Output.char (lexeme_char lexbuf 0);
+ body lexbuf }
+
+ | _ { let c = lexeme_char lexbuf 0 in
+ Output.sublexer c (lexeme_start lexbuf);
+ body lexbuf }
+
+and start_notation_string = parse
+ | '"' (* a true notation *)
+ { Output.sublexer '"' (lexeme_start lexbuf);
+ notation_string lexbuf;
+ body lexbuf }
+ | _ (* an abbreviation *)
+ { backtrack lexbuf; body lexbuf }
+
+and notation_string = parse
+ | "\"\""
+ { Output.char '"'; Output.char '"'; (* Unlikely! *)
+ notation_string lexbuf }
+ | '"'
+ { Tokens.flush_sublexer(); Output.char '"' }
+ | _ { let c = lexeme_char lexbuf 0 in
+ Output.sublexer c (lexeme_start lexbuf);
+ notation_string lexbuf }
+
+and string = parse
+ | "\"\"" { Output.char '"'; Output.char '"'; string lexbuf }
+ | '"' { Output.char '"' }
+ | _ { let c = lexeme_char lexbuf 0 in Output.char c; string lexbuf }
+
+and skip_hide = parse
+ | eof | end_hide { () }
+ | _ { skip_hide lexbuf }
+
+(*s Reading token pretty-print *)
+
+and printing_token_body = parse
+ | "*)" nl? | eof
+ { let s = Buffer.contents token_buffer in
+ Buffer.clear token_buffer;
+ s }
+ | _ { Buffer.add_string token_buffer (lexeme lexbuf);
+ printing_token_body lexbuf }
+
+(*s A small scanner to support the chapter subtitle feature *)
+and st_start m = parse
+ | "(*" "*"+ space+ "*" space+
+ { st_modname m lexbuf }
+ | _
+ { None }
+
+and st_modname m = parse
+ | identifier space* ":" space*
+ { if subtitle m (lexeme lexbuf) then
+ st_subtitle lexbuf
+ else
+ None
+ }
+ | _
+ { None }
+
+and st_subtitle = parse
+ | [^ '\n']* '\n'
+ { let st = lexeme lexbuf in
+ let i = try Str.search_forward (Str.regexp "\\**)") st 0 with
+ Not_found ->
+ (eprintf "unterminated comment at beginning of file\n";
+ exit 1)
+ in
+ Some (cut_head_tail_spaces (String.sub st 0 i))
+ }
+ | _
+ { None }
+(*s Applying the scanners to files *)
+
+{
+ let coq_file f m =
+ reset ();
+ let c = open_in f in
+ let lb = from_channel c in
+ (Index.current_library := m;
+ Output.initialize ();
+ Output.start_module ();
+ Output.start_coq (); coq_bol lb; Output.end_coq ();
+ close_in c)
+
+ let detect_subtitle f m =
+ let c = open_in f in
+ let lb = from_channel c in
+ let sub = st_start m lb in
+ close_in c;
+ sub
+}