diff options
Diffstat (limited to 'tools/coqdoc/cpretty.mll')
-rw-r--r-- | tools/coqdoc/cpretty.mll | 1176 |
1 files changed, 1176 insertions, 0 deletions
diff --git a/tools/coqdoc/cpretty.mll b/tools/coqdoc/cpretty.mll new file mode 100644 index 00000000..be8bc85d --- /dev/null +++ b/tools/coqdoc/cpretty.mll @@ -0,0 +1,1176 @@ +(* -*- compile-command: "make -C ../.. bin/coqdoc" -*- *) +(************************************************************************) +(* v * The Coq Proof Assistant / The Coq Development Team *) +(* <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(************************************************************************) + +(*i $Id$ i*) + +(*s Utility functions for the scanners *) + +{ + open Printf + open Lexing + + (* A function that emulates Lexing.new_line (which does not exist in OCaml < 3.11.0) *) + let new_line lexbuf = + let pos = lexbuf.lex_curr_p in + lexbuf.lex_curr_p <- { pos with + pos_lnum = pos.pos_lnum + 1; + pos_bol = pos.pos_cnum } + + (* A list function we need *) + let rec take n ls = + if n = 0 then [] else + match ls with + | [] -> [] + | (l :: ls) -> l :: (take (n-1) ls) + + (* count the number of spaces at the beginning of a string *) + let count_spaces s = + let n = String.length s in + let rec count c i = + if i == n then c,i else match s.[i] with + | '\t' -> count (c + (8 - (c mod 8))) (i + 1) + | ' ' -> count (c + 1) (i + 1) + | _ -> c,i + in + count 0 0 + + let remove_newline s = + let n = String.length s in + let rec count i = if i == n || s.[i] <> '\n' then i else count (i + 1) in + let i = count 0 in + i, String.sub s i (n - i) + + let count_dashes s = + let c = ref 0 in + for i = 0 to String.length s - 1 do if s.[i] = '-' then incr c done; + !c + + let cut_head_tail_spaces s = + let n = String.length s in + let rec look_up i = if i == n || s.[i] <> ' ' then i else look_up (i+1) in + let rec look_dn i = if i == -1 || s.[i] <> ' ' then i else look_dn (i-1) in + let l = look_up 0 in + let r = look_dn (n-1) in + if l <= r then String.sub s l (r-l+1) else s + + let sec_title s = + let rec count lev i = + if s.[i] = '*' then + count (succ lev) (succ i) + else + let t = String.sub s i (String.length s - i) in + lev, cut_head_tail_spaces t + in + count 0 (String.index s '*') + + let strip_eol s = + let eol = s.[String.length s - 1] = '\n' in + (eol, if eol then String.sub s 1 (String.length s - 1) else s) + + + let formatted = ref false + let brackets = ref 0 + let comment_level = ref 0 + let in_proof = ref None + let in_emph = ref false + + let start_emph () = in_emph := true; Output.start_emph () + let stop_emph () = if !in_emph then (Output.stop_emph (); in_emph := false) + + let backtrack lexbuf = lexbuf.lex_curr_pos <- lexbuf.lex_start_pos; + lexbuf.lex_curr_p <- lexbuf.lex_start_p + + let backtrack_past_newline lexbuf = + let buf = lexeme lexbuf in + let splits = Str.bounded_split_delim (Str.regexp "['\n']") buf 2 in + match splits with + | [] -> () + | (_ :: []) -> () + | (s1 :: rest :: _) -> + let length_skip = 1 + String.length s1 in + lexbuf.lex_curr_pos <- lexbuf.lex_start_pos + length_skip + + let is_space = function ' ' | '\t' | '\n' | '\r' -> true | _ -> false + + (* saving/restoring the PP state *) + + type state = { + st_gallina : bool; + st_light : bool + } + + let state_stack = Stack.create () + + let save_state () = + Stack.push { st_gallina = !Cdglobals.gallina; st_light = !Cdglobals.light } state_stack + + let restore_state () = + let s = Stack.pop state_stack in + Cdglobals.gallina := s.st_gallina; + Cdglobals.light := s.st_light + + let without_ref r f x = save_state (); r := false; f x; restore_state () + + let without_gallina = without_ref Cdglobals.gallina + + let without_light = without_ref Cdglobals.light + + let show_all f = without_gallina (without_light f) + + let begin_show () = save_state (); Cdglobals.gallina := false; Cdglobals.light := false + let end_show () = restore_state () + + (* Reset the globals *) + + let reset () = + formatted := false; + brackets := 0; + comment_level := 0 + + (* erasing of Section/End *) + + let section_re = Str.regexp "[ \t]*Section" + let end_re = Str.regexp "[ \t]*End" + let is_section s = Str.string_match section_re s 0 + let is_end s = Str.string_match end_re s 0 + + let sections_to_close = ref 0 + + let section_or_end s = + if is_section s then begin + incr sections_to_close; true + end else if is_end s then begin + if !sections_to_close > 0 then begin + decr sections_to_close; true + end else + false + end else + true + + (* for item lists *) + type list_compare = + | Before + | StartLevel of int + | InLevel of int * bool + + (* Before : we're before any levels + StartLevel : at the same column as the dash in a level + InLevel : after the dash of this level, but before any deeper dashes. + bool is true if this is the last level *) + let find_level levels cur_indent = + match levels with + | [] -> Before + | (l::ls) -> + if cur_indent < l then Before + else + (* cur_indent will never be less than the head of the list *) + let rec findind ls n = + match ls with + | [] -> InLevel (n,true) + | (l :: []) -> if cur_indent = l then StartLevel n + else InLevel (n,true) + | (l1 :: l2 :: ls) -> + if cur_indent = l1 then StartLevel n + else if cur_indent < l2 then InLevel (n,false) + else findind (l2 :: ls) (n+1) + in + findind (l::ls) 1 + + type is_start_list = + | Rule + | List of int + | Neither + + let check_start_list str = + let n_dashes = count_dashes str in + let (n_spaces,_) = count_spaces str in + if n_dashes >= 4 && not !Cdglobals.plain_comments then + Rule + else + if n_dashes = 1 && not !Cdglobals.plain_comments then + List n_spaces + else + Neither + + (* examine a string for subtitleness *) + let subtitle m s = + match Str.split_delim (Str.regexp ":") s with + | [] -> false + | (name::_) -> + if (cut_head_tail_spaces name) = m then + true + else + false + + + (* tokens pretty-print *) + + let token_buffer = Buffer.create 1024 + + let token_re = + Str.regexp "[ \t]*(\\*\\*[ \t]+printing[ \t]+\\([^ \t]+\\)" + let printing_token_re = + Str.regexp + "[ \t]*\\(\\(%\\([^%]*\\)%\\)\\|\\(\\$[^$]*\\$\\)\\)?[ \t]*\\(#\\(\\(&#\\|[^#]\\)*\\)#\\)?" + + let add_printing_token toks pps = + try + if Str.string_match token_re toks 0 then + let tok = Str.matched_group 1 toks in + if Str.string_match printing_token_re pps 0 then + let pp = + (try Some (Str.matched_group 3 pps) with _ -> + try Some (Str.matched_group 4 pps) with _ -> None), + (try Some (Str.matched_group 6 pps) with _ -> None) + in + Output.add_printing_token tok pp + with _ -> + () + + let remove_token_re = + Str.regexp + "[ \t]*(\\*\\*[ \t]+remove[ \t]+printing[ \t]+\\([^ \t]+\\)[ \t]*\\*)" + + let remove_printing_token toks = + try + if Str.string_match remove_token_re toks 0 then + let tok = Str.matched_group 1 toks in + Output.remove_printing_token tok + with _ -> + () + + let extract_ident_re = Str.regexp "([ \t]*\\([^ \t]+\\)[ \t]*:=" + let extract_ident s = + assert (String.length s >= 3); + if Str.string_match extract_ident_re s 0 then + Str.matched_group 1 s + else + String.sub s 1 (String.length s - 3) + + let output_indented_keyword s lexbuf = + let nbsp,isp = count_spaces s in + Output.indentation nbsp; + let s = String.sub s isp (String.length s - isp) in + Output.ident s (lexeme_start lexbuf + isp) + +} + +(*s Regular expressions *) + +let space = [' ' '\t'] +let space_nl = [' ' '\t' '\n' '\r'] +let nl = "\r\n" | '\n' + +let firstchar = + ['A'-'Z' 'a'-'z' '_'] | + (* superscript 1 *) + '\194' '\185' | + (* utf-8 latin 1 supplement *) + '\195' ['\128'-'\191'] | + (* utf-8 letterlike symbols *) + (* '\206' ([ '\145' - '\183'] | '\187') | *) + (* '\xCF' [ '\x00' - '\xCE' ] | *) + (* utf-8 letterlike symbols *) + '\206' ('\160' | [ '\177'-'\183'] | '\187') | + '\226' ('\130' [ '\128'-'\137' ] (* subscripts *) + | '\129' [ '\176'-'\187' ] (* superscripts *) + | '\132' ['\128'-'\191'] | '\133' ['\128'-'\143']) +let identchar = + firstchar | ['\'' '0'-'9' '@' ] +let id = firstchar identchar* +let pfx_id = (id '.')* +let identifier = + id | pfx_id id + +(* This misses unicode stuff, and it adds "[" and "]". It's only an + approximation of idents - used for detecting whether an underscore + is part of an identifier or meant to indicate emphasis *) +let nonidentchar = [^ 'A'-'Z' 'a'-'z' '_' '[' ']' '\'' '0'-'9' '@' ] + +let printing_token = [^ ' ' '\t']* + +let thm_token = + "Theorem" + | "Lemma" + | "Fact" + | "Remark" + | "Corollary" + | "Proposition" + | "Property" + | "Goal" + +let prf_token = + "Next" space+ "Obligation" + | "Proof" (space* "." | space+ "with") + +let def_token = + "Definition" + | "Let" + | "Class" + | "SubClass" + | "Example" + | "Local" + | "Fixpoint" + | "Boxed" + | "CoFixpoint" + | "Record" + | "Structure" + | "Scheme" + | "Inductive" + | "CoInductive" + | "Equations" + | "Instance" + | "Global" space+ "Instance" + +let decl_token = + "Hypothesis" + | "Hypotheses" + | "Parameter" + | "Axiom" 's'? + | "Conjecture" + +let gallina_ext = + "Module" + | "Include" space+ "Type" + | "Include" + | "Declare" space+ "Module" + | "Transparent" + | "Opaque" + | "Canonical" + | "Coercion" + | "Identity" + | "Implicit" + | "Tactic" space+ "Notation" + | "Section" + | "Context" + | "Variable" 's'? + | ("Hypothesis" | "Hypotheses") + | "End" + +let notation_kw = + "Notation" + | "Infix" + | "Reserved" space+ "Notation" + +let commands = + "Pwd" + | "Cd" + | "Drop" + | "ProtectedLoop" + | "Quit" + | "Load" + | "Add" + | "Remove" space+ "Loadpath" + | "Print" + | "Inspect" + | "About" + | "Search" + | "Eval" + | "Reset" + | "Check" + | "Type" + + | "Section" + | "Chapter" + | "Variable" 's'? + | ("Hypothesis" | "Hypotheses") + | "End" + +let end_kw = "Qed" | "Defined" | "Save" | "Admitted" | "Abort" + +let extraction = + "Extraction" + | "Recursive" space+ "Extraction" + | "Extract" + +let gallina_kw = thm_token | def_token | decl_token | gallina_ext | commands | extraction + +let prog_kw = + "Program" space+ gallina_kw + | "Obligation" + | "Obligations" + | "Solve" + +let gallina_kw_to_hide = + "Implicit" space+ "Arguments" + | "Ltac" + | "Require" + | "Import" + | "Export" + | "Load" + | "Hint" + | "Open" + | "Close" + | "Delimit" + | "Transparent" + | "Opaque" + | ("Declare" space+ ("Morphism" | "Step") ) + | ("Set" | "Unset") space+ "Printing" space+ "Coercions" + | "Declare" space+ ("Left" | "Right") space+ "Step" + + +let section = "*" | "**" | "***" | "****" + +let item_space = " " + +let begin_hide = "(*" space* "begin" space+ "hide" space* "*)" space* nl +let end_hide = "(*" space* "end" space+ "hide" space* "*)" space* nl +let begin_show = "(*" space* "begin" space+ "show" space* "*)" space* nl +let end_show = "(*" space* "end" space+ "show" space* "*)" space* nl +(* +let begin_verb = "(*" space* "begin" space+ "verb" space* "*)" +let end_verb = "(*" space* "end" space+ "verb" space* "*)" +*) + +(*s Scanning Coq, at beginning of line *) + +rule coq_bol = parse + | space* nl+ + { if not (!in_proof <> None && (!Cdglobals.gallina || !Cdglobals.light)) + then Output.empty_line_of_code (); + coq_bol lexbuf } + | space* "(**" space_nl + { Output.end_coq (); Output.start_doc (); + let eol = doc_bol lexbuf in + Output.end_doc (); Output.start_coq (); + if eol then coq_bol lexbuf else coq lexbuf } + | space* "Comments" space_nl + { Output.end_coq (); Output.start_doc (); comments lexbuf; Output.end_doc (); + Output.start_coq (); coq lexbuf } + | space* begin_hide + { skip_hide lexbuf; coq_bol lexbuf } + | space* begin_show + { begin_show (); coq_bol lexbuf } + | space* end_show + { end_show (); coq_bol lexbuf } + | space* gallina_kw_to_hide + { let s = lexeme lexbuf in + if !Cdglobals.light && section_or_end s then + let eol = skip_to_dot lexbuf in + if eol then (coq_bol lexbuf) else coq lexbuf + else + begin + output_indented_keyword s lexbuf; + let eol = body lexbuf in + if eol then coq_bol lexbuf else coq lexbuf + end } + | space* thm_token + { let s = lexeme lexbuf in + output_indented_keyword s lexbuf; + let eol = body lexbuf in + in_proof := Some eol; + if eol then coq_bol lexbuf else coq lexbuf } + | space* prf_token + { in_proof := Some true; + let eol = + if not !Cdglobals.gallina then + begin backtrack lexbuf; body_bol lexbuf end + else + let s = lexeme lexbuf in + if s.[String.length s - 1] = '.' then false + else skip_to_dot lexbuf + in if eol then coq_bol lexbuf else coq lexbuf } + | space* end_kw { + let eol = + if not (!in_proof <> None && !Cdglobals.gallina) then + begin backtrack lexbuf; body_bol lexbuf end + else skip_to_dot lexbuf + in + in_proof := None; + if eol then coq_bol lexbuf else coq lexbuf } + | space* gallina_kw + { + in_proof := None; + let s = lexeme lexbuf in + output_indented_keyword s lexbuf; + let eol= body lexbuf in + if eol then coq_bol lexbuf else coq lexbuf } + | space* prog_kw + { + in_proof := None; + let s = lexeme lexbuf in + output_indented_keyword s lexbuf; + let eol= body lexbuf in + if eol then coq_bol lexbuf else coq lexbuf } + | space* notation_kw space* + { let s = lexeme lexbuf in + output_indented_keyword s lexbuf; + let eol= start_notation_string lexbuf in + if eol then coq_bol lexbuf else coq lexbuf } + + | space* "(**" space+ "printing" space+ printing_token space+ + { let tok = lexeme lexbuf in + let s = printing_token_body lexbuf in + add_printing_token tok s; + coq_bol lexbuf } + | space* "(**" space+ "printing" space+ + { eprintf "warning: bad 'printing' command at character %d\n" + (lexeme_start lexbuf); flush stderr; + comment_level := 1; + ignore (comment lexbuf); + coq_bol lexbuf } + | space* "(**" space+ "remove" space+ "printing" space+ + printing_token space* "*)" + { remove_printing_token (lexeme lexbuf); + coq_bol lexbuf } + | space* "(**" space+ "remove" space+ "printing" space+ + { eprintf "warning: bad 'remove printing' command at character %d\n" + (lexeme_start lexbuf); flush stderr; + comment_level := 1; + ignore (comment lexbuf); + coq_bol lexbuf } + | space* "(*" + { comment_level := 1; + if !Cdglobals.parse_comments then begin + let s = lexeme lexbuf in + let nbsp,isp = count_spaces s in + Output.indentation nbsp; + Output.start_comment (); + end; + let eol = comment lexbuf in + if eol then coq_bol lexbuf else coq lexbuf } + | eof + { () } + | _ + { let eol = + if not !Cdglobals.gallina then + begin backtrack lexbuf; body_bol lexbuf end + else + skip_to_dot lexbuf + in + if eol then coq_bol lexbuf else coq lexbuf } + +(*s Scanning Coq elsewhere *) + +and coq = parse + | nl + { if not (!in_proof <> None && !Cdglobals.gallina) then Output.line_break(); coq_bol lexbuf } + | "(**" space_nl + { Output.end_coq (); Output.start_doc (); + let eol = doc_bol lexbuf in + Output.end_doc (); Output.start_coq (); + if eol then coq_bol lexbuf else coq lexbuf } + | "(*" + { comment_level := 1; + if !Cdglobals.parse_comments then begin + let s = lexeme lexbuf in + let nbsp,isp = count_spaces s in + Output.indentation nbsp; + Output.start_comment (); + end; + let eol = comment lexbuf in + if eol then coq_bol lexbuf + else coq lexbuf + } + | nl+ space* "]]" + { if not !formatted then + begin + (* Isn't this an anomaly *) + let s = lexeme lexbuf in + let nlsp,s = remove_newline s in + let nbsp,isp = count_spaces s in + Output.indentation nbsp; + let loc = lexeme_start lexbuf + isp + nlsp in + Output.sublexer ']' loc; + Output.sublexer ']' (loc+1); + coq lexbuf + end } + | eof + { () } + | gallina_kw_to_hide + { let s = lexeme lexbuf in + if !Cdglobals.light && section_or_end s then + begin + let eol = skip_to_dot lexbuf in + if eol then coq_bol lexbuf else coq lexbuf + end + else + begin + Output.ident s (lexeme_start lexbuf); + let eol=body lexbuf in + if eol then coq_bol lexbuf else coq lexbuf + end } + | prf_token + { let eol = + if not !Cdglobals.gallina then + begin backtrack lexbuf; body_bol lexbuf end + else + let s = lexeme lexbuf in + let eol = + if s.[String.length s - 1] = '.' then false + else skip_to_dot lexbuf + in + eol + in if eol then coq_bol lexbuf else coq lexbuf } + | end_kw { + let eol = + if not !Cdglobals.gallina then + begin backtrack lexbuf; body lexbuf end + else + let eol = skip_to_dot lexbuf in + if !in_proof <> Some true && eol then + Output.line_break (); + eol + in + in_proof := None; + if eol then coq_bol lexbuf else coq lexbuf } + | gallina_kw + { let s = lexeme lexbuf in + Output.ident s (lexeme_start lexbuf); + let eol = body lexbuf in + if eol then coq_bol lexbuf else coq lexbuf } + | notation_kw space* + { let s = lexeme lexbuf in + Output.ident s (lexeme_start lexbuf); + let eol= start_notation_string lexbuf in + if eol then coq_bol lexbuf else coq lexbuf } + | prog_kw + { let s = lexeme lexbuf in + Output.ident s (lexeme_start lexbuf); + let eol = body lexbuf in + if eol then coq_bol lexbuf else coq lexbuf } + | space+ { Output.char ' '; coq lexbuf } + | eof + { () } + | _ { let eol = + if not !Cdglobals.gallina then + begin backtrack lexbuf; body lexbuf end + else + skip_to_dot lexbuf + in + if eol then coq_bol lexbuf else coq lexbuf} + +(*s Scanning documentation, at beginning of line *) + +and doc_bol = parse + | space* nl+ + { Output.paragraph (); doc_bol lexbuf } + | space* section space+ ([^'\n' '*'] | '*'+ [^'\n' ')' '*'])* ('*'+ '\n')? + { let eol, lex = strip_eol (lexeme lexbuf) in + let lev, s = sec_title lex in + if (!Cdglobals.lib_subtitles) && + (subtitle (Output.get_module false) s) then + () + else + Output.section lev (fun () -> ignore (doc None (from_string s))); + if eol then doc_bol lexbuf else doc None lexbuf } + | space* nl space* '-'+ + { (* adding this production instead of just letting the paragraph + production and the begin list production fire eliminates + extra vertical whitespace. *) + let buf' = lexeme lexbuf in + let buf = + let bufs = Str.split_delim (Str.regexp "['\n']") buf' in + match bufs with + | (_ :: s :: []) -> s + | (_ :: _ :: s :: _) -> s + | _ -> eprintf "Internal error bad_split1 - please report\n"; + exit 1 + in + match check_start_list buf with + | Neither -> backtrack_past_newline lexbuf; doc None lexbuf + | List n -> Output.item 1; doc (Some [n]) lexbuf + | Rule -> Output.rule (); doc None lexbuf + } + | space* '-'+ + { let buf = lexeme lexbuf in + match check_start_list buf with + | Neither -> backtrack lexbuf; doc None lexbuf + | List n -> Output.item 1; doc (Some [n]) lexbuf + | Rule -> Output.rule (); doc None lexbuf + } + | "<<" space* + { Output.start_verbatim (); verbatim lexbuf; doc_bol lexbuf } + | eof + { true } + | '_' + { if !Cdglobals.plain_comments then Output.char '_' else start_emph (); + doc None lexbuf } + | _ + { backtrack lexbuf; doc None lexbuf } + +(*s Scanning lists - using whitespace *) +and doc_list_bol indents = parse + | space* '-' + { let (n_spaces,_) = count_spaces (lexeme lexbuf) in + match find_level indents n_spaces with + | Before -> backtrack lexbuf; doc_bol lexbuf + | StartLevel n -> Output.item n; doc (Some (take n indents)) lexbuf + | InLevel (n,true) -> + let items = List.length indents in + Output.item (items+1); + doc (Some (List.append indents [n_spaces])) lexbuf + | InLevel (_,false) -> + backtrack lexbuf; doc_bol lexbuf + } + | "<<" space* + { Output.start_verbatim (); + verbatim lexbuf; + doc_list_bol indents lexbuf } + | "[[" nl + { formatted := true; + Output.start_inline_coq_block (); + ignore(body_bol lexbuf); + Output.end_inline_coq_block (); + formatted := false; + doc_list_bol indents lexbuf } + | space* nl space* '-' + { (* Like in the doc_bol production, these two productions + exist only to deal properly with whitespace *) + Output.paragraph (); + backtrack_past_newline lexbuf; + doc_list_bol indents lexbuf } + | space* nl space* _ + { let buf' = lexeme lexbuf in + let buf = + let bufs = Str.split_delim (Str.regexp "['\n']") buf' in + match bufs with + | (_ :: s :: []) -> s + | (_ :: _ :: s :: _) -> s + | _ -> eprintf "Internal error bad_split2 - please report\n"; + exit 1 + in + let (n_spaces,_) = count_spaces buf in + match find_level indents n_spaces with + | InLevel _ -> + Output.paragraph (); + backtrack_past_newline lexbuf; + doc_list_bol indents lexbuf + | StartLevel n -> + if n = 1 then + begin + Output.stop_item (); + backtrack_past_newline lexbuf; + doc_bol lexbuf + end + else + begin + Output.paragraph (); + backtrack_past_newline lexbuf; + doc_list_bol indents lexbuf + end + | Before -> Output.stop_item (); + backtrack_past_newline lexbuf; + doc_bol lexbuf + + } + | space* _ + { let (n_spaces,_) = count_spaces (lexeme lexbuf) in + match find_level indents n_spaces with + | Before -> Output.stop_item (); backtrack lexbuf; + doc_bol lexbuf + | StartLevel n -> + Output.reach_item_level (n-1); + backtrack lexbuf; + doc (Some (take (n-1) indents)) lexbuf + | InLevel (n,_) -> + Output.reach_item_level n; + backtrack lexbuf; + doc (Some (take n indents)) lexbuf + } + +(*s Scanning documentation elsewhere *) +and doc indents = parse + | nl + { Output.char '\n'; + match indents with + | Some ls -> doc_list_bol ls lexbuf + | None -> doc_bol lexbuf } + | "[[" nl + { if !Cdglobals.plain_comments + then (Output.char '['; Output.char '['; doc indents lexbuf) + else (formatted := true; + Output.start_inline_coq_block (); + let eol = body_bol lexbuf in + Output.end_inline_coq_block (); formatted := false; + if eol then + match indents with + | Some ls -> doc_list_bol ls lexbuf + | None -> doc_bol lexbuf + else doc indents lexbuf)} + | "[]" + { Output.proofbox (); doc indents lexbuf } + | "[" + { if !Cdglobals.plain_comments then Output.char '[' + else (brackets := 1; Output.start_inline_coq (); escaped_coq lexbuf; + Output.end_inline_coq ()); doc indents lexbuf } + | "(*" + { backtrack lexbuf ; + let bol_parse = match indents with + | Some is -> doc_list_bol is + | None -> doc_bol + in + let eol = comment lexbuf in + if eol then bol_parse lexbuf else doc indents lexbuf + } + | '*'* "*)" space* nl + { true } + | '*'* "*)" + { false } + | "$" + { if !Cdglobals.plain_comments then Output.char '$' + else (Output.start_latex_math (); escaped_math_latex lexbuf); + doc indents lexbuf } + | "$$" + { if !Cdglobals.plain_comments then Output.char '$'; + Output.char '$'; doc indents lexbuf } + | "%" + { if !Cdglobals.plain_comments then Output.char '%' + else escaped_latex lexbuf; doc indents lexbuf } + | "%%" + { if !Cdglobals.plain_comments then Output.char '%'; + Output.char '%'; doc indents lexbuf } + | "#" + { if !Cdglobals.plain_comments then Output.char '#' + else escaped_html lexbuf; doc indents lexbuf } + | "##" + { if !Cdglobals.plain_comments then Output.char '#'; + Output.char '#'; doc indents lexbuf } + | nonidentchar '_' nonidentchar + { List.iter (fun x -> Output.char (lexeme_char lexbuf x)) [0;1;2]; + doc indents lexbuf} + | nonidentchar '_' + { Output.char (lexeme_char lexbuf 0); + if !Cdglobals.plain_comments then Output.char '_' else start_emph () ; + doc indents lexbuf } + | '_' nonidentchar + { if !Cdglobals.plain_comments then Output.char '_' else stop_emph () ; + Output.char (lexeme_char lexbuf 1); + doc indents lexbuf } + | eof + { false } + | _ + { Output.char (lexeme_char lexbuf 0); doc indents lexbuf } + +(*s Various escapings *) + +and escaped_math_latex = parse + | "$" { Output.stop_latex_math () } + | eof { Output.stop_latex_math () } + | _ { Output.latex_char (lexeme_char lexbuf 0); escaped_math_latex lexbuf } + +and escaped_latex = parse + | "%" { () } + | eof { () } + | _ { Output.latex_char (lexeme_char lexbuf 0); escaped_latex lexbuf } + +and escaped_html = parse + | "#" { () } + | "&#" + { Output.html_char '&'; Output.html_char '#'; escaped_html lexbuf } + | "##" + { Output.html_char '#'; escaped_html lexbuf } + | eof { () } + | _ { Output.html_char (lexeme_char lexbuf 0); escaped_html lexbuf } + +and verbatim = parse + | nl ">>" space* nl { Output.verbatim_char '\n'; Output.stop_verbatim () } + | nl ">>" { Output.verbatim_char '\n'; Output.stop_verbatim () } + | eof { Output.stop_verbatim () } + | _ { Output.verbatim_char (lexeme_char lexbuf 0); verbatim lexbuf } + +(*s Coq, inside quotations *) + +and escaped_coq = parse + | "]" + { decr brackets; + if !brackets > 0 then + (Output.sublexer ']' (lexeme_start lexbuf); escaped_coq lexbuf) + else Tokens.flush_sublexer () } + | "[" + { incr brackets; + Output.sublexer '[' (lexeme_start lexbuf); escaped_coq lexbuf } + | "(*" + { Tokens.flush_sublexer (); comment_level := 1; + ignore (comment lexbuf); escaped_coq lexbuf } + | "*)" + { (* likely to be a syntax error: we escape *) backtrack lexbuf } + | eof + { Tokens.flush_sublexer () } + | (identifier '.')* identifier + { Output.ident (lexeme lexbuf) (lexeme_start lexbuf); escaped_coq lexbuf } + | space + { Tokens.flush_sublexer(); Output.char (lexeme_char lexbuf 0); + escaped_coq lexbuf } + | _ + { Output.sublexer (lexeme_char lexbuf 0) (lexeme_start lexbuf); + escaped_coq lexbuf } + +(*s Coq "Comments" command. *) + +and comments = parse + | space_nl+ + { Output.char ' '; comments lexbuf } + | '"' [^ '"']* '"' + { let s = lexeme lexbuf in + let s = String.sub s 1 (String.length s - 2) in + ignore (doc None (from_string s)); comments lexbuf } + | ([^ '.' '"'] | '.' [^ ' ' '\t' '\n'])+ + { escaped_coq (from_string (lexeme lexbuf)); comments lexbuf } + | "." (space_nl | eof) + { () } + | eof + { () } + | _ + { Output.char (lexeme_char lexbuf 0); comments lexbuf } + +(*s Skip comments *) + +and comment = parse + | "(*" { incr comment_level; + if !Cdglobals.parse_comments then Output.start_comment (); + comment lexbuf } + | "*)" space* nl { + if !Cdglobals.parse_comments then + (Output.end_comment (); Output.line_break ()); + decr comment_level; if !comment_level > 0 then comment lexbuf else true } + | "*)" { + if !Cdglobals.parse_comments then (Output.end_comment ()); + decr comment_level; if !comment_level > 0 then comment lexbuf else false } + | "[" { + if !Cdglobals.parse_comments then + if !Cdglobals.plain_comments then Output.char '[' + else (brackets := 1; Output.start_inline_coq (); + escaped_coq lexbuf; Output.end_inline_coq ()); + comment lexbuf } + | "[[" nl { + if !Cdglobals.parse_comments then + if !Cdglobals.plain_comments then (Output.char '['; Output.char '[') + else (formatted := true; + Output.start_inline_coq_block (); + let _ = body_bol lexbuf in + Output.end_inline_coq_block (); formatted := false); + comment lexbuf} + | "$" + { if !Cdglobals.parse_comments then + if !Cdglobals.plain_comments then Output.char '$' + else (Output.start_latex_math (); escaped_math_latex lexbuf); + comment lexbuf } + | "$$" + { if !Cdglobals.parse_comments + then + (if !Cdglobals.plain_comments then Output.char '$'; Output.char '$'); + doc None lexbuf } + | "%" + { if !Cdglobals.parse_comments + then + if !Cdglobals.plain_comments then Output.char '%' + else escaped_latex lexbuf; comment lexbuf } + | "%%" + { if !Cdglobals.parse_comments + then + (if !Cdglobals.plain_comments then Output.char '%'; Output.char '%'); + comment lexbuf } + | "#" + { if !Cdglobals.parse_comments + then + if !Cdglobals.plain_comments then Output.char '$' + else escaped_html lexbuf; comment lexbuf } + | "##" + { if !Cdglobals.parse_comments + then + (if !Cdglobals.plain_comments then Output.char '#'; Output.char '#'); + comment lexbuf } + | eof { false } + | space+ { if !Cdglobals.parse_comments + then Output.indentation (fst (count_spaces (lexeme lexbuf))); + comment lexbuf } + | nl { if !Cdglobals.parse_comments + then Output.line_break (); comment lexbuf } + | _ { if !Cdglobals.parse_comments then Output.char (lexeme_char lexbuf 0); + comment lexbuf } + +and skip_to_dot = parse + | '.' space* nl { true } + | eof | '.' space+ { false } + | "(*" { comment_level := 1; ignore (comment lexbuf); skip_to_dot lexbuf } + | _ { skip_to_dot lexbuf } + +and body_bol = parse + | space+ + { Output.indentation (fst (count_spaces (lexeme lexbuf))); body lexbuf } + | _ { backtrack lexbuf; Output.indentation 0; body lexbuf } + +and body = parse + | nl {Tokens.flush_sublexer(); Output.line_break(); new_line lexbuf; body_bol lexbuf} + | nl+ space* "]]" space* nl + { Tokens.flush_sublexer(); + if not !formatted then + begin + let s = lexeme lexbuf in + let nlsp,s = remove_newline s in + let _,isp = count_spaces s in + let loc = lexeme_start lexbuf + nlsp + isp in + Output.sublexer ']' loc; + Output.sublexer ']' (loc+1); + Tokens.flush_sublexer(); + body lexbuf + end + else + begin + Output.paragraph (); + true + end } + | "]]" space* nl + { Tokens.flush_sublexer(); + if not !formatted then + begin + let loc = lexeme_start lexbuf in + Output.sublexer ']' loc; + Output.sublexer ']' (loc+1); + Tokens.flush_sublexer(); + Output.line_break(); + body lexbuf + end + else + begin + Output.paragraph (); + true + end } + | eof { Tokens.flush_sublexer(); false } + | '.' space* nl | '.' space* eof + { Tokens.flush_sublexer(); Output.char '.'; Output.line_break(); + if not !formatted then true else body_bol lexbuf } + | '.' space* nl "]]" space* nl + { Tokens.flush_sublexer(); Output.char '.'; + if not !formatted then + begin + eprintf "Error: stray ]] at %d\n" (lexeme_start lexbuf); + flush stderr; + exit 1 + end + else + begin + Output.paragraph (); + true + end + } + | '.' space+ + { Tokens.flush_sublexer(); Output.char '.'; Output.char ' '; + if not !formatted then false else body lexbuf } + | "(**" space_nl + { Tokens.flush_sublexer(); Output.end_coq (); Output.start_doc (); + let eol = doc_bol lexbuf in + Output.end_doc (); Output.start_coq (); + if eol then body_bol lexbuf else body lexbuf } + | "(*" { Tokens.flush_sublexer(); comment_level := 1; + if !Cdglobals.parse_comments then Output.start_comment (); + let eol = comment lexbuf in + if eol + then begin if not !Cdglobals.parse_comments then Output.line_break(); body_bol lexbuf end + else body lexbuf } + | "where" space* + { Tokens.flush_sublexer(); + Output.ident (lexeme lexbuf) (lexeme_start lexbuf); + start_notation_string lexbuf } + | identifier + { Tokens.flush_sublexer(); + Output.ident (lexeme lexbuf) (lexeme_start lexbuf); + body lexbuf } + | ".." + { Tokens.flush_sublexer(); Output.char '.'; Output.char '.'; + body lexbuf } + | '"' + { Tokens.flush_sublexer(); Output.char '"'; + string lexbuf; + body lexbuf } + | space + { Tokens.flush_sublexer(); Output.char (lexeme_char lexbuf 0); + body lexbuf } + + | _ { let c = lexeme_char lexbuf 0 in + Output.sublexer c (lexeme_start lexbuf); + body lexbuf } + +and start_notation_string = parse + | '"' (* a true notation *) + { Output.sublexer '"' (lexeme_start lexbuf); + notation_string lexbuf; + body lexbuf } + | _ (* an abbreviation *) + { backtrack lexbuf; body lexbuf } + +and notation_string = parse + | "\"\"" + { Output.char '"'; Output.char '"'; (* Unlikely! *) + notation_string lexbuf } + | '"' + { Tokens.flush_sublexer(); Output.char '"' } + | _ { let c = lexeme_char lexbuf 0 in + Output.sublexer c (lexeme_start lexbuf); + notation_string lexbuf } + +and string = parse + | "\"\"" { Output.char '"'; Output.char '"'; string lexbuf } + | '"' { Output.char '"' } + | _ { let c = lexeme_char lexbuf 0 in Output.char c; string lexbuf } + +and skip_hide = parse + | eof | end_hide { () } + | _ { skip_hide lexbuf } + +(*s Reading token pretty-print *) + +and printing_token_body = parse + | "*)" nl? | eof + { let s = Buffer.contents token_buffer in + Buffer.clear token_buffer; + s } + | _ { Buffer.add_string token_buffer (lexeme lexbuf); + printing_token_body lexbuf } + +(*s A small scanner to support the chapter subtitle feature *) +and st_start m = parse + | "(*" "*"+ space+ "*" space+ + { st_modname m lexbuf } + | _ + { None } + +and st_modname m = parse + | identifier space* ":" space* + { if subtitle m (lexeme lexbuf) then + st_subtitle lexbuf + else + None + } + | _ + { None } + +and st_subtitle = parse + | [^ '\n']* '\n' + { let st = lexeme lexbuf in + let i = try Str.search_forward (Str.regexp "\\**)") st 0 with + Not_found -> + (eprintf "unterminated comment at beginning of file\n"; + exit 1) + in + Some (cut_head_tail_spaces (String.sub st 0 i)) + } + | _ + { None } +(*s Applying the scanners to files *) + +{ + let coq_file f m = + reset (); + let c = open_in f in + let lb = from_channel c in + (Index.current_library := m; + Output.initialize (); + Output.start_module (); + Output.start_coq (); coq_bol lb; Output.end_coq (); + close_in c) + + let detect_subtitle f m = + let c = open_in f in + let lb = from_channel c in + let sub = st_start m lb in + close_in c; + sub +} |