From e031710bb59b14f39c9d1d4652296370f7aa72d3 Mon Sep 17 00:00:00 2001 From: Maxime Dénès Date: Thu, 22 Mar 2018 11:38:53 +0100 Subject: [Sphinx] Move chapter 23 to new infrastructure --- Makefile.doc | 1 - doc/refman/Extraction.tex | 620 ------------------------------------- doc/refman/Reference-Manual.tex | 1 - doc/sphinx/addendum/extraction.rst | 620 +++++++++++++++++++++++++++++++++++++ 4 files changed, 620 insertions(+), 622 deletions(-) delete mode 100644 doc/refman/Extraction.tex create mode 100644 doc/sphinx/addendum/extraction.rst diff --git a/Makefile.doc b/Makefile.doc index 8a1c6bb29..e63728649 100644 --- a/Makefile.doc +++ b/Makefile.doc @@ -60,7 +60,6 @@ REFMANCOQTEXFILES:=$(addprefix doc/refman/, \ RefMan-gal.v.tex \ RefMan-oth.v.tex RefMan-ltac.v.tex \ RefMan-pro.v.tex \ - Extraction.v.tex \ Program.v.tex Polynom.v.tex Nsatz.v.tex \ Setoid.v.tex Universes.v.tex \ Misc.v.tex) diff --git a/doc/refman/Extraction.tex b/doc/refman/Extraction.tex deleted file mode 100644 index cff7be3e9..000000000 --- a/doc/refman/Extraction.tex +++ /dev/null @@ -1,620 +0,0 @@ -\achapter{Extraction of programs in OCaml and Haskell} -%HEVEA\cutname{extraction.html} -\label{Extraction} -\aauthor{Jean-Christophe Filliâtre and Pierre Letouzey} -\index{Extraction} - -\noindent We present here the \Coq\ extraction commands, used to build certified -and relatively efficient functional programs, extracting them from -either \Coq\ functions or \Coq\ proofs of specifications. The -functional languages available as output are currently \ocaml{}, -\textsc{Haskell} and \textsc{Scheme}. In the following, ``ML'' will -be used (abusively) to refer to any of the three. - -%% \paragraph{Differences with old versions.} -%% The current extraction mechanism is new for version 7.0 of {\Coq}. -%% In particular, the \FW\ toplevel used as an intermediate step between -%% \Coq\ and ML has been withdrawn. It is also not possible -%% any more to import ML objects in this \FW\ toplevel. -%% The current mechanism also differs from -%% the one in previous versions of \Coq: there is no more -%% an explicit toplevel for the language (formerly called \textsc{Fml}). - -Before using any of the commands or options described in this chapter, -the extraction framework should first be loaded explicitly -via {\tt Require Extraction}, or via the more robust -{\tt From Coq Require Extraction}. -Note that in earlier versions of Coq, these commands and options were -directly available without any preliminary {\tt Require}. - -\begin{coq_example} -Require Extraction. -\end{coq_example} - -\asection{Generating ML code} -\comindex{Extraction} -\comindex{Recursive Extraction} -\comindex{Separate Extraction} -\comindex{Extraction Library} -\comindex{Recursive Extraction Library} - -The next two commands are meant to be used for rapid preview of -extraction. They both display extracted term(s) inside \Coq. - -\begin{description} -\item {\tt Extraction \qualid{}.} ~\par - Extraction of a constant or module in the \Coq\ toplevel. - -\item {\tt Recursive Extraction} \qualid$_1$ \dots\ \qualid$_n$. ~\par - Recursive extraction of all the globals (or modules) \qualid$_1$ \dots\ - \qualid$_n$ and all their dependencies in the \Coq\ toplevel. -\end{description} - -%% TODO error messages - -\noindent All the following commands produce real ML files. User can choose to produce -one monolithic file or one file per \Coq\ library. - -\begin{description} -\item {\tt Extraction "{\em file}"} - \qualid$_1$ \dots\ \qualid$_n$. ~\par - Recursive extraction of all the globals (or modules) \qualid$_1$ \dots\ - \qualid$_n$ and all their dependencies in one monolithic file {\em file}. - Global and local identifiers are renamed according to the chosen ML - language to fulfill its syntactic conventions, keeping original - names as much as possible. - -\item {\tt Extraction Library} \ident. ~\par - Extraction of the whole \Coq\ library {\tt\ident.v} to an ML module - {\tt\ident.ml}. In case of name clash, identifiers are here renamed - using prefixes \verb!coq_! or \verb!Coq_! to ensure a - session-independent renaming. - -\item {\tt Recursive Extraction Library} \ident. ~\par - Extraction of the \Coq\ library {\tt\ident.v} and all other modules - {\tt\ident.v} depends on. - -\item {\tt Separate Extraction} - \qualid$_1$ \dots\ \qualid$_n$. ~\par - Recursive extraction of all the globals (or modules) \qualid$_1$ \dots\ - \qualid$_n$ and all their dependencies, just as {\tt - Extraction "{\em file}"}, but instead of producing one monolithic - file, this command splits the produced code in separate ML files, one per - corresponding Coq {\tt .v} file. This command is hence quite similar - to {\tt Recursive Extraction Library}, except that only the needed - parts of Coq libraries are extracted instead of the whole. The - naming convention in case of name clash is the same one as - {\tt Extraction Library}: identifiers are here renamed - using prefixes \verb!coq_! or \verb!Coq_!. -\end{description} - -\noindent The following command is meant to help automatic testing of - the extraction, see for instance the {\tt test-suite} directory - in the \Coq\ sources. - -\begin{description} -\item {\tt Extraction TestCompile} \qualid$_1$ \dots\ \qualid$_n$. ~\par - All the globals (or modules) \qualid$_1$ \dots\ \qualid$_n$ and all - their dependencies are extracted to a temporary {\ocaml} file, just as in - {\tt Extraction "{\em file}"}. Then this temporary file and its - signature are compiled with the same {\ocaml} compiler used to built - \Coq. This command succeeds only if the extraction and the {\ocaml} - compilation succeed (and it fails if the current target language - of the extraction is not {\ocaml}). -\end{description} - -\asection{Extraction options} - -\asubsection{Setting the target language} -\comindex{Extraction Language} - -The ability to fix target language is the first and more important -of the extraction options. Default is {\ocaml}. -\begin{description} -\item {\tt Extraction Language OCaml}. -\item {\tt Extraction Language Haskell}. -\item {\tt Extraction Language Scheme}. -\end{description} - -\asubsection{Inlining and optimizations} - -Since {\ocaml} is a strict language, the extracted code has to -be optimized in order to be efficient (for instance, when using -induction principles we do not want to compute all the recursive calls -but only the needed ones). So the extraction mechanism provides an -automatic optimization routine that will be called each time the user -want to generate {\ocaml} programs. The optimizations can be split in two -groups: the type-preserving ones -- essentially constant inlining and -reductions -- and the non type-preserving ones -- some function -abstractions of dummy types are removed when it is deemed safe in order -to have more elegant types. Therefore some constants may not appear in the -resulting monolithic {\ocaml} program. In the case of modular extraction, -even if some inlining is done, the inlined constant are nevertheless -printed, to ensure session-independent programs. - -Concerning Haskell, type-preserving optimizations are less useful -because of laziness. We still make some optimizations, for example in -order to produce more readable code. - -The type-preserving optimizations are controlled by the following \Coq\ options: - -\begin{description} - -\item \optindex{Extraction Optimize} {\tt Unset Extraction Optimize.} - -Default is Set. This controls all type-preserving optimizations made on -the ML terms (mostly reduction of dummy beta/iota redexes, but also -simplifications on Cases, etc). Put this option to Unset if you want a -ML term as close as possible to the Coq term. - -\item \optindex{Extraction Conservative Types} -{\tt Set Extraction Conservative Types.} - -Default is Unset. This controls the non type-preserving optimizations -made on ML terms (which try to avoid function abstraction of dummy -types). Turn this option to Set to make sure that {\tt e:t} -implies that {\tt e':t'} where {\tt e'} and {\tt t'} are the extracted -code of {\tt e} and {\tt t} respectively. - -\item \optindex{Extraction KeepSingleton} -{\tt Set Extraction KeepSingleton.} - -Default is Unset. Normally, when the extraction of an inductive type -produces a singleton type (i.e. a type with only one constructor, and -only one argument to this constructor), the inductive structure is -removed and this type is seen as an alias to the inner type. -The typical example is {\tt sig}. This option allows disabling this -optimization when one wishes to preserve the inductive structure of types. - -\item \optindex{Extraction AutoInline} {\tt Unset Extraction AutoInline.} - -Default is Set. The extraction mechanism -inlines the bodies of some defined constants, according to some heuristics -like size of bodies, uselessness of some arguments, etc. Those heuristics are -not always perfect; if you want to disable this feature, do it by Unset. - -\item \comindex{Extraction Inline} \comindex{Extraction NoInline} -{\tt Extraction [Inline|NoInline] \qualid$_1$ \dots\ \qualid$_n$}. - -In addition to the automatic inline feature, you can tell to -inline some more constants by the {\tt Extraction Inline} command. Conversely, -you can forbid the automatic inlining of some specific constants by -the {\tt Extraction NoInline} command. -Those two commands enable a precise control of what is inlined and what is not. - -\item \comindex{Print Extraction Inline} -{\tt Print Extraction Inline}. - -Prints the current state of the table recording the custom inlinings -declared by the two previous commands. - -\item \comindex{Reset Extraction Inline} -{\tt Reset Extraction Inline}. - -Puts the table recording the custom inlinings back to empty. - -\end{description} - - -\paragraph{Inlining and printing of a constant declaration.} - -A user can explicitly ask for a constant to be extracted by two means: -\begin{itemize} -\item by mentioning it on the extraction command line -\item by extracting the whole \Coq\ module of this constant. -\end{itemize} -In both cases, the declaration of this constant will be present in the -produced file. -But this same constant may or may not be inlined in the following -terms, depending on the automatic/custom inlining mechanism. - - -For the constants non-explicitly required but needed for dependency -reasons, there are two cases: -\begin{itemize} -\item If an inlining decision is taken, whether automatically or not, -all occurrences of this constant are replaced by its extracted body, and -this constant is not declared in the generated file. -\item If no inlining decision is taken, the constant is normally - declared in the produced file. -\end{itemize} - -\asubsection{Extra elimination of useless arguments} - -The following command provides some extra manual control on the -code elimination performed during extraction, in a way which -is independent but complementary to the main elimination -principles of extraction (logical parts and types). - -\begin{description} -\item \comindex{Extraction Implicit} - {\tt Extraction Implicit} \qualid\ [ \ident$_1$ \dots\ \ident$_n$ ]. - -This experimental command allows declaring some arguments of -\qualid\ as implicit, i.e. useless in extracted code and hence to -be removed by extraction. Here \qualid\ can be any function or -inductive constructor, and \ident$_i$ are the names of the concerned -arguments. In fact, an argument can also be referred by a number -indicating its position, starting from 1. -\end{description} - -\noindent When an actual extraction takes place, an error is normally raised if the -{\tt Extraction Implicit} -declarations cannot be honored, that is if any of the implicited -variables still occurs in the final code. This behavior can be relaxed -via the following option: - -\begin{description} -\item \optindex{Extraction SafeImplicits} {\tt Unset Extraction SafeImplicits.} - -Default is Set. When this option is Unset, a warning is emitted -instead of an error if some implicited variables still occur in the -final code of an extraction. This way, the extracted code may be -obtained nonetheless and reviewed manually to locate the source of the issue -(in the code, some comments mark the location of these remaining -implicited variables). -Note that this extracted code might not compile or run properly, -depending of the use of these remaining implicited variables. - -\end{description} - -\asubsection{Realizing axioms}\label{extraction:axioms} - -Extraction will fail if it encounters an informative -axiom not realized (see Section~\ref{extraction:axioms}). -A warning will be issued if it encounters a logical axiom, to remind the -user that inconsistent logical axioms may lead to incorrect or -non-terminating extracted terms. - -It is possible to assume some axioms while developing a proof. Since -these axioms can be any kind of proposition or object or type, they may -perfectly well have some computational content. But a program must be -a closed term, and of course the system cannot guess the program which -realizes an axiom. Therefore, it is possible to tell the system -what ML term corresponds to a given axiom. - -\comindex{Extract Constant} -\begin{description} -\item{\tt Extract Constant \qualid\ => \str.} ~\par - Give an ML extraction for the given constant. - The \str\ may be an identifier or a quoted string. -\item{\tt Extract Inlined Constant \qualid\ => \str.} ~\par - Same as the previous one, except that the given ML terms will - be inlined everywhere instead of being declared via a let. -\end{description} - -\noindent Note that the {\tt Extract Inlined Constant} command is sugar -for an {\tt Extract Constant} followed by a {\tt Extraction Inline}. -Hence a {\tt Reset Extraction Inline} will have an effect on the -realized and inlined axiom. - -Of course, it is the responsibility of the user to ensure that the ML -terms given to realize the axioms do have the expected types. In -fact, the strings containing realizing code are just copied to the -extracted files. The extraction recognizes whether the realized axiom -should become a ML type constant or a ML object declaration. - -\Example -\begin{coq_example*} -Axiom X:Set. -Axiom x:X. -Extract Constant X => "int". -Extract Constant x => "0". -\end{coq_example*} - -\noindent Notice that in the case of type scheme axiom (i.e. whose type is an -arity, that is a sequence of product finished by a sort), then some type -variables have to be given. The syntax is then: - -\begin{description} -\item{\tt Extract Constant \qualid\ \str$_1$ \dots\ \str$_n$ => \str.} -\end{description} - -\noindent The number of type variables is checked by the system. - -\Example -\begin{coq_example*} -Axiom Y : Set -> Set -> Set. -Extract Constant Y "'a" "'b" => " 'a*'b ". -\end{coq_example*} - -\noindent Realizing an axiom via {\tt Extract Constant} is only useful in the -case of an informative axiom (of sort Type or Set). A logical axiom -have no computational content and hence will not appears in extracted -terms. But a warning is nonetheless issued if extraction encounters a -logical axiom. This warning reminds user that inconsistent logical -axioms may lead to incorrect or non-terminating extracted terms. - -If an informative axiom has not been realized before an extraction, a -warning is also issued and the definition of the axiom is filled with -an exception labeled {\tt AXIOM TO BE REALIZED}. The user must then -search these exceptions inside the extracted file and replace them by -real code. - -\comindex{Extract Inductive} - -The system also provides a mechanism to specify ML terms for inductive -types and constructors. For instance, the user may want to use the ML -native boolean type instead of \Coq\ one. The syntax is the following: - -\begin{description} -\item{\tt Extract Inductive \qualid\ => \str\ [ \str\ \dots\ \str\ ] {\it optstring}.}\par - Give an ML extraction for the given inductive type. You must specify - extractions for the type itself (first \str) and all its - constructors (between square brackets). If given, the final optional - string should contain a function emulating pattern-matching over this - inductive type. If this optional string is not given, the ML - extraction must be an ML inductive datatype, and the native - pattern-matching of the language will be used. -\end{description} - -\noindent For an inductive type with $k$ constructor, the function used to -emulate the match should expect $(k+1)$ arguments, first the $k$ -branches in functional form, and then the inductive element to -destruct. For instance, the match branch \verb$| S n => foo$ gives the -functional form \verb$(fun n -> foo)$. Note that a constructor with no -argument is considered to have one unit argument, in order to block -early evaluation of the branch: \verb$| O => bar$ leads to the functional -form \verb$(fun () -> bar)$. For instance, when extracting {\tt nat} -into {\tt int}, the code to provide has type: -{\tt (unit->'a)->(int->'a)->int->'a}. - -As for {\tt Extract Inductive}, this command should be used with care: -\begin{itemize} -\item The ML code provided by the user is currently \emph{not} checked at all by - extraction, even for syntax errors. - -\item Extracting an inductive type to a pre-existing ML inductive type -is quite sound. But extracting to a general type (by providing an -ad-hoc pattern-matching) will often \emph{not} be fully rigorously -correct. For instance, when extracting {\tt nat} to {\ocaml}'s {\tt -int}, it is theoretically possible to build {\tt nat} values that are -larger than {\ocaml}'s {\tt max\_int}. It is the user's responsibility to -be sure that no overflow or other bad events occur in practice. - -\item Translating an inductive type to an ML type does \emph{not} -magically improve the asymptotic complexity of functions, even if the -ML type is an efficient representation. For instance, when extracting -{\tt nat} to {\ocaml}'s {\tt int}, the function {\tt mult} stays -quadratic. It might be interesting to associate this translation with -some specific {\tt Extract Constant} when primitive counterparts exist. -\end{itemize} - -\Example -Typical examples are the following: -\begin{coq_eval} -Require Extraction. -\end{coq_eval} -\begin{coq_example} -Extract Inductive unit => "unit" [ "()" ]. -Extract Inductive bool => "bool" [ "true" "false" ]. -Extract Inductive sumbool => "bool" [ "true" "false" ]. -\end{coq_example} - -\noindent When extracting to {\ocaml}, if an inductive constructor or type -has arity 2 and the corresponding string is enclosed by parentheses, -and the string meets {\ocaml}'s lexical criteria for an infix symbol, -then the rest of the string is used as infix constructor or type. - -\begin{coq_example} -Extract Inductive list => "list" [ "[]" "(::)" ]. -Extract Inductive prod => "(*)" [ "(,)" ]. -\end{coq_example} - -\noindent As an example of translation to a non-inductive datatype, let's turn -{\tt nat} into {\ocaml}'s {\tt int} (see caveat above): -\begin{coq_example} -Extract Inductive nat => int [ "0" "succ" ] - "(fun fO fS n -> if n=0 then fO () else fS (n-1))". -\end{coq_example} - -\asubsection{Avoiding conflicts with existing filenames} - -\comindex{Extraction Blacklist} - -When using {\tt Extraction Library}, the names of the extracted files -directly depends from the names of the \Coq\ files. It may happen that -these filenames are in conflict with already existing files, -either in the standard library of the target language or in other -code that is meant to be linked with the extracted code. -For instance the module {\tt List} exists both in \Coq\ and in {\ocaml}. -It is possible to instruct the extraction not to use particular filenames. - -\begin{description} -\item{\tt Extraction Blacklist} \ident\ \dots\ \ident. ~\par - Instruct the extraction to avoid using these names as filenames - for extracted code. -\item{\tt Print Extraction Blacklist.} ~\par - Show the current list of filenames the extraction should avoid. -\item{\tt Reset Extraction Blacklist.} ~\par - Allow the extraction to use any filename. -\end{description} - -\noindent For {\ocaml}, a typical use of these commands is -{\tt Extraction Blacklist String List}. - -\asection{Differences between \Coq\ and ML type systems} - - -Due to differences between \Coq\ and ML type systems, -some extracted programs are not directly typable in ML. -We now solve this problem (at least in {\ocaml}) by adding -when needed some unsafe casting {\tt Obj.magic}, which give -a generic type {\tt 'a} to any term. - -For example, here are two kinds of problem that can occur: - -\begin{itemize} - \item If some part of the program is {\em very} polymorphic, there - may be no ML type for it. In that case the extraction to ML works - alright but the generated code may be refused by the ML - type-checker. A very well known example is the {\em distr-pair} - function: -\begin{verbatim} -Definition dp := - fun (A B:Set)(x:A)(y:B)(f:forall C:Set, C->C) => (f A x, f B y). -\end{verbatim} - -In {\ocaml}, for instance, the direct extracted term would be -\begin{verbatim} -let dp x y f = Pair((f () x),(f () y)) -\end{verbatim} - -and would have type -\begin{verbatim} -dp : 'a -> 'a -> (unit -> 'a -> 'b) -> ('b,'b) prod -\end{verbatim} - -which is not its original type, but a restriction. - -We now produce the following correct version: -\begin{verbatim} -let dp x y f = Pair ((Obj.magic f () x), (Obj.magic f () y)) -\end{verbatim} - - \item Some definitions of \Coq\ may have no counterpart in ML. This - happens when there is a quantification over types inside the type - of a constructor; for example: -\begin{verbatim} -Inductive anything : Type := dummy : forall A:Set, A -> anything. -\end{verbatim} - -which corresponds to the definition of an ML dynamic type. -In {\ocaml}, we must cast any argument of the constructor dummy. - -\end{itemize} - -\noindent Even with those unsafe castings, you should never get error like -``segmentation fault''. In fact even if your program may seem -ill-typed to the {\ocaml} type-checker, it can't go wrong: it comes -from a Coq well-typed terms, so for example inductives will always -have the correct number of arguments, etc. - -More details about the correctness of the extracted programs can be -found in \cite{Let02}. - -We have to say, though, that in most ``realistic'' programs, these -problems do not occur. For example all the programs of Coq library are -accepted by Caml type-checker without any {\tt Obj.magic} (see examples below). - - - -\asection{Some examples} - -We present here two examples of extractions, taken from the -\Coq\ Standard Library. We choose \ocaml\ as target language, -but all can be done in the other dialects with slight modifications. -We then indicate where to find other examples and tests of Extraction. - -\asubsection{A detailed example: Euclidean division} - -The file {\tt Euclid} contains the proof of Euclidean division -(theorem {\tt eucl\_dev}). The natural numbers defined in the example -files are unary integers defined by two constructors $O$ and $S$: -\begin{coq_example*} -Inductive nat : Set := - | O : nat - | S : nat -> nat. -\end{coq_example*} - -\noindent This module contains a theorem {\tt eucl\_dev}, whose type is -\begin{verbatim} -forall b:nat, b > 0 -> forall a:nat, diveucl a b -\end{verbatim} -where {\tt diveucl} is a type for the pair of the quotient and the -modulo, plus some logical assertions that disappear during extraction. -We can now extract this program to \ocaml: - -\begin{coq_eval} -Reset Initial. -\end{coq_eval} -\begin{coq_example} -Require Extraction. -Require Import Euclid Wf_nat. -Extraction Inline gt_wf_rec lt_wf_rec induction_ltof2. -Recursive Extraction eucl_dev. -\end{coq_example} - -\noindent The inlining of {\tt gt\_wf\_rec} and others is not -mandatory. It only enhances readability of extracted code. -You can then copy-paste the output to a file {\tt euclid.ml} or let -\Coq\ do it for you with the following command: - -\begin{verbatim} -Extraction "euclid" eucl_dev. -\end{verbatim} - -\noindent Let us play the resulting program: - -\begin{verbatim} -# #use "euclid.ml";; -type nat = O | S of nat -type sumbool = Left | Right -val minus : nat -> nat -> nat = -val le_lt_dec : nat -> nat -> sumbool = -val le_gt_dec : nat -> nat -> sumbool = -type diveucl = Divex of nat * nat -val eucl_dev : nat -> nat -> diveucl = -# eucl_dev (S (S O)) (S (S (S (S (S O)))));; -- : diveucl = Divex (S (S O), S O) -\end{verbatim} -It is easier to test on \ocaml\ integers: -\begin{verbatim} -# let rec nat_of_int = function 0 -> O | n -> S (nat_of_int (n-1));; -val nat_of_int : int -> nat = -# let rec int_of_nat = function O -> 0 | S p -> 1+(int_of_nat p);; -val int_of_nat : nat -> int = -# let div a b = - let Divex (q,r) = eucl_dev (nat_of_int b) (nat_of_int a) - in (int_of_nat q, int_of_nat r);; -val div : int -> int -> int * int = -# div 173 15;; -- : int * int = (11, 8) -\end{verbatim} - -\noindent Note that these {\tt nat\_of\_int} and {\tt int\_of\_nat} are now -available via a mere {\tt Require Import ExtrOcamlIntConv} and then -adding these functions to the list of functions to extract. This file -{\tt ExtrOcamlIntConv.v} and some others in {\tt plugins/extraction/} -are meant to help building concrete program via extraction. - -\asubsection{Extraction's horror museum} - -Some pathological examples of extraction are grouped in the file\\ -{\tt test-suite/success/extraction.v} of the sources of \Coq. - -\asubsection{Users' Contributions} - -Several of the \Coq\ Users' Contributions use extraction to produce -certified programs. In particular the following ones have an automatic -extraction test: - -\begin{itemize} -\item {\tt additions} -\item {\tt bdds} -\item {\tt canon-bdds} -\item {\tt chinese} -\item {\tt continuations} -\item {\tt coq-in-coq} -\item {\tt exceptions} -\item {\tt firing-squad} -\item {\tt founify} -\item {\tt graphs} -\item {\tt higman-cf} -\item {\tt higman-nw} -\item {\tt hardware} -\item {\tt multiplier} -\item {\tt search-trees} -\item {\tt stalmarck} -\end{itemize} - -\noindent {\tt continuations} and {\tt multiplier} are a bit particular. They are -examples of developments where {\tt Obj.magic} are needed. This is -probably due to an heavy use of impredicativity. After compilation, those -two examples run nonetheless, thanks to the correction of the -extraction~\cite{Let02}. - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "Reference-Manual" -%%% End: diff --git a/doc/refman/Reference-Manual.tex b/doc/refman/Reference-Manual.tex index 0c159e059..3feb9014f 100644 --- a/doc/refman/Reference-Manual.tex +++ b/doc/refman/Reference-Manual.tex @@ -117,7 +117,6 @@ Options A and B of the licence are {\em not} elected.} %END LATEX \part{Addendum to the Reference Manual} \include{AddRefMan-pre}% -\include{Extraction.v}% \include{Program.v}% \include{Polynom.v}% = Ring \include{Nsatz.v}% diff --git a/doc/sphinx/addendum/extraction.rst b/doc/sphinx/addendum/extraction.rst new file mode 100644 index 000000000..cff7be3e9 --- /dev/null +++ b/doc/sphinx/addendum/extraction.rst @@ -0,0 +1,620 @@ +\achapter{Extraction of programs in OCaml and Haskell} +%HEVEA\cutname{extraction.html} +\label{Extraction} +\aauthor{Jean-Christophe Filliâtre and Pierre Letouzey} +\index{Extraction} + +\noindent We present here the \Coq\ extraction commands, used to build certified +and relatively efficient functional programs, extracting them from +either \Coq\ functions or \Coq\ proofs of specifications. The +functional languages available as output are currently \ocaml{}, +\textsc{Haskell} and \textsc{Scheme}. In the following, ``ML'' will +be used (abusively) to refer to any of the three. + +%% \paragraph{Differences with old versions.} +%% The current extraction mechanism is new for version 7.0 of {\Coq}. +%% In particular, the \FW\ toplevel used as an intermediate step between +%% \Coq\ and ML has been withdrawn. It is also not possible +%% any more to import ML objects in this \FW\ toplevel. +%% The current mechanism also differs from +%% the one in previous versions of \Coq: there is no more +%% an explicit toplevel for the language (formerly called \textsc{Fml}). + +Before using any of the commands or options described in this chapter, +the extraction framework should first be loaded explicitly +via {\tt Require Extraction}, or via the more robust +{\tt From Coq Require Extraction}. +Note that in earlier versions of Coq, these commands and options were +directly available without any preliminary {\tt Require}. + +\begin{coq_example} +Require Extraction. +\end{coq_example} + +\asection{Generating ML code} +\comindex{Extraction} +\comindex{Recursive Extraction} +\comindex{Separate Extraction} +\comindex{Extraction Library} +\comindex{Recursive Extraction Library} + +The next two commands are meant to be used for rapid preview of +extraction. They both display extracted term(s) inside \Coq. + +\begin{description} +\item {\tt Extraction \qualid{}.} ~\par + Extraction of a constant or module in the \Coq\ toplevel. + +\item {\tt Recursive Extraction} \qualid$_1$ \dots\ \qualid$_n$. ~\par + Recursive extraction of all the globals (or modules) \qualid$_1$ \dots\ + \qualid$_n$ and all their dependencies in the \Coq\ toplevel. +\end{description} + +%% TODO error messages + +\noindent All the following commands produce real ML files. User can choose to produce +one monolithic file or one file per \Coq\ library. + +\begin{description} +\item {\tt Extraction "{\em file}"} + \qualid$_1$ \dots\ \qualid$_n$. ~\par + Recursive extraction of all the globals (or modules) \qualid$_1$ \dots\ + \qualid$_n$ and all their dependencies in one monolithic file {\em file}. + Global and local identifiers are renamed according to the chosen ML + language to fulfill its syntactic conventions, keeping original + names as much as possible. + +\item {\tt Extraction Library} \ident. ~\par + Extraction of the whole \Coq\ library {\tt\ident.v} to an ML module + {\tt\ident.ml}. In case of name clash, identifiers are here renamed + using prefixes \verb!coq_! or \verb!Coq_! to ensure a + session-independent renaming. + +\item {\tt Recursive Extraction Library} \ident. ~\par + Extraction of the \Coq\ library {\tt\ident.v} and all other modules + {\tt\ident.v} depends on. + +\item {\tt Separate Extraction} + \qualid$_1$ \dots\ \qualid$_n$. ~\par + Recursive extraction of all the globals (or modules) \qualid$_1$ \dots\ + \qualid$_n$ and all their dependencies, just as {\tt + Extraction "{\em file}"}, but instead of producing one monolithic + file, this command splits the produced code in separate ML files, one per + corresponding Coq {\tt .v} file. This command is hence quite similar + to {\tt Recursive Extraction Library}, except that only the needed + parts of Coq libraries are extracted instead of the whole. The + naming convention in case of name clash is the same one as + {\tt Extraction Library}: identifiers are here renamed + using prefixes \verb!coq_! or \verb!Coq_!. +\end{description} + +\noindent The following command is meant to help automatic testing of + the extraction, see for instance the {\tt test-suite} directory + in the \Coq\ sources. + +\begin{description} +\item {\tt Extraction TestCompile} \qualid$_1$ \dots\ \qualid$_n$. ~\par + All the globals (or modules) \qualid$_1$ \dots\ \qualid$_n$ and all + their dependencies are extracted to a temporary {\ocaml} file, just as in + {\tt Extraction "{\em file}"}. Then this temporary file and its + signature are compiled with the same {\ocaml} compiler used to built + \Coq. This command succeeds only if the extraction and the {\ocaml} + compilation succeed (and it fails if the current target language + of the extraction is not {\ocaml}). +\end{description} + +\asection{Extraction options} + +\asubsection{Setting the target language} +\comindex{Extraction Language} + +The ability to fix target language is the first and more important +of the extraction options. Default is {\ocaml}. +\begin{description} +\item {\tt Extraction Language OCaml}. +\item {\tt Extraction Language Haskell}. +\item {\tt Extraction Language Scheme}. +\end{description} + +\asubsection{Inlining and optimizations} + +Since {\ocaml} is a strict language, the extracted code has to +be optimized in order to be efficient (for instance, when using +induction principles we do not want to compute all the recursive calls +but only the needed ones). So the extraction mechanism provides an +automatic optimization routine that will be called each time the user +want to generate {\ocaml} programs. The optimizations can be split in two +groups: the type-preserving ones -- essentially constant inlining and +reductions -- and the non type-preserving ones -- some function +abstractions of dummy types are removed when it is deemed safe in order +to have more elegant types. Therefore some constants may not appear in the +resulting monolithic {\ocaml} program. In the case of modular extraction, +even if some inlining is done, the inlined constant are nevertheless +printed, to ensure session-independent programs. + +Concerning Haskell, type-preserving optimizations are less useful +because of laziness. We still make some optimizations, for example in +order to produce more readable code. + +The type-preserving optimizations are controlled by the following \Coq\ options: + +\begin{description} + +\item \optindex{Extraction Optimize} {\tt Unset Extraction Optimize.} + +Default is Set. This controls all type-preserving optimizations made on +the ML terms (mostly reduction of dummy beta/iota redexes, but also +simplifications on Cases, etc). Put this option to Unset if you want a +ML term as close as possible to the Coq term. + +\item \optindex{Extraction Conservative Types} +{\tt Set Extraction Conservative Types.} + +Default is Unset. This controls the non type-preserving optimizations +made on ML terms (which try to avoid function abstraction of dummy +types). Turn this option to Set to make sure that {\tt e:t} +implies that {\tt e':t'} where {\tt e'} and {\tt t'} are the extracted +code of {\tt e} and {\tt t} respectively. + +\item \optindex{Extraction KeepSingleton} +{\tt Set Extraction KeepSingleton.} + +Default is Unset. Normally, when the extraction of an inductive type +produces a singleton type (i.e. a type with only one constructor, and +only one argument to this constructor), the inductive structure is +removed and this type is seen as an alias to the inner type. +The typical example is {\tt sig}. This option allows disabling this +optimization when one wishes to preserve the inductive structure of types. + +\item \optindex{Extraction AutoInline} {\tt Unset Extraction AutoInline.} + +Default is Set. The extraction mechanism +inlines the bodies of some defined constants, according to some heuristics +like size of bodies, uselessness of some arguments, etc. Those heuristics are +not always perfect; if you want to disable this feature, do it by Unset. + +\item \comindex{Extraction Inline} \comindex{Extraction NoInline} +{\tt Extraction [Inline|NoInline] \qualid$_1$ \dots\ \qualid$_n$}. + +In addition to the automatic inline feature, you can tell to +inline some more constants by the {\tt Extraction Inline} command. Conversely, +you can forbid the automatic inlining of some specific constants by +the {\tt Extraction NoInline} command. +Those two commands enable a precise control of what is inlined and what is not. + +\item \comindex{Print Extraction Inline} +{\tt Print Extraction Inline}. + +Prints the current state of the table recording the custom inlinings +declared by the two previous commands. + +\item \comindex{Reset Extraction Inline} +{\tt Reset Extraction Inline}. + +Puts the table recording the custom inlinings back to empty. + +\end{description} + + +\paragraph{Inlining and printing of a constant declaration.} + +A user can explicitly ask for a constant to be extracted by two means: +\begin{itemize} +\item by mentioning it on the extraction command line +\item by extracting the whole \Coq\ module of this constant. +\end{itemize} +In both cases, the declaration of this constant will be present in the +produced file. +But this same constant may or may not be inlined in the following +terms, depending on the automatic/custom inlining mechanism. + + +For the constants non-explicitly required but needed for dependency +reasons, there are two cases: +\begin{itemize} +\item If an inlining decision is taken, whether automatically or not, +all occurrences of this constant are replaced by its extracted body, and +this constant is not declared in the generated file. +\item If no inlining decision is taken, the constant is normally + declared in the produced file. +\end{itemize} + +\asubsection{Extra elimination of useless arguments} + +The following command provides some extra manual control on the +code elimination performed during extraction, in a way which +is independent but complementary to the main elimination +principles of extraction (logical parts and types). + +\begin{description} +\item \comindex{Extraction Implicit} + {\tt Extraction Implicit} \qualid\ [ \ident$_1$ \dots\ \ident$_n$ ]. + +This experimental command allows declaring some arguments of +\qualid\ as implicit, i.e. useless in extracted code and hence to +be removed by extraction. Here \qualid\ can be any function or +inductive constructor, and \ident$_i$ are the names of the concerned +arguments. In fact, an argument can also be referred by a number +indicating its position, starting from 1. +\end{description} + +\noindent When an actual extraction takes place, an error is normally raised if the +{\tt Extraction Implicit} +declarations cannot be honored, that is if any of the implicited +variables still occurs in the final code. This behavior can be relaxed +via the following option: + +\begin{description} +\item \optindex{Extraction SafeImplicits} {\tt Unset Extraction SafeImplicits.} + +Default is Set. When this option is Unset, a warning is emitted +instead of an error if some implicited variables still occur in the +final code of an extraction. This way, the extracted code may be +obtained nonetheless and reviewed manually to locate the source of the issue +(in the code, some comments mark the location of these remaining +implicited variables). +Note that this extracted code might not compile or run properly, +depending of the use of these remaining implicited variables. + +\end{description} + +\asubsection{Realizing axioms}\label{extraction:axioms} + +Extraction will fail if it encounters an informative +axiom not realized (see Section~\ref{extraction:axioms}). +A warning will be issued if it encounters a logical axiom, to remind the +user that inconsistent logical axioms may lead to incorrect or +non-terminating extracted terms. + +It is possible to assume some axioms while developing a proof. Since +these axioms can be any kind of proposition or object or type, they may +perfectly well have some computational content. But a program must be +a closed term, and of course the system cannot guess the program which +realizes an axiom. Therefore, it is possible to tell the system +what ML term corresponds to a given axiom. + +\comindex{Extract Constant} +\begin{description} +\item{\tt Extract Constant \qualid\ => \str.} ~\par + Give an ML extraction for the given constant. + The \str\ may be an identifier or a quoted string. +\item{\tt Extract Inlined Constant \qualid\ => \str.} ~\par + Same as the previous one, except that the given ML terms will + be inlined everywhere instead of being declared via a let. +\end{description} + +\noindent Note that the {\tt Extract Inlined Constant} command is sugar +for an {\tt Extract Constant} followed by a {\tt Extraction Inline}. +Hence a {\tt Reset Extraction Inline} will have an effect on the +realized and inlined axiom. + +Of course, it is the responsibility of the user to ensure that the ML +terms given to realize the axioms do have the expected types. In +fact, the strings containing realizing code are just copied to the +extracted files. The extraction recognizes whether the realized axiom +should become a ML type constant or a ML object declaration. + +\Example +\begin{coq_example*} +Axiom X:Set. +Axiom x:X. +Extract Constant X => "int". +Extract Constant x => "0". +\end{coq_example*} + +\noindent Notice that in the case of type scheme axiom (i.e. whose type is an +arity, that is a sequence of product finished by a sort), then some type +variables have to be given. The syntax is then: + +\begin{description} +\item{\tt Extract Constant \qualid\ \str$_1$ \dots\ \str$_n$ => \str.} +\end{description} + +\noindent The number of type variables is checked by the system. + +\Example +\begin{coq_example*} +Axiom Y : Set -> Set -> Set. +Extract Constant Y "'a" "'b" => " 'a*'b ". +\end{coq_example*} + +\noindent Realizing an axiom via {\tt Extract Constant} is only useful in the +case of an informative axiom (of sort Type or Set). A logical axiom +have no computational content and hence will not appears in extracted +terms. But a warning is nonetheless issued if extraction encounters a +logical axiom. This warning reminds user that inconsistent logical +axioms may lead to incorrect or non-terminating extracted terms. + +If an informative axiom has not been realized before an extraction, a +warning is also issued and the definition of the axiom is filled with +an exception labeled {\tt AXIOM TO BE REALIZED}. The user must then +search these exceptions inside the extracted file and replace them by +real code. + +\comindex{Extract Inductive} + +The system also provides a mechanism to specify ML terms for inductive +types and constructors. For instance, the user may want to use the ML +native boolean type instead of \Coq\ one. The syntax is the following: + +\begin{description} +\item{\tt Extract Inductive \qualid\ => \str\ [ \str\ \dots\ \str\ ] {\it optstring}.}\par + Give an ML extraction for the given inductive type. You must specify + extractions for the type itself (first \str) and all its + constructors (between square brackets). If given, the final optional + string should contain a function emulating pattern-matching over this + inductive type. If this optional string is not given, the ML + extraction must be an ML inductive datatype, and the native + pattern-matching of the language will be used. +\end{description} + +\noindent For an inductive type with $k$ constructor, the function used to +emulate the match should expect $(k+1)$ arguments, first the $k$ +branches in functional form, and then the inductive element to +destruct. For instance, the match branch \verb$| S n => foo$ gives the +functional form \verb$(fun n -> foo)$. Note that a constructor with no +argument is considered to have one unit argument, in order to block +early evaluation of the branch: \verb$| O => bar$ leads to the functional +form \verb$(fun () -> bar)$. For instance, when extracting {\tt nat} +into {\tt int}, the code to provide has type: +{\tt (unit->'a)->(int->'a)->int->'a}. + +As for {\tt Extract Inductive}, this command should be used with care: +\begin{itemize} +\item The ML code provided by the user is currently \emph{not} checked at all by + extraction, even for syntax errors. + +\item Extracting an inductive type to a pre-existing ML inductive type +is quite sound. But extracting to a general type (by providing an +ad-hoc pattern-matching) will often \emph{not} be fully rigorously +correct. For instance, when extracting {\tt nat} to {\ocaml}'s {\tt +int}, it is theoretically possible to build {\tt nat} values that are +larger than {\ocaml}'s {\tt max\_int}. It is the user's responsibility to +be sure that no overflow or other bad events occur in practice. + +\item Translating an inductive type to an ML type does \emph{not} +magically improve the asymptotic complexity of functions, even if the +ML type is an efficient representation. For instance, when extracting +{\tt nat} to {\ocaml}'s {\tt int}, the function {\tt mult} stays +quadratic. It might be interesting to associate this translation with +some specific {\tt Extract Constant} when primitive counterparts exist. +\end{itemize} + +\Example +Typical examples are the following: +\begin{coq_eval} +Require Extraction. +\end{coq_eval} +\begin{coq_example} +Extract Inductive unit => "unit" [ "()" ]. +Extract Inductive bool => "bool" [ "true" "false" ]. +Extract Inductive sumbool => "bool" [ "true" "false" ]. +\end{coq_example} + +\noindent When extracting to {\ocaml}, if an inductive constructor or type +has arity 2 and the corresponding string is enclosed by parentheses, +and the string meets {\ocaml}'s lexical criteria for an infix symbol, +then the rest of the string is used as infix constructor or type. + +\begin{coq_example} +Extract Inductive list => "list" [ "[]" "(::)" ]. +Extract Inductive prod => "(*)" [ "(,)" ]. +\end{coq_example} + +\noindent As an example of translation to a non-inductive datatype, let's turn +{\tt nat} into {\ocaml}'s {\tt int} (see caveat above): +\begin{coq_example} +Extract Inductive nat => int [ "0" "succ" ] + "(fun fO fS n -> if n=0 then fO () else fS (n-1))". +\end{coq_example} + +\asubsection{Avoiding conflicts with existing filenames} + +\comindex{Extraction Blacklist} + +When using {\tt Extraction Library}, the names of the extracted files +directly depends from the names of the \Coq\ files. It may happen that +these filenames are in conflict with already existing files, +either in the standard library of the target language or in other +code that is meant to be linked with the extracted code. +For instance the module {\tt List} exists both in \Coq\ and in {\ocaml}. +It is possible to instruct the extraction not to use particular filenames. + +\begin{description} +\item{\tt Extraction Blacklist} \ident\ \dots\ \ident. ~\par + Instruct the extraction to avoid using these names as filenames + for extracted code. +\item{\tt Print Extraction Blacklist.} ~\par + Show the current list of filenames the extraction should avoid. +\item{\tt Reset Extraction Blacklist.} ~\par + Allow the extraction to use any filename. +\end{description} + +\noindent For {\ocaml}, a typical use of these commands is +{\tt Extraction Blacklist String List}. + +\asection{Differences between \Coq\ and ML type systems} + + +Due to differences between \Coq\ and ML type systems, +some extracted programs are not directly typable in ML. +We now solve this problem (at least in {\ocaml}) by adding +when needed some unsafe casting {\tt Obj.magic}, which give +a generic type {\tt 'a} to any term. + +For example, here are two kinds of problem that can occur: + +\begin{itemize} + \item If some part of the program is {\em very} polymorphic, there + may be no ML type for it. In that case the extraction to ML works + alright but the generated code may be refused by the ML + type-checker. A very well known example is the {\em distr-pair} + function: +\begin{verbatim} +Definition dp := + fun (A B:Set)(x:A)(y:B)(f:forall C:Set, C->C) => (f A x, f B y). +\end{verbatim} + +In {\ocaml}, for instance, the direct extracted term would be +\begin{verbatim} +let dp x y f = Pair((f () x),(f () y)) +\end{verbatim} + +and would have type +\begin{verbatim} +dp : 'a -> 'a -> (unit -> 'a -> 'b) -> ('b,'b) prod +\end{verbatim} + +which is not its original type, but a restriction. + +We now produce the following correct version: +\begin{verbatim} +let dp x y f = Pair ((Obj.magic f () x), (Obj.magic f () y)) +\end{verbatim} + + \item Some definitions of \Coq\ may have no counterpart in ML. This + happens when there is a quantification over types inside the type + of a constructor; for example: +\begin{verbatim} +Inductive anything : Type := dummy : forall A:Set, A -> anything. +\end{verbatim} + +which corresponds to the definition of an ML dynamic type. +In {\ocaml}, we must cast any argument of the constructor dummy. + +\end{itemize} + +\noindent Even with those unsafe castings, you should never get error like +``segmentation fault''. In fact even if your program may seem +ill-typed to the {\ocaml} type-checker, it can't go wrong: it comes +from a Coq well-typed terms, so for example inductives will always +have the correct number of arguments, etc. + +More details about the correctness of the extracted programs can be +found in \cite{Let02}. + +We have to say, though, that in most ``realistic'' programs, these +problems do not occur. For example all the programs of Coq library are +accepted by Caml type-checker without any {\tt Obj.magic} (see examples below). + + + +\asection{Some examples} + +We present here two examples of extractions, taken from the +\Coq\ Standard Library. We choose \ocaml\ as target language, +but all can be done in the other dialects with slight modifications. +We then indicate where to find other examples and tests of Extraction. + +\asubsection{A detailed example: Euclidean division} + +The file {\tt Euclid} contains the proof of Euclidean division +(theorem {\tt eucl\_dev}). The natural numbers defined in the example +files are unary integers defined by two constructors $O$ and $S$: +\begin{coq_example*} +Inductive nat : Set := + | O : nat + | S : nat -> nat. +\end{coq_example*} + +\noindent This module contains a theorem {\tt eucl\_dev}, whose type is +\begin{verbatim} +forall b:nat, b > 0 -> forall a:nat, diveucl a b +\end{verbatim} +where {\tt diveucl} is a type for the pair of the quotient and the +modulo, plus some logical assertions that disappear during extraction. +We can now extract this program to \ocaml: + +\begin{coq_eval} +Reset Initial. +\end{coq_eval} +\begin{coq_example} +Require Extraction. +Require Import Euclid Wf_nat. +Extraction Inline gt_wf_rec lt_wf_rec induction_ltof2. +Recursive Extraction eucl_dev. +\end{coq_example} + +\noindent The inlining of {\tt gt\_wf\_rec} and others is not +mandatory. It only enhances readability of extracted code. +You can then copy-paste the output to a file {\tt euclid.ml} or let +\Coq\ do it for you with the following command: + +\begin{verbatim} +Extraction "euclid" eucl_dev. +\end{verbatim} + +\noindent Let us play the resulting program: + +\begin{verbatim} +# #use "euclid.ml";; +type nat = O | S of nat +type sumbool = Left | Right +val minus : nat -> nat -> nat = +val le_lt_dec : nat -> nat -> sumbool = +val le_gt_dec : nat -> nat -> sumbool = +type diveucl = Divex of nat * nat +val eucl_dev : nat -> nat -> diveucl = +# eucl_dev (S (S O)) (S (S (S (S (S O)))));; +- : diveucl = Divex (S (S O), S O) +\end{verbatim} +It is easier to test on \ocaml\ integers: +\begin{verbatim} +# let rec nat_of_int = function 0 -> O | n -> S (nat_of_int (n-1));; +val nat_of_int : int -> nat = +# let rec int_of_nat = function O -> 0 | S p -> 1+(int_of_nat p);; +val int_of_nat : nat -> int = +# let div a b = + let Divex (q,r) = eucl_dev (nat_of_int b) (nat_of_int a) + in (int_of_nat q, int_of_nat r);; +val div : int -> int -> int * int = +# div 173 15;; +- : int * int = (11, 8) +\end{verbatim} + +\noindent Note that these {\tt nat\_of\_int} and {\tt int\_of\_nat} are now +available via a mere {\tt Require Import ExtrOcamlIntConv} and then +adding these functions to the list of functions to extract. This file +{\tt ExtrOcamlIntConv.v} and some others in {\tt plugins/extraction/} +are meant to help building concrete program via extraction. + +\asubsection{Extraction's horror museum} + +Some pathological examples of extraction are grouped in the file\\ +{\tt test-suite/success/extraction.v} of the sources of \Coq. + +\asubsection{Users' Contributions} + +Several of the \Coq\ Users' Contributions use extraction to produce +certified programs. In particular the following ones have an automatic +extraction test: + +\begin{itemize} +\item {\tt additions} +\item {\tt bdds} +\item {\tt canon-bdds} +\item {\tt chinese} +\item {\tt continuations} +\item {\tt coq-in-coq} +\item {\tt exceptions} +\item {\tt firing-squad} +\item {\tt founify} +\item {\tt graphs} +\item {\tt higman-cf} +\item {\tt higman-nw} +\item {\tt hardware} +\item {\tt multiplier} +\item {\tt search-trees} +\item {\tt stalmarck} +\end{itemize} + +\noindent {\tt continuations} and {\tt multiplier} are a bit particular. They are +examples of developments where {\tt Obj.magic} are needed. This is +probably due to an heavy use of impredicativity. After compilation, those +two examples run nonetheless, thanks to the correction of the +extraction~\cite{Let02}. + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "Reference-Manual" +%%% End: -- cgit v1.2.3