diff options
author | 2013-08-30 12:20:07 +0000 | |
---|---|---|
committer | 2013-08-30 12:20:07 +0000 | |
commit | de9798346c7df0593d4d7b83a8f316991f6fb230 (patch) | |
tree | 3541e5bfc7127f14a3a789c0076776c3d0917e9b | |
parent | e458f2e2beb65e49c4bb45c54ab07ce3b0b8a9bb (diff) |
Stm: if slave process dies badly go back to local lazy evaluation
Processing the proof in a slave process may fail for an implementation
error, e.g. the state could not be marshalled, or an anomaly is raised
by the slave.
In this case we fall back to local, lazy, evaluation in the master
process.
git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/coq/trunk@16743 85f007b7-540e-0410-9357-904b9bb8a0f7
-rw-r--r-- | lib/future.ml | 8 | ||||
-rw-r--r-- | lib/future.mli | 3 | ||||
-rw-r--r-- | toplevel/stm.ml | 30 |
3 files changed, 28 insertions, 13 deletions
diff --git a/lib/future.ml b/lib/future.ml index 292fd6648..3cbaa95dc 100644 --- a/lib/future.ml +++ b/lib/future.ml @@ -59,9 +59,15 @@ let proj = function let create f = ref (Closure f) +type 'a assignement = [ `Val of 'a | `Exn of exn | `Comp of 'a computation] let create_delegate () = let c = ref Delegated in - c, (function `Val v -> c := Val (v, None) | `Exn e -> c := Exn e) + c, (fun v -> + assert (!c = Delegated); + match v with + | `Val v -> c := Val (v, None) + | `Exn e -> c := Exn e + | `Comp f -> c := !f) (* TODO: get rid of try/catch *) let compute ~pure c : 'a value = match !c with diff --git a/lib/future.mli b/lib/future.mli index 3da0e2fdc..39be0c180 100644 --- a/lib/future.mli +++ b/lib/future.mli @@ -19,7 +19,8 @@ val create : (unit -> 'a) -> 'a computation val from_val : 'a -> 'a computation (* Run remotely, returns the function to assign *) -val create_delegate : unit -> 'a computation * ('a value -> unit) +type 'a assignement = [ `Val of 'a | `Exn of exn | `Comp of 'a computation] +val create_delegate : unit -> 'a computation * ('a assignement -> unit) (* Variants to stock a copy of the current environment *) val create_here : (unit -> 'a) -> 'a computation diff --git a/toplevel/stm.ml b/toplevel/stm.ml index 1b075276c..f68c4c3ff 100644 --- a/toplevel/stm.ml +++ b/toplevel/stm.ml @@ -598,7 +598,7 @@ end = struct (* {{{ *) type task = | TaskBuildProof of (Stateid.t * Stateid.t) * Stateid.t * Stateid.t * - (Entries.proof_output list Future.value -> unit) + (Entries.proof_output list Future.assignement -> unit) let pr_task = function | TaskBuildProof(_,bop,eop,_) -> "TaskBuilProof("^Stateid.to_string bop^","^Stateid.to_string eop^")" @@ -668,9 +668,11 @@ end = struct (* {{{ *) let rec manage_slave respawn = let ic, oc, pid = respawn () in + let last_task = ref None in try while true do let task = TQueue.pop queue in + last_task := Some task; try marshal_request oc (request_of_task task); let rec loop () = @@ -692,20 +694,28 @@ end = struct (* {{{ *) Pp.feedback ~state_id msg; loop () | _, RespFeedback _ -> assert false (* Parsing in master process *) - in loop () + in + loop (); last_task := None with | VCS.Expired -> (* task cancelled: e.g. the user did backtrack *) prerr_endline ("Task expired: " ^ pr_task task) - | MarshalError -> (* TODO *) - prerr_endline "TODO: Marshalling Error"; - prerr_endline "We should be resilient and fall back to lazy" - | e -> + | MarshalError -> + msg_warning(strbrk("Marshalling error. "^ + "The system state could not be sent to the slave process. "^ + "Falling back to local, lazy, evaluation.")); + let TaskBuildProof (exn_info, _, stop, assign) = task in + assign(`Comp(build_proof_here exn_info stop)) + | e -> (* TODO: do something sensible here *) prerr_endline (string_of_ppcmds (print e)) done with Sys_error _ | Invalid_argument _ | End_of_file -> - (* TODO *) - prerr_endline "TODO: The slave died"; - prerr_endline "We should be resilient and fall back to lazy"; + msg_warning(strbrk "The slave process died badly."); + (match !last_task with + | Some task -> + msg_warning(strbrk "Falling back to local, lazy, evaluation."); + let TaskBuildProof (exn_info, _, stop, assign) = task in + assign(`Comp(build_proof_here exn_info stop)); + | None -> ()); ignore(Unix.waitpid [] pid); close_in ic; close_out oc; @@ -756,8 +766,6 @@ end = struct (* {{{ *) prerr_endline "Slave: failed with the following exception:"; prerr_endline (string_of_ppcmds (print e)) | e -> - (* TODO special exit code in case of MarshalError so that master - * can fall back to lazy *) msg_error(str"Slave: failed with the following CRITICAL exception:"); msg_error(print e); msg_error(str"Slave: bailing out"); |