Fix #5081 by more fine-grained LtacProf recording

To fix #5081 (LtacProf associates time spent in tactic evaluation with the wrong tactic), I added two additional calls to the profiler during the tactic evaluation phase. These two calls do not update the call count of the relevant tactics, but simply add time to them. Although this fixes #5081, it introduces a new bug, involving tactics which are aliases of other tactics, which I am not sure how to fix. Here is the explanation of the issue, as I currently understand it (also recorded in a comment in `profile_ltac.mli`): Ltac semantics are a bit insane. There isn't really a good notion of how many times a tactic has been "called", because tactics can be partially evaluated, and it's unclear whether the number of "calls" should be the number of times the body is fetched and unfolded, or the number of times the code is executed to a value, etc. The logic in `Tacinterp.eval_tactic` gives a decent approximation, which I believe roughly corresponds to the number of times that the engine runs the tactic value which results from evaluating the tactic expression bound to the name we're considering. However, this is a poor approximation of the time spent in the tactic; we want to consider time spent evaluating a tactic expression to a tactic value to be time spent in the expression, not just time spent in the caller of the expression. So we need to wrap some nodes in additional profiling calls which don't count towards the total call count. Whether or not a call "counts" is indicated by the `count_call` boolean argument. Unfortunately, at present, we can get very strange call graphs when a named tactic expression never runs as a tactic value: if we have `Ltac t0 := t.` and `Ltac t1 := t0.`, then `t1` is considered to run 0(!) times. It evaluates to `t` during tactic expression evaluation, and although the call trace records the fact that `t` was called by `t0` which was called by `t1`, the tactic running phase never sees this.
Thus we get one call tree (from expression evaluation) that has `t1` calls `t0` calls `t`, and another call tree which says that the caller of `t1` calls `t` directly; the expression evaluation time goes in the first tree, and the call count and tactic running time go in the second tree. Alas, I suspect that fixing this requires a redesign of how the profiler hooks into the tactic engine.
author: Jason Gross <jgross@mit.edu> 2017-12-12 20:12:53 -0500
committer: Jason Gross <jgross@mit.edu> 2017-12-12 20:51:38 -0500
commit: 79e97ce799d35c1082ccc1a57468f8bb4f8efe42 (patch)
tree: 8399607e98bee2ac0eb67eed8ffcec717e0df423 /plugins/ltac
parent: 2c2a08083bc535397359299690d0bfb3523a9ee1 (diff)
3 files changed, 39 insertions, 8 deletions
diff --git a/plugins/ltac/profile_ltac.ml b/plugins/ltac/profile_ltac.ml
index 9ae8bfe65..1f29f4860 100644
--- a/plugins/ltac/profile_ltac.ml
+++ b/plugins/ltac/profile_ltac.ml
@@ -289,7 +289,7 @@ let rec find_in_stack what acc = function
   | { name } as x :: rest when String.equal name what -> Some(acc, x, rest)
   | { name } as x :: rest -> find_in_stack what (x :: acc) rest
 
-let exit_tactic start_time c =
+let exit_tactic ~count_call start_time c =
   let diff = time () -. start_time in
   match Local.(!stack) with
   | [] | [_] ->
@@ -304,7 +304,7 @@ let exit_tactic start_time c =
     let node = { node with
       total = node.total +. diff;
       local = node.local +. diff;
-      ncalls = node.ncalls + 1;
+      ncalls = node.ncalls + (if count_call then 1 else 0);
       max_total = max node.max_total diff;
     } in
     (* updating the stack *)
@@ -341,7 +341,7 @@ let tclFINALLY tac (finally : unit Proofview.tactic) =
     (fun v -> finally <*> Proofview.tclUNIT v)
     (fun (exn, info) -> finally <*> Proofview.tclZERO ~info exn)
 
-let do_profile s call_trace tac =
+let do_profile s call_trace ?(count_call=true) tac =
   let open Proofview.Notations in
   Proofview.tclLIFT (Proofview.NonLogical.make (fun () ->
   if !is_profiling then
@@ -359,7 +359,7 @@ let do_profile s call_trace tac =
       tac
       (Proofview.tclLIFT (Proofview.NonLogical.make (fun () ->
         (match call_trace with
-        | (_, c) :: _ -> exit_tactic start_time c
+        | (_, c) :: _ -> exit_tactic ~count_call start_time c
         | [] -> ()))))
   | None -> tac
 
diff --git a/plugins/ltac/profile_ltac.mli b/plugins/ltac/profile_ltac.mli
index 52827cb36..feb777352 100644
--- a/plugins/ltac/profile_ltac.mli
+++ b/plugins/ltac/profile_ltac.mli
@@ -9,9 +9,39 @@
 
 (** Ltac profiling primitives *)
 
+(* Note(JasonGross): Ltac semantics are a bit insane.  There isn't
+   really a good notion of how many times a tactic has been "called",
+   because tactics can be partially evaluated, and it's unclear
+   whether the number of "calls" should be the number of times the
+   body is fetched and unfolded, or the number of times the code is
+   executed to a value, etc.  The logic in [Tacinterp.eval_tactic]
+   gives a decent approximation, which I believe roughly corresponds
+   to the number of times that the engine runs the tactic value which
+   results from evaluating the tactic expression bound to the name
+   we're considering.  However, this is a poor approximation of the
+   time spent in the tactic; we want to consider time spent evaluating
+   a tactic expression to a tactic value to be time spent in the
+   expression, not just time spent in the caller of the expression.
+   So we need to wrap some nodes in additional profiling calls which
+   don't count towards the total call count.  Whether or not a call
+   "counts" is indicated by the [count_call] boolean argument.
+
+   Unfortunately, at present, we can get very strange call graphs when
+   a named tactic expression never runs as a tactic value: if we have
+   [Ltac t0 := t.] and [Ltac t1 := t0.], then [t1] is considered to
+   run 0(!) times.  It evaluates to [t] during tactic expression
+   evaluation, and although the call trace records the fact that [t]
+   was called by [t0] which was called by [t1], the tactic running
+   phase never sees this.  Thus we get one call tree (from expression
+   evaluation) that has [t1] calls [t0] calls [t], and another call
+   tree which says that the caller of [t1] calls [t] directly; the
+   expression evaluation time goes in the first tree, and the call
+   count and tactic running time go in the second tree.  Alas, I
+   suspect that fixing this requires a redesign of how the profiler
+   hooks into the tactic engine. *)
 val do_profile :
   string -> ('a * Tacexpr.ltac_call_kind) list ->
-    'b Proofview.tactic -> 'b Proofview.tactic
+    ?count_call:bool -> 'b Proofview.tactic -> 'b Proofview.tactic
 
 val set_profiling : bool -> unit
 
@@ -46,4 +76,3 @@ type treenode = {
 (* Returns the profiling results known by the current process *)
 val get_local_profiling_results : unit -> treenode
 val feedback_results : treenode -> unit
-
diff --git a/plugins/ltac/tacinterp.ml b/plugins/ltac/tacinterp.ml
index e0d7eca5f..e25c99323 100644
--- a/plugins/ltac/tacinterp.ml
+++ b/plugins/ltac/tacinterp.ml
@@ -1272,7 +1272,8 @@ and interp_ltac_reference ?loc' mustbetac ist r : Val.t Ftactic.t =
       let extra = TacStore.set extra f_trace trace in
       let ist = { lfun = Id.Map.empty; extra = extra; } in
       let appl = GlbAppl[r,[]] in
-      val_interp ~appl ist (Tacenv.interp_ltac r)
+      Profile_ltac.do_profile "interp_ltac_reference" trace ~count_call:false
+        (val_interp ~appl ist (Tacenv.interp_ltac r))
 
 and interp_tacarg ist arg : Val.t Ftactic.t =
   match arg with
@@ -1338,7 +1339,8 @@ and interp_app loc ist fv largs : Val.t Ftactic.t =
               let ist = {
                 lfun = newlfun;
                 extra = TacStore.set ist.extra f_trace []; } in
-              catch_error_tac trace (val_interp ist body) >>= fun v ->
+              Profile_ltac.do_profile "interp_app" trace ~count_call:false
+                (catch_error_tac trace (val_interp ist body)) >>= fun v ->
               Ftactic.return (name_vfun (push_appl appl largs) v)
             end
 	    begin fun (e, info) ->
author	Jason Gross <jgross@mit.edu>	2017-12-12 20:12:53 -0500
committer	Jason Gross <jgross@mit.edu>	2017-12-12 20:51:38 -0500
commit	79e97ce799d35c1082ccc1a57468f8bb4f8efe42 (patch)
tree	8399607e98bee2ac0eb67eed8ffcec717e0df423 /plugins/ltac
parent	2c2a08083bc535397359299690d0bfb3523a9ee1 (diff)