Log some compilation stats at VLOG(1) on each compile.

Since a compilation is a 1+ millisecond event anyway, I figured logging at VLOG(1) would be fine.

PiperOrigin-RevId: 207598334
author: Sanjoy Das <sanjoy@google.com> 2018-08-06 13:31:26 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-08-06 13:40:32 -0700
commit: 7e4199223e8d6e5fd8154087ab584f30d29d9c60 (patch)
tree: 3cceddeac325037d2c7483db80ba9021edc59ccd
parent: 941165e082c057862fb645b1e23778818676ddfa (diff)
2 files changed, 40 insertions, 4 deletions
diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc
index 0e2cdcf630..7140d47a94 100644
--- a/tensorflow/compiler/jit/xla_compilation_cache.cc
+++ b/tensorflow/compiler/jit/xla_compilation_cache.cc
@@ -296,7 +296,7 @@ Status XlaCompilationCache::CompileImpl(
   // protect the contents of the cache entry.
   Entry* entry;
   {
-    mutex_lock lock(mu_);
+    mutex_lock lock(compile_cache_mu_);
     // Find or create a cache entry.
     std::unique_ptr<Entry>& e = cache_[signature];
     if (!e) {
@@ -312,6 +312,8 @@ Status XlaCompilationCache::CompileImpl(
   if (!entry->compiled) {
     VLOG(1) << "Compilation cache miss for signature: "
             << SignatureDebugString(signature);
+    tensorflow::Env* env = tensorflow::Env::Default();
+    const uint64 compile_start_us = env->NowMicros();
     // Do the actual JIT compilation without holding the lock (it can take
     // a long time.)
     std::vector<XlaCompiler::Argument> args;
@@ -334,6 +336,26 @@ Status XlaCompilationCache::CompileImpl(
     CHECK_EQ(entry->executable.get(), nullptr);
     entry->compilation_status =
         BuildExecutable(options, entry->compilation_result, &entry->executable);
+
+    const uint64 compile_end_us = env->NowMicros();
+    const uint64 compile_time_us = compile_end_us - compile_start_us;
+    {
+      mutex_lock lock(compile_stats_mu_);
+      auto it = compile_stats_.emplace(function.name(), CompileStats{}).first;
+      it->second.compile_count++;
+      it->second.cumulative_compile_time_us += compile_time_us;
+      VLOG(1) << "compiled " << function.name() << " "
+              << it->second.compile_count
+              << " times, compile time: " << compile_time_us
+              << " us, cumulative: " << it->second.cumulative_compile_time_us
+              << " us ("
+              << tensorflow::strings::HumanReadableElapsedTime(compile_time_us /
+                                                               1.0e6)
+              << " / "
+              << tensorflow::strings::HumanReadableElapsedTime(
+                     it->second.cumulative_compile_time_us / 1.0e6)
+              << ")";
+    }
   }
   TF_RETURN_IF_ERROR(entry->compilation_status);
   *compilation_result = &entry->compilation_result;
diff --git a/tensorflow/compiler/jit/xla_compilation_cache.h b/tensorflow/compiler/jit/xla_compilation_cache.h
index be1043d8c3..fc5f008f4f 100644
--- a/tensorflow/compiler/jit/xla_compilation_cache.h
+++ b/tensorflow/compiler/jit/xla_compilation_cache.h
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 
@@ -150,9 +151,22 @@ class XlaCompilationCache : public ResourceBase {
     std::unique_ptr<xla::LocalExecutable> executable GUARDED_BY(mu);
   };
 
-  mutex mu_;
-  std::unordered_map<Signature, std::unique_ptr<Entry>, Signature::Hash> cache_
-      GUARDED_BY(mu_);
+  mutex compile_cache_mu_;
+  gtl::FlatMap<Signature, std::unique_ptr<Entry>, Signature::Hash> cache_
+      GUARDED_BY(compile_cache_mu_);
+
+  struct CompileStats {
+    // Number of times the cluster has been (re-)compiled.
+    int64 compile_count = 0;
+
+    // Cumulative time spent compiling the cluster.
+    int64 cumulative_compile_time_us = 0;
+  };
+  mutex compile_stats_mu_;
+
+  // Maps cluster names to compilation statistics for said cluster.
+  gtl::FlatMap<string, CompileStats> compile_stats_
+      GUARDED_BY(compile_stats_mu_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(XlaCompilationCache);
 };
author	Sanjoy Das <sanjoy@google.com>	2018-08-06 13:31:26 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2018-08-06 13:40:32 -0700
commit	7e4199223e8d6e5fd8154087ab584f30d29d9c60 (patch)
tree	3cceddeac325037d2c7483db80ba9021edc59ccd
parent	941165e082c057862fb645b1e23778818676ddfa (diff)