Introduce the abstraction of RunHandler which each DirectSession can use for

the duration of a single RunInternal() call from RunHandlerPool. We want to leverage this abstraction for improving the cross-session inter-op parallelism for lower latency inference in the future. In the case that global pools aren't used, this change should be a no-op. PiperOrigin-RevId: 214818187
author: A. Unique TensorFlower <gardener@tensorflow.org> 2018-09-27 12:37:05 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-09-27 12:41:43 -0700
commit: 750466c6e6624d279de7f9a43accd682d487509c (patch)
tree: a97a88d432433b3c55775f64bb7a5f86a9f034b2 /tensorflow/core/common_runtime/direct_session.cc
parent: 561a3c4331ebfaac3e61c524911bf6fe85f4ebc9 (diff)
1 files changed, 43 insertions, 6 deletions
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 841181f8c3..458e133b68 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -40,6 +40,7 @@ limitations under the License.
 #include "tensorflow/core/framework/graph_def_util.h"
 #include "tensorflow/core/framework/log_memory.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/run_handler.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/graph/algorithm.h"
@@ -244,6 +245,21 @@ void DirectSession::SchedClosure(thread::ThreadPool* pool,
 #endif  // __ANDROID__
 }
 
+static RunHandlerPool* GetOrCreateRunHandlerPool(
+    const SessionOptions& options) {
+  static RunHandlerPool* pool =
+      new RunHandlerPool(NumInterOpThreadsFromSessionOptions(options));
+  return pool;
+}
+
+bool DirectSession::ShouldUseRunHandlerPool() const {
+  if (options_.config.session_inter_op_thread_pool_size() > 0 ||
+      options_.config.use_per_session_threads()) {
+    return false;
+  }
+  return true;
+}
+
 DirectSession::DirectSession(const SessionOptions& options,
                              const DeviceMgr* device_mgr,
                              DirectSessionFactory* const factory)
@@ -582,16 +598,37 @@ Status DirectSession::RunInternal(int64 step_id, const RunOptions& run_options,
     }
   }
 
-  Executor::Args::Runner default_runner = [this,
-                                           pool](Executor::Args::Closure c) {
-    SchedClosure(pool, std::move(c));
-  };
+  std::unique_ptr<RunHandler> handler;
+  if (ShouldUseRunHandlerPool() &&
+      run_options.experimental().use_run_handler_pool()) {
+    // Non-null only when a global inter-op pool is used.
+    VLOG(1) << "Using RunHandler to scheduler inter-op closures.";
+    handler = GetOrCreateRunHandlerPool(options_)->Get();
+  }
+  auto* handler_ptr = handler.get();
+
+  Executor::Args::Runner default_runner = nullptr;
+
+  if (pool == nullptr) {
+    default_runner = [](Executor::Args::Closure c) { c(); };
+  } else if (handler_ptr != nullptr) {
+    default_runner = [handler_ptr](Executor::Args::Closure c) {
+      handler_ptr->ScheduleInterOpClosure(std::move(c));
+    };
+  } else {
+    default_runner = [this, pool](Executor::Args::Closure c) {
+      SchedClosure(pool, std::move(c));
+    };
+  }
+
   for (const auto& item : executors_and_keys->items) {
-    // TODO(zhengxq): support partial run.
-    // TODO(zhengxq): if the device picks its own threadpool, we need to assign
+    // TODO(azaks): support partial run.
+    // TODO(azaks): if the device picks its own threadpool, we need to assign
     //     less threads to the main compute pool by default.
     thread::ThreadPool* device_thread_pool =
         item.device->tensorflow_device_thread_pool();
+    // TODO(crk): Investigate usage of RunHandlerPool when using device specific
+    // thread pool(s).
     if (!device_thread_pool) {
       args.runner = default_runner;
     } else {
author	A. Unique TensorFlower <gardener@tensorflow.org>	2018-09-27 12:37:05 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2018-09-27 12:41:43 -0700
commit	750466c6e6624d279de7f9a43accd682d487509c (patch)
tree	a97a88d432433b3c55775f64bb7a5f86a9f034b2 /tensorflow/core/common_runtime/direct_session.cc
parent	561a3c4331ebfaac3e61c524911bf6fe85f4ebc9 (diff)