diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-09-28 14:08:25 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-09-28 14:34:43 -0700 |
commit | f83da5b0aa37ba55c1b2eaa093e6d043b73f5982 (patch) | |
tree | d28c727251f910dc0c7b7a6184286919d436e88f /tensorflow/core/common_runtime | |
parent | 1724d155f00b49bc817189247cbfb0df2092a9da (diff) |
Introduce the RunHandler abstraction, which each DirectSession can obtain from
RunHandlerPool and use for the duration of a single RunInternal() call. It is
used for running inter-op closures with a global scheduler (planned for the
future) to improve both median and tail latency (for use-cases like CPU inference).
In the case that global pools aren't used, this change should be a no-op.
PiperOrigin-RevId: 214992852
Diffstat (limited to 'tensorflow/core/common_runtime')
-rw-r--r-- | tensorflow/core/common_runtime/direct_session.cc | 49 | ||||
-rw-r--r-- | tensorflow/core/common_runtime/direct_session.h | 3 | ||||
-rw-r--r-- | tensorflow/core/common_runtime/direct_session_test.cc | 28 |
3 files changed, 74 insertions, 6 deletions
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 841181f8c3..458e133b68 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -40,6 +40,7 @@ limitations under the License. #include "tensorflow/core/framework/graph_def_util.h" #include "tensorflow/core/framework/log_memory.h" #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/run_handler.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/versions.pb.h" #include "tensorflow/core/graph/algorithm.h" @@ -244,6 +245,21 @@ void DirectSession::SchedClosure(thread::ThreadPool* pool, #endif // __ANDROID__ } +static RunHandlerPool* GetOrCreateRunHandlerPool( + const SessionOptions& options) { + static RunHandlerPool* pool = + new RunHandlerPool(NumInterOpThreadsFromSessionOptions(options)); + return pool; +} + +bool DirectSession::ShouldUseRunHandlerPool() const { + if (options_.config.session_inter_op_thread_pool_size() > 0 || + options_.config.use_per_session_threads()) { + return false; + } + return true; +} + DirectSession::DirectSession(const SessionOptions& options, const DeviceMgr* device_mgr, DirectSessionFactory* const factory) @@ -582,16 +598,37 @@ Status DirectSession::RunInternal(int64 step_id, const RunOptions& run_options, } } - Executor::Args::Runner default_runner = [this, - pool](Executor::Args::Closure c) { - SchedClosure(pool, std::move(c)); - }; + std::unique_ptr<RunHandler> handler; + if (ShouldUseRunHandlerPool() && + run_options.experimental().use_run_handler_pool()) { + // Non-null only when a global inter-op pool is used. 
+ VLOG(1) << "Using RunHandler to scheduler inter-op closures."; + handler = GetOrCreateRunHandlerPool(options_)->Get(); + } + auto* handler_ptr = handler.get(); + + Executor::Args::Runner default_runner = nullptr; + + if (pool == nullptr) { + default_runner = [](Executor::Args::Closure c) { c(); }; + } else if (handler_ptr != nullptr) { + default_runner = [handler_ptr](Executor::Args::Closure c) { + handler_ptr->ScheduleInterOpClosure(std::move(c)); + }; + } else { + default_runner = [this, pool](Executor::Args::Closure c) { + SchedClosure(pool, std::move(c)); + }; + } + for (const auto& item : executors_and_keys->items) { - // TODO(zhengxq): support partial run. - // TODO(zhengxq): if the device picks its own threadpool, we need to assign + // TODO(azaks): support partial run. + // TODO(azaks): if the device picks its own threadpool, we need to assign // less threads to the main compute pool by default. thread::ThreadPool* device_thread_pool = item.device->tensorflow_device_thread_pool(); + // TODO(crk): Investigate usage of RunHandlerPool when using device specific + // thread pool(s). if (!device_thread_pool) { args.runner = default_runner; } else { diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h index 4a6a921ea7..3a168bbe3f 100644 --- a/tensorflow/core/common_runtime/direct_session.h +++ b/tensorflow/core/common_runtime/direct_session.h @@ -247,6 +247,9 @@ class DirectSession : public Session { ExecutorsAndKeys* executors_and_keys, RunMetadata* run_metadata); + // Returns whether inter-op execution uses a global pool. 
+ bool ShouldUseRunHandlerPool() const; + ::tensorflow::Status ExtendLocked(const GraphDef& graph) EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_); diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc index 65e816c202..e3e431f800 100644 --- a/tensorflow/core/common_runtime/direct_session_test.cc +++ b/tensorflow/core/common_runtime/direct_session_test.cc @@ -625,6 +625,34 @@ TEST_F(DirectSessionMinusAXTest, RunSimpleNetworkWithOpts_Callable) { EXPECT_EQ(run_metadata.step_stats().dev_stats_size(), 2); } +TEST_F(DirectSessionMinusAXTest, UseRunHandlerPool) { + Initialize({3, 2, -1, 0}); + auto session = CreateSession(); + ASSERT_TRUE(session != nullptr); + TF_ASSERT_OK(session->Create(def_)); + std::vector<std::pair<string, Tensor>> inputs; + + // Request two targets: one fetch output and one non-fetched output. + std::vector<string> output_names = {y_ + ":0"}; + std::vector<string> target_nodes = {y_neg_}; + std::vector<Tensor> outputs; + + // Prepares RunOptions and RunMetadata + RunOptions run_options; + run_options.mutable_experimental()->set_use_run_handler_pool(true); + + Status s = session->Run(run_options, inputs, output_names, target_nodes, + &outputs, nullptr); + TF_ASSERT_OK(s); + + ASSERT_EQ(1, outputs.size()); + // The first output should be initialized and have the correct + // output. + auto mat = outputs[0].matrix<float>(); + ASSERT_TRUE(outputs[0].IsInitialized()); + EXPECT_FLOAT_EQ(5.0, mat(0, 0)); +} + TEST(DirectSessionTest, KeepsStateAcrossRunsOfSession) { GraphDef def; Graph g(OpRegistry::Global()); |