Consolidate worker state behind a session-centric abstraction.

State in workers is currently splayed across graph_mgr, rendezvous_mgr, and additional components. This has made it difficult to ensure proper cleanup and shutdown of the worker components. In addition to paving the way for a more reliable shutdown, this CL also lays the groundwork for ClusterSpec propagation. ClusterSpec propagation is a capability upgrade for TensorFlow that should make it much easier to (1) build distributed TensorFlow clusters, and (2) handle node failures. Once ClusterSpec propagation is fully implemented, TensorFlow workers can be booted independently of each other, with no knowledge of one another. A client can then query a central cluster scheduler or other API to find all of the workers, and send the ClusterDef (ClusterSpec) to the TF master, which then propagates it to all of the workers. This change is only the first in a sequence of changes to fully implement ClusterSpec propagation in TensorFlow. Change: 151229111
author: Brennan Saeta <saeta@google.com> 2017-03-25 11:17:23 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-03-25 12:33:30 -0700
commit: 396b6bd1af7bd5a9295b13f30c5ed34e7de42daa (patch)
tree: 0d27b7da7f8ec1386526b7c594ea00bd041b93b0 /tensorflow/core/distributed_runtime/worker.h
parent: 0612f369876d2991008de418e78011b71040a807 (diff)
1 files changed, 7 insertions, 1 deletions
diff --git a/tensorflow/core/distributed_runtime/worker.h b/tensorflow/core/distributed_runtime/worker.h
index 6d1c8e3b00..290fc6de95 100644
--- a/tensorflow/core/distributed_runtime/worker.h
+++ b/tensorflow/core/distributed_runtime/worker.h
@@ -19,13 +19,15 @@ limitations under the License.
 #include <unordered_map>
 
 #include "tensorflow/core/distributed_runtime/graph_mgr.h"
+#include "tensorflow/core/distributed_runtime/session_mgr.h"
 #include "tensorflow/core/distributed_runtime/worker_interface.h"
 
 namespace tensorflow {
 
 class CancellationManager;
 class Device;
-class WorkerEnv;
+struct WorkerEnv;
+struct WorkerSession;
 
 // A TensorFlow Worker runs registered graphs and supports worker-to-worker
 // Tensor transfer.
@@ -45,6 +47,10 @@ class Worker : public WorkerInterface {
                       GetStatusResponse* response,
                       StatusCallback done) override;
 
+  void CreateWorkerSessionAsync(const CreateWorkerSessionRequest* request,
+                                CreateWorkerSessionResponse* response,
+                                StatusCallback done) override;
+
   void RegisterGraphAsync(const RegisterGraphRequest* request,
                           RegisterGraphResponse* response,
                           StatusCallback done) override;
author	Brennan Saeta <saeta@google.com>	2017-03-25 11:17:23 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-03-25 12:33:30 -0700
commit	396b6bd1af7bd5a9295b13f30c5ed34e7de42daa (patch)
tree	0d27b7da7f8ec1386526b7c594ea00bd041b93b0 /tensorflow/core/distributed_runtime/worker.h
parent	0612f369876d2991008de418e78011b71040a807 (diff)