diff options
author | 2018-08-10 14:30:28 -0700 | |
---|---|---|
committer | 2018-08-10 14:30:28 -0700 | |
commit | 88df15fb4bc999825605f114d17f8baea983f979 (patch) | |
tree | 5df32451b04ab82c1c3d6ee9c4a2fba2411e64f2 /tensorflow/core/distributed_runtime | |
parent | abf2f27b264603d3fc1c6e8c35789e20a26c0b7e (diff) | |
parent | 7d2872f91d454a45a84162cc0e1e881857158730 (diff) |
Merge pull request #20549 from naurril:bug-fix-grpc-server
PiperOrigin-RevId: 208266944
Diffstat (limited to 'tensorflow/core/distributed_runtime')
-rw-r--r-- | tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc | 45 |
1 file changed, 24 insertions, 21 deletions
```diff
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc
index 8a6903be9e..bcd46a4c06 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc
@@ -120,27 +120,8 @@ Status GrpcServer::Init(
   master_env_.env = env_;
   worker_env_.env = env_;
 
-  SessionOptions sess_opts;
-  ConfigProto config = server_def_.default_session_config();
-  sess_opts.config = config;
-
-  // Configure shared devices between master and worker.
-  string name_prefix =
-      strings::StrCat("/job:", server_def_.job_name(), "/replica:0",
-                      "/task:", server_def_.task_index());
-  TF_RETURN_IF_ERROR(DeviceFactory::AddDevices(sess_opts, name_prefix,
-                                               &master_env_.local_devices));
-  worker_env_.local_devices = master_env_.local_devices;
-  worker_env_.device_mgr = new DeviceMgr(worker_env_.local_devices);
-  worker_env_.rendezvous_mgr = rendezvous_mgr_func == nullptr
-                                   ? new RpcRendezvousMgr(&worker_env_)
-                                   : rendezvous_mgr_func(&worker_env_);
-  string unused;
-  string default_worker_name;
-  if (!DeviceNameUtils::SplitDeviceName(master_env_.local_devices[0]->name(),
-                                        &default_worker_name, &unused)) {
-    return errors::Internal("Could not parse worker name.");
-  }
+  // Check parameters before DeviceFactory::AddDevices,
+  // otherwise if 'task_index=-1' the program will abort.
 
   // Look up the port that has been requested for this task in `server_def_`.
   int requested_port = -1;
@@ -167,6 +148,28 @@ Status GrpcServer::Init(
                                    "\" was not defined in cluster");
   }
 
+  SessionOptions sess_opts;
+  ConfigProto config = server_def_.default_session_config();
+  sess_opts.config = config;
+
+  // Configure shared devices between master and worker.
+  string name_prefix =
+      strings::StrCat("/job:", server_def_.job_name(), "/replica:0",
+                      "/task:", server_def_.task_index());
+  TF_RETURN_IF_ERROR(DeviceFactory::AddDevices(sess_opts, name_prefix,
+                                               &master_env_.local_devices));
+  worker_env_.local_devices = master_env_.local_devices;
+  worker_env_.device_mgr = new DeviceMgr(worker_env_.local_devices);
+  worker_env_.rendezvous_mgr = rendezvous_mgr_func == nullptr
+                                   ? new RpcRendezvousMgr(&worker_env_)
+                                   : rendezvous_mgr_func(&worker_env_);
+  string unused;
+  string default_worker_name;
+  if (!DeviceNameUtils::SplitDeviceName(master_env_.local_devices[0]->name(),
+                                        &default_worker_name, &unused)) {
+    return errors::Internal("Could not parse worker name.");
+  }
+
   // N.B. The order of initialization here is intricate, because we
   // wish to allow `requested_port == 0` (for choosing any port,
   // mostly for testing). Therefore, the construction of the channel
```