about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Derek Murray <mrry@google.com> 2016-06-07 09:03:47 -0800
committer Derek Murray <mrry@google.com> 2016-06-15 14:45:16 -0700
commit 38983397bc5151bcafee5b5dfbdc11ad12adc8d9 (patch)
tree ded4a5c70a51fde324a0c1c8bbef69b202091f8d
parent ea00daa3945d10bbce59e65164acc9d40f17f9a5 (diff)
Ensure that all RunManyGraphs calls complete before returning from cancellation.
This fixes a potential race condition and segfault, where the response buffer could be deallocated while an RPC was still in flight. This would happen if a step was cancelled before its master blocked on the RunGraph RPCs.
Change: 124255996
-rw-r--r-- tensorflow/core/distributed_runtime/master_session.cc 8
1 file changed, 6 insertions, 2 deletions
diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index 9231fae362..870970b7ca 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -569,11 +569,15 @@ Status MasterSession::ReffedClientGraph::RunPartitions(
bool success =
cm->RegisterCallback(token, [&calls]() { calls.StartCancel(); });
if (!success) {
- return errors::Cancelled("Step was cancelled");
+ calls.StartCancel();
}
calls.Wait();
- cm->DeregisterCallback(token);
call_opts->ClearCancelCallback();
+ if (success) {
+ cm->DeregisterCallback(token);
+ } else {
+ return errors::Cancelled("Step was cancelled");
+ }
// Collects fetches.
Status status = calls.status();