aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/common_runtime/direct_session.cc
diff options
context:
space:
mode:
authorGravatar Shanqing Cai <cais@google.com>2017-04-29 08:26:06 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-04-29 09:48:44 -0700
commita25509eda3e42dddc88155965eaffe4b3c455af5 (patch)
tree79bc1880238cd291c64123c2c241122da1c000b5 /tensorflow/core/common_runtime/direct_session.cc
parentea25bc496e30ecc1f90622906390669913d92e74 (diff)
Add TFDBG support to GrpcSession
* Along the way, unify the way the debugger works in DirectSession (non-distributed Sessions) and MasterSession (for distributed Sessions). * The SummarizeDebugTensorWatches method is invoked in the DirectSession::GetOrCreateExecutors() and MasterSession::HashBuildGraphOptions() methods to generate keys for partition graphs and executors. * The DebuggerStateInterface::PublishDebugMetadata() method is used to send metadata about the debugged Session::Run() call to debug URLs. This happens in DirectSession::Run() and MasterSession::DoRunWithLocalExecution(), respectively. * The DebugGraphDecoratorInterface::DecorateGraph() and DebugGraphDecoratorInterface::PublishGraph() methods are used to insert debug ops into the debugged graph and send the modified graph to debug URLs. This happens in DirectSession::GetOrCreateExecutors() and GraphMgr::InitItem(), respectively. Change: 154631802
Diffstat (limited to 'tensorflow/core/common_runtime/direct_session.cc')
-rw-r--r--tensorflow/core/common_runtime/direct_session.cc79
1 files changed, 58 insertions, 21 deletions
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index f208e4b78e..7c017f9584 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -370,6 +370,43 @@ Status DirectSession::Run(const NamedTensorList& inputs,
&run_metadata);
}
+Status DirectSession::CreateDebuggerState(  // Creates debugger state from debug_options and publishes debug metadata through it.
+ const DebugOptions& debug_options, int64 session_run_count,
+ int64 executor_step_count, const std::vector<string>& input_names,
+ const std::vector<string>& output_names,
+ const std::vector<string>& target_names,
+ std::unique_ptr<DebuggerStateInterface>* debugger_state) {  // Out-param: assigned only on the success path below.
+ std::unique_ptr<DebuggerStateInterface> state =
+ DebuggerStateRegistry::CreateState(debug_options);
+ if (!state) {  // A null state is surfaced as an Internal error instead of being ignored.
+ return errors::Internal(
+ "Debugger options are set, but creation of debugger state failed. "
+ "It appears that debugger is not linked in this TensorFlow build.");
+ }
+
+ TF_RETURN_IF_ERROR(state->PublishDebugMetadata(  // Publishing happens before *debugger_state is written.
+ debug_options.global_step(), session_run_count, executor_step_count,
+ input_names, output_names, target_names));
+
+ *debugger_state = std::move(state);  // Hands ownership of the created state to the caller.
+ return Status::OK();
+}
+
+Status DirectSession::DecorateAndPublishGraphForDebug(  // Creates a debug graph decorator, then decorates and publishes `graph`.
+ const DebugOptions& debug_options, Graph* graph, Device* device) {
+ std::unique_ptr<DebugGraphDecoratorInterface> decorator =
+ DebugGraphDecoratorRegistry::CreateDecorator(debug_options);
+ if (!decorator) {  // A null decorator is surfaced as an Internal error instead of being ignored.
+ return errors::Internal(
+ "Debugger options are set, but creation of debug graph publisher ",
+ "failed.");
+ }
+
+ TF_RETURN_IF_ERROR(decorator->DecorateGraph(graph, device));  // Decorate first; `graph` is passed by pointer here.
+ TF_RETURN_IF_ERROR(decorator->PublishGraph(*graph));  // Publish runs after decoration, on the same graph object.
+ return Status::OK();
+}
+
Status DirectSession::Run(const RunOptions& run_options,
const NamedTensorList& inputs,
const std::vector<string>& output_names,
@@ -402,27 +439,21 @@ Status DirectSession::Run(const RunOptions& run_options,
// Check if we already have an executor for these arguments.
ExecutorsAndKeys* executors_and_keys;
- RunStateArgs run_state_args;
+ RunStateArgs run_state_args(run_options.debug_options());
Executor::Args args;
args.step_id = step_id_counter_.fetch_add(1);
- // EXPERIMENTAL: Options that allow the client to insert nodes into partition
- // graphs for debugging.
- if (!run_options.debug_options().debug_tensor_watch_opts().empty()) {
- run_state_args.debugger_state =
- DebuggerStateRegistry::CreateState(run_options.debug_options());
- }
-
TF_RETURN_IF_ERROR(
GetOrCreateExecutors(pool, input_tensor_names, output_names, target_nodes,
&executors_and_keys, &run_state_args));
const int64 executor_step_count = executors_and_keys->step_count.fetch_add(1);
- if (run_state_args.debugger_state) {
- TF_RETURN_IF_ERROR(run_state_args.debugger_state->PublishDebugMetadata(
- run_options.debug_options().global_step(), args.step_id,
- executor_step_count, input_tensor_names, output_names, target_nodes));
+ std::unique_ptr<DebuggerStateInterface> debugger_state;
+ if (!run_options.debug_options().debug_tensor_watch_opts().empty()) {
+ TF_RETURN_IF_ERROR(CreateDebuggerState(
+ run_options.debug_options(), args.step_id, executor_step_count,
+ input_tensor_names, output_names, target_nodes, &debugger_state));
}
// Configure a call frame for the step, which we use to feed and
@@ -629,7 +660,9 @@ Status DirectSession::PRunSetup(const std::vector<string>& input_names,
// Check if we already have an executor for these arguments.
ExecutorsAndKeys* executors_and_keys;
- RunStateArgs run_state_args;
+ // TODO(cais): TFDBG support for partial runs.
+ DebugOptions debug_options;
+ RunStateArgs run_state_args(debug_options);
run_state_args.is_partial_run = true;
TF_RETURN_IF_ERROR(GetOrCreateExecutors(pool, input_names, output_names,
target_nodes, &executors_and_keys,
@@ -960,14 +993,15 @@ Status DirectSession::GetOrCreateExecutors(
thread::ThreadPool* pool, gtl::ArraySlice<string> inputs,
gtl::ArraySlice<string> outputs, gtl::ArraySlice<string> target_nodes,
ExecutorsAndKeys** executors_and_keys, RunStateArgs* run_state_args) {
- string debug_tensor_watches_summary;
int64 handle_name_counter_value = -1;
if (LogMemory::IsEnabled() || run_state_args->is_partial_run) {
handle_name_counter_value = handle_name_counter_.fetch_add(1);
}
- if (run_state_args->debugger_state) {
- debug_tensor_watches_summary =
- run_state_args->debugger_state->SummarizeDebugTensorWatches();
+
+ string debug_tensor_watches_summary;
+ if (!run_state_args->debug_options.debug_tensor_watch_opts().empty()) {
+ debug_tensor_watches_summary = SummarizeDebugTensorWatches(
+ run_state_args->debug_options.debug_tensor_watch_opts());
}
// Fast lookup path, no sorting.
@@ -1032,6 +1066,9 @@ Status DirectSession::GetOrCreateExecutors(
options.fetch_endpoints = outputs_sorted;
options.target_nodes = tn_sorted;
options.use_function_convention = !run_state_args->is_partial_run;
+ if (!run_state_args->debug_options.debug_tensor_watch_opts().empty()) {
+ options.debug_options = run_state_args->debug_options;
+ }
std::shared_ptr<ExecutorsAndKeys> ek(new ExecutorsAndKeys);
@@ -1107,10 +1144,10 @@ Status DirectSession::GetOrCreateExecutors(
optimizer.Optimize(lib, options_.env, device, &iter->second);
- // EXPERIMENTAL: tfdbg inserts debug nodes (i.e., probes) to the graph
- if (run_state_args->debugger_state) {
- TF_RETURN_IF_ERROR(run_state_args->debugger_state->DecorateGraphForDebug(
- partition_graph.get(), params.device));
+ // EXPERIMENTAL: tfdbg inserts debug nodes in the graph.
+ if (!options.debug_options.debug_tensor_watch_opts().empty()) {
+ TF_RETURN_IF_ERROR(DecorateAndPublishGraphForDebug(
+ options.debug_options, partition_graph.get(), params.device));
}
TF_RETURN_IF_ERROR(EnsureMemoryTypes(DeviceType(device->device_type()),