tfdbg: add device name to debug ops

This CL also forms a basis for solving the issue that data from multiple GPUs do not work with file:// debug URLs. In a later CL, the debug tensors will be saved in separate directory trees for different devices (e.g., GPUs). PiperOrigin-RevId: 157180359
author: Shanqing Cai <cais@google.com> 2017-05-25 18:52:09 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-05-25 18:55:34 -0700
commit: 258b5073203573fd9a33fd9f0b133289fb22a2d0 (patch)
tree: a03885c14e083f584d113a84bf61d4d6f08ac93d /tensorflow/core/debug/debug_io_utils.h
parent: 2251633a509d6807d309bff60013c86650bb891d (diff)
1 files changed, 30 insertions, 26 deletions
diff --git a/tensorflow/core/debug/debug_io_utils.h b/tensorflow/core/debug/debug_io_utils.h
index e6118cb15d..f3e76cc0ee 100644
--- a/tensorflow/core/debug/debug_io_utils.h
+++ b/tensorflow/core/debug/debug_io_utils.h
@@ -40,11 +40,22 @@ struct DebugWatchAndURLSpec {
   const bool gated_grpc;
 };
 
+struct DebugNodeKey {
+  DebugNodeKey(const string& device_name, const string& node_name,
+               const int32 output_slot, const string& debug_op);
+
+  const string device_name;
+  const string node_name;
+  const int32 output_slot;
+  const string debug_op;
+  const string debug_node_name;
+};
+
 class DebugIO {
  public:
   static Status PublishDebugMetadata(
-      const int64 global_step, const int64 session_run_count,
-      const int64 executor_step_count, const std::vector<string>& input_names,
+      const int64 global_step, const int64 session_run_index,
+      const int64 executor_step_index, const std::vector<string>& input_names,
       const std::vector<string>& output_names,
       const std::vector<string>& target_nodes,
       const std::unordered_set<string>& debug_urls);
@@ -52,25 +63,21 @@ class DebugIO {
   // Publish a tensor to a debug target URL.
   //
   // Args:
-  //   tensor_name: Name of the tensor being published: node_name followed by
-  //     a colon, followed by the output slot index. E.g., "node_a:0".
-  //     N.B.: Use the original tensor name, i.e., name of the input tensor to
-  //     the debug op, even if the debug_op is not DebugIdentity.
-  //   debug_op: Name of the debug op, e.g., "DebugIdentity".
+  //   debug_node_key: A DebugNodeKey identifying the debug node.
   //   tensor: The Tensor object being published.
   //   wall_time_us: Time stamp for the Tensor. Unit: microseconds (us).
   //   debug_urls: An array of debug target URLs, e.g.,
   //     "file:///foo/tfdbg_dump", "grpc://localhost:11011"
   //   gated_grpc: Whether this call is subject to gRPC gating.
-  static Status PublishDebugTensor(const string& tensor_name,
-                                   const string& debug_op, const Tensor& tensor,
+  static Status PublishDebugTensor(const DebugNodeKey& debug_node_key,
+                                   const Tensor& tensor,
                                    const uint64 wall_time_us,
                                    const gtl::ArraySlice<string>& debug_urls,
                                    const bool gated_grpc);
 
   // Convenience overload of the method above for no gated_grpc by default.
-  static Status PublishDebugTensor(const string& tensor_name,
-                                   const string& debug_op, const Tensor& tensor,
+  static Status PublishDebugTensor(const DebugNodeKey& debug_node_key,
+                                   const Tensor& tensor,
                                    const uint64 wall_time_us,
                                    const gtl::ArraySlice<string>& debug_urls);
 
@@ -150,16 +157,12 @@ class DebugFileIO {
   //   /tmp/tfdbg_dump/foo/bar_0_DebugIdentity_1467891234512345.
   //
   // Args:
-  //   node_name: Name of the node from which the tensor is output.
-  //   output_slot: Output slot index.
-  //   debug_op: Name of the debug op, e.g., "DebugIdentity".
-  //   tensor: The Tensor object to be dumped to file.
+  //   debug_node_key: A DebugNodeKey identifying the debug node.
   //   wall_time_us: Wall time at which the Tensor is generated during graph
   //     execution. Unit: microseconds (us).
   //   dump_root_dir: Root directory for dumping the tensor.
   //   dump_file_path: The actual dump file path (passed as reference).
-  static Status DumpTensorToDir(const string& node_name,
-                                const int32 output_slot, const string& debug_op,
+  static Status DumpTensorToDir(const DebugNodeKey& debug_node_key,
                                 const Tensor& tensor, const uint64 wall_time_us,
                                 const string& dump_root_dir,
                                 string* dump_file_path);
@@ -174,8 +177,7 @@ class DebugFileIO {
   //   debug_op: Name of the debug op, e.g., DebugIdentity.
   //   wall_time_us: Time stamp of the dumped tensor, in microseconds (us).
   static string GetDumpFilePath(const string& dump_root_dir,
-                                const string& node_name,
-                                const int32 output_slot, const string& debug_op,
+                                const DebugNodeKey& debug_node_key,
                                 const uint64 wall_time_us);
 
   static Status DumpEventProtoToFile(const Event& event_proto,
@@ -184,9 +186,10 @@ class DebugFileIO {
 
  private:
   // Encapsulate the Tensor in an Event protobuf and write it to file.
-  static Status DumpTensorToEventFile(
-      const string& node_name, const int32 output_slot, const string& debug_op,
-      const Tensor& tensor, const uint64 wall_time_us, const string& file_path);
+  static Status DumpTensorToEventFile(const DebugNodeKey& debug_node_key,
+                                      const Tensor& tensor,
+                                      const uint64 wall_time_us,
+                                      const string& file_path);
 
   // Implemented ad hoc here for now.
   // TODO(cais): Replace with shared implementation once http://b/30497715 is
@@ -256,10 +259,11 @@ class DebugGrpcChannel {
 class DebugGrpcIO {
  public:
   // Send a tensor through a debug gRPC stream.
-  static Status SendTensorThroughGrpcStream(
-      const string& node_name, const int32 output_slot, const string& debug_op,
-      const Tensor& tensor, const uint64 wall_time_us,
-      const string& grpc_stream_url, const bool gated);
+  static Status SendTensorThroughGrpcStream(const DebugNodeKey& debug_node_key,
+                                            const Tensor& tensor,
+                                            const uint64 wall_time_us,
+                                            const string& grpc_stream_url,
+                                            const bool gated);
 
   // Send an Event proto through a debug gRPC stream.
   // Thread-safety: Safe with respect to other calls to the same method and
author	Shanqing Cai <cais@google.com>	2017-05-25 18:52:09 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-05-25 18:55:34 -0700
commit	258b5073203573fd9a33fd9f0b133289fb22a2d0 (patch)
tree	a03885c14e083f584d113a84bf61d4d6f08ac93d /tensorflow/core/debug/debug_io_utils.h
parent	2251633a509d6807d309bff60013c86650bb891d (diff)