diff options
author | 2017-05-25 18:52:09 -0700 | |
---|---|---|
committer | 2017-05-25 18:55:34 -0700 | |
commit | 258b5073203573fd9a33fd9f0b133289fb22a2d0 (patch) | |
tree | a03885c14e083f584d113a84bf61d4d6f08ac93d /tensorflow/core/debug/debug_io_utils.h | |
parent | 2251633a509d6807d309bff60013c86650bb891d (diff) |
tfdbg: add device name to deubg ops
This CL also forms a basis for solving the issue that data
from multiple GPUs do not work with file:// debug URLs. In a
later CL, the debug tensors will be saved in separate directory
trees for different devices (e.g., GPUs).
PiperOrigin-RevId: 157180359
Diffstat (limited to 'tensorflow/core/debug/debug_io_utils.h')
-rw-r--r-- | tensorflow/core/debug/debug_io_utils.h | 56 |
1 files changed, 30 insertions, 26 deletions
diff --git a/tensorflow/core/debug/debug_io_utils.h b/tensorflow/core/debug/debug_io_utils.h index e6118cb15d..f3e76cc0ee 100644 --- a/tensorflow/core/debug/debug_io_utils.h +++ b/tensorflow/core/debug/debug_io_utils.h @@ -40,11 +40,22 @@ struct DebugWatchAndURLSpec { const bool gated_grpc; }; +struct DebugNodeKey { + DebugNodeKey(const string& device_name, const string& node_name, + const int32 output_slot, const string& debug_op); + + const string device_name; + const string node_name; + const int32 output_slot; + const string debug_op; + const string debug_node_name; +}; + class DebugIO { public: static Status PublishDebugMetadata( - const int64 global_step, const int64 session_run_count, - const int64 executor_step_count, const std::vector<string>& input_names, + const int64 global_step, const int64 session_run_index, + const int64 executor_step_index, const std::vector<string>& input_names, const std::vector<string>& output_names, const std::vector<string>& target_nodes, const std::unordered_set<string>& debug_urls); @@ -52,25 +63,21 @@ class DebugIO { // Publish a tensor to a debug target URL. // // Args: - // tensor_name: Name of the tensor being published: node_name followed by - // a colon, followed by the output slot index. E.g., "node_a:0". - // N.B.: Use the original tensor name, i.e., name of the input tensor to - // the debug op, even if the debug_op is not DebugIdentity. - // debug_op: Name of the debug op, e.g., "DebugIdentity". + // debug_node_key: A DebugNodeKey identifying the debug node. // tensor: The Tensor object being published. // wall_time_us: Time stamp for the Tensor. Unit: microseconds (us). // debug_urls: An array of debug target URLs, e.g., // "file:///foo/tfdbg_dump", "grpc://localhost:11011" // gated_grpc: Whether this call is subject to gRPC gating. - static Status PublishDebugTensor(const string& tensor_name, - const string& debug_op, const Tensor& tensor, + static Status PublishDebugTensor(const DebugNodeKey& debug_node_key, + const Tensor& tensor, const uint64 wall_time_us, const gtl::ArraySlice<string>& debug_urls, const bool gated_grpc); // Convenience overload of the method above for no gated_grpc by default. - static Status PublishDebugTensor(const string& tensor_name, - const string& debug_op, const Tensor& tensor, + static Status PublishDebugTensor(const DebugNodeKey& debug_node_key, + const Tensor& tensor, const uint64 wall_time_us, const gtl::ArraySlice<string>& debug_urls); @@ -150,16 +157,12 @@ class DebugFileIO { // /tmp/tfdbg_dump/foo/bar_0_DebugIdentity_1467891234512345. // // Args: - // node_name: Name of the node from which the tensor is output. - // output_slot: Output slot index. - // debug_op: Name of the debug op, e.g., "DebugIdentity". - // tensor: The Tensor object to be dumped to file. + // debug_node_key: A DebugNodeKey identifying the debug node. // wall_time_us: Wall time at which the Tensor is generated during graph // execution. Unit: microseconds (us). // dump_root_dir: Root directory for dumping the tensor. // dump_file_path: The actual dump file path (passed as reference). - static Status DumpTensorToDir(const string& node_name, - const int32 output_slot, const string& debug_op, + static Status DumpTensorToDir(const DebugNodeKey& debug_node_key, const Tensor& tensor, const uint64 wall_time_us, const string& dump_root_dir, string* dump_file_path); @@ -174,8 +177,7 @@ class DebugFileIO { // debug_op: Name of the debug op, e.g., DebugIdentity. // wall_time_us: Time stamp of the dumped tensor, in microseconds (us). static string GetDumpFilePath(const string& dump_root_dir, - const string& node_name, - const int32 output_slot, const string& debug_op, + const DebugNodeKey& debug_node_key, const uint64 wall_time_us); static Status DumpEventProtoToFile(const Event& event_proto, @@ -184,9 +186,10 @@ class DebugFileIO { private: // Encapsulate the Tensor in an Event protobuf and write it to file. - static Status DumpTensorToEventFile( - const string& node_name, const int32 output_slot, const string& debug_op, - const Tensor& tensor, const uint64 wall_time_us, const string& file_path); + static Status DumpTensorToEventFile(const DebugNodeKey& debug_node_key, + const Tensor& tensor, + const uint64 wall_time_us, + const string& file_path); // Implemented ad hoc here for now. // TODO(cais): Replace with shared implementation once http://b/30497715 is @@ -256,10 +259,11 @@ class DebugGrpcChannel { class DebugGrpcIO { public: // Send a tensor through a debug gRPC stream. - static Status SendTensorThroughGrpcStream( - const string& node_name, const int32 output_slot, const string& debug_op, - const Tensor& tensor, const uint64 wall_time_us, - const string& grpc_stream_url, const bool gated); + static Status SendTensorThroughGrpcStream(const DebugNodeKey& debug_node_key, + const Tensor& tensor, + const uint64 wall_time_us, + const string& grpc_stream_url, + const bool gated); // Send an Event proto through a debug gRPC stream. // Thread-safety: Safe with respect to other calls to the same method and |