diff options
author | 2017-01-30 21:09:16 -0800 | |
---|---|---|
committer | 2017-01-30 21:34:48 -0800 | |
commit | 8691453b900aef43bf4a3966a5a96b14fbb7bfc3 (patch) | |
tree | 50f653d76cda5ca6d1daaa258ed39f52bcd5c571 /tensorflow/core/util/stat_summarizer.cc | |
parent | 643fc7712fcedd1252eae855e4e89fc642cf0426 (diff) |
StatSummarizer: Make it work without needing the GraphDef.
This will allow the StatSummarizer to be instantiated and used
even when the GraphDef is not easily accessible. A consequence of
this is that the BY_DEFINITION_ORDER ordering of stats is no
longer available, but that was deemed acceptable for this change.
Other notable changes:
- Added a basic C++ unittest for stat_summarizer.
- Added some commentary on the caveats of summaries over runs
that involve GPUs or partitioned graphs. These caveats existed
in the prior implementation as well.
Change: 146076563
Diffstat (limited to 'tensorflow/core/util/stat_summarizer.cc')
-rw-r--r-- | tensorflow/core/util/stat_summarizer.cc | 87 |
1 files changed, 51 insertions, 36 deletions
diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc index 96f0cb0234..9a72f25ad5 100644 --- a/tensorflow/core/util/stat_summarizer.cc +++ b/tensorflow/core/util/stat_summarizer.cc @@ -21,7 +21,6 @@ limitations under the License. #include <sstream> #include <string> -#include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/step_stats.pb.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" @@ -29,19 +28,15 @@ limitations under the License. namespace tensorflow { +StatSummarizer::StatSummarizer(const StatSummarizerOptions& options) + : options_(options) {} + StatSummarizer::StatSummarizer(const tensorflow::GraphDef& tensorflow_graph) - : StatSummarizer(tensorflow_graph, StatSummarizerOptions()) {} + : StatSummarizer(StatSummarizerOptions()) {} StatSummarizer::StatSummarizer(const tensorflow::GraphDef& tensorflow_graph, const StatSummarizerOptions& options) - : options_(options) { - LOG(INFO) << "StatSummarizer found " << tensorflow_graph.node_size() - << " nodes"; - for (const auto& node : tensorflow_graph.node()) { - nodes_in_def_order_.push_back(node.name()); - node_types_[node.name()] = node.op(); - } -} + : StatSummarizer(StatSummarizerOptions()) {} void StatSummarizer::Validate(const Detail* detail, const NodeExecStats& ns) const { @@ -78,6 +73,47 @@ void StatSummarizer::Validate(const Detail* detail, } } +namespace { +std::string OpType(const DeviceStepStats& ds, const NodeExecStats& ns) { + // There is no published specification of how DeviceStats and NodeStats + // are filled in. Thus, we live with the fragility of this implementation. + // + // Note that NodeStats.node_name may NOT refer to a node in the Graph. 
+ // This can happen if, either: + // (1) The DeviceStats corresponds to statistics from the GPUTracer + // logging (which adds devices whose name contains either "/stream" + // or "/memcpy" to the StepStats), OR + // (2) The graph was partitioned, and thus the NodeStats refers to + // the SendTensor or RecvTensor operations added. + // For these cases, return "<>" as the "type" of the operation. + // + // The StatSummarizer was initially aimed at CPU execution on mobile, where + // there was no GPUTracing and no graph partitioning, so the conditions above + // do not occur. + // + // It would be nice to have a clearer spec for StepStats so utilities such as + // this class can handle nodes that do not appear in the original graph + // gracefully. Till then, duplicate what is done by: + // https://www.tensorflow.org/code/tensorflow/python/client/timeline.py + // and rely on the unittest. + if (ds.device().find("/stream") != std::string::npos || + ds.device().find("/memcpy") != std::string::npos) { + // Stats from the GPUTracer, does not correspond to TensorFlow ops. + return "<>"; + } + // timeline_label should be of the format: <node_name> = <op_type>(<args>) + // Extract <op_type>. + const std::string sep(" = "); + const std::string& label = ns.timeline_label(); + std::string::size_type start = label.find(sep); + if (start == std::string::npos) return "<>"; + start += sep.size(); + std::string::size_type end = label.find("(", start); + if (end == std::string::npos) return "<>"; + return label.substr(start, end - start); +} +} // namespace + void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { int64 curr_total_us = 0; int64 mem_total = 0; @@ -100,11 +136,7 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { // If this is the first pass, initialize some values. 
if (result.second) { detail->name = ns.node_name(); - - auto node_type_it = node_types_.find(detail->name); - if (node_type_it != node_types_.end()) { - detail->type = node_type_it->second; - } + detail->type = OpType(ds, ns); detail->run_order = node_num; @@ -146,8 +178,7 @@ std::string StatSummarizer::ShortSummary() const { memory_.OutputToStream(&stream); stream << std::endl; - stream << node_types_.size() << " nodes defined " << details_.size() - << " nodes observed" << std::endl; + stream << details_.size() << " nodes observed" << std::endl; return stream.str(); } @@ -200,7 +231,7 @@ std::string StatSummarizer::ColumnString(const Detail& detail, void StatSummarizer::OrderNodesByMetric( SortingMetric metric, std::vector<const Detail*>* details) const { std::priority_queue<std::pair<string, const Detail*>> sorted_list; - const int num_nodes = nodes_in_def_order_.size(); + const int num_nodes = details_.size(); for (const auto& det : details_) { const Detail* detail = &(det.second); @@ -208,20 +239,10 @@ void StatSummarizer::OrderNodesByMetric( stream << std::setw(20) << std::right << std::setprecision(10) << std::fixed; - int definition_index = 0; - auto it = std::find(nodes_in_def_order_.begin(), nodes_in_def_order_.end(), - detail->name); - if (it != nodes_in_def_order_.end()) { - definition_index = std::distance(nodes_in_def_order_.begin(), it); - } - switch (metric) { case BY_NAME: stream << detail->name; break; - case BY_DEFINITION_ORDER: - stream << num_nodes - definition_index; - break; case BY_RUN_ORDER: stream << num_nodes - detail->run_order; break; @@ -264,8 +285,7 @@ std::string StatSummarizer::GetStatsByNodeType() const { int64 num_processed = 0; - LOG(INFO) << "nodes_in_def_order_ size: " << nodes_in_def_order_.size(); - LOG(INFO) << "timing_details_ size: " << details_.size(); + LOG(INFO) << "Number of nodes executed: " << details_.size(); for (const auto& det : details_) { const string node_name = det.first; const Detail& detail = det.second; @@ 
-277,12 +297,7 @@ std::string StatSummarizer::GetStatsByNodeType() const { int64 curr_memory_val = detail.mem_used.newest(); accumulated_bytes += curr_memory_val; - string node_type = "<>"; - - auto node_type_it = node_types_.find(node_name); - if (node_type_it != node_types_.end()) { - node_type = node_type_it->second; - } + const string& node_type = detail.type; node_type_map_count[node_type] += 1; node_type_map_time[node_type] += curr_time_val; |