about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/core/util/stat_summarizer.cc
diff options
context:
space:
mode:
author: Asim Shankar <ashankar@google.com> 2017-01-30 21:09:16 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-01-30 21:34:48 -0800
commit: 8691453b900aef43bf4a3966a5a96b14fbb7bfc3 (patch)
tree: 50f653d76cda5ca6d1daaa258ed39f52bcd5c571 /tensorflow/core/util/stat_summarizer.cc
parent: 643fc7712fcedd1252eae855e4e89fc642cf0426 (diff)
StatSummarizer: Make it work without needing the GraphDef.
This allows the StatSummarizer to be instantiated and used even when the GraphDef is not easily accessible. As a consequence, the BY_DEFINITION_ORDER ordering of stats is no longer available, which was deemed acceptable for this change. Other notable changes: (1) added a basic C++ unit test for stat_summarizer; (2) added comments describing the caveats that apply to summaries over runs that involve GPUs or partitioned graphs (these caveats existed in the prior implementation as well). Change: 146076563
Diffstat (limited to 'tensorflow/core/util/stat_summarizer.cc')
-rw-r--r--tensorflow/core/util/stat_summarizer.cc87
1 files changed, 51 insertions, 36 deletions
diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc
index 96f0cb0234..9a72f25ad5 100644
--- a/tensorflow/core/util/stat_summarizer.cc
+++ b/tensorflow/core/util/stat_summarizer.cc
@@ -21,7 +21,6 @@ limitations under the License.
#include <sstream>
#include <string>
-#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/step_stats.pb.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/logging.h"
@@ -29,19 +28,15 @@ limitations under the License.
namespace tensorflow {
+StatSummarizer::StatSummarizer(const StatSummarizerOptions& options)
+ : options_(options) {}  // Only the options are recorded; no GraphDef is required.
+
StatSummarizer::StatSummarizer(const tensorflow::GraphDef& tensorflow_graph)
- : StatSummarizer(tensorflow_graph, StatSummarizerOptions()) {}
+ : StatSummarizer(StatSummarizerOptions()) {}  // tensorflow_graph is unused; default options apply.
StatSummarizer::StatSummarizer(const tensorflow::GraphDef& tensorflow_graph,
const StatSummarizerOptions& options)
- : options_(options) {
- LOG(INFO) << "StatSummarizer found " << tensorflow_graph.node_size()
- << " nodes";
- for (const auto& node : tensorflow_graph.node()) {
- nodes_in_def_order_.push_back(node.name());
- node_types_[node.name()] = node.op();
- }
-}
+ : StatSummarizer(options) {}  // Keep the caller's options; the GraphDef is unused.
void StatSummarizer::Validate(const Detail* detail,
const NodeExecStats& ns) const {
@@ -78,6 +73,47 @@ void StatSummarizer::Validate(const Detail* detail,
}
}
+namespace {
+std::string OpType(const DeviceStepStats& ds, const NodeExecStats& ns) {
+ // Returns the op type of `ns` parsed from its timeline_label, or "<>" if it
+ // cannot be determined. How DeviceStepStats and NodeExecStats get filled in
+ // has no published specification, so this implementation is knowingly fragile.
+ // Note that NodeExecStats.node_name may NOT refer to a node in the Graph.
+ // This can happen if either:
+ // (1) The DeviceStepStats corresponds to statistics from the GPUTracer
+ // logging (which adds devices whose name contains either "/stream"
+ // or "/memcpy" to the StepStats), OR
+ // (2) The graph was partitioned, and thus the NodeExecStats refers to
+ // the SendTensor or RecvTensor operations added.
+ // For these cases the intent is to return "<>" as the "type" of the operation.
+ //
+ // The StatSummarizer was initially aimed at CPU execution on mobile, where
+ // there was no GPUTracing and no graph partitioning, so the conditions above
+ // do not occur.
+ //
+ // It would be nice to have a clearer spec for StepStats so utilities such as
+ // this class can gracefully handle nodes that do not appear in the original
+ // graph. Until then, duplicate what is done by:
+ // https://www.tensorflow.org/code/tensorflow/python/client/timeline.py
+ // and rely on the unittest.
+ if (ds.device().find("/stream") != std::string::npos ||
+ ds.device().find("/memcpy") != std::string::npos) {
+ // Stats from the GPUTracer do not correspond to TensorFlow ops.
+ return "<>";
+ }
+ // timeline_label should be of the format: <node_name> = <op_type>(<args>)
+ // Extract <op_type>, i.e. the text between the first " = " and the next "(".
+ const std::string sep(" = ");
+ const std::string& label = ns.timeline_label();
+ std::string::size_type start = label.find(sep);
+ if (start == std::string::npos) return "<>";  // No " = " separator in the label.
+ start += sep.size();  // Skip past the separator to the first op-type character.
+ std::string::size_type end = label.find("(", start);
+ if (end == std::string::npos) return "<>";  // No "(" after the separator.
+ return label.substr(start, end - start);
+}
+} // namespace
+
void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
int64 curr_total_us = 0;
int64 mem_total = 0;
@@ -100,11 +136,7 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
// If this is the first pass, initialize some values.
if (result.second) {
detail->name = ns.node_name();
-
- auto node_type_it = node_types_.find(detail->name);
- if (node_type_it != node_types_.end()) {
- detail->type = node_type_it->second;
- }
+ detail->type = OpType(ds, ns);
detail->run_order = node_num;
@@ -146,8 +178,7 @@ std::string StatSummarizer::ShortSummary() const {
memory_.OutputToStream(&stream);
stream << std::endl;
- stream << node_types_.size() << " nodes defined " << details_.size()
- << " nodes observed" << std::endl;
+ stream << details_.size() << " nodes observed" << std::endl;
return stream.str();
}
@@ -200,7 +231,7 @@ std::string StatSummarizer::ColumnString(const Detail& detail,
void StatSummarizer::OrderNodesByMetric(
SortingMetric metric, std::vector<const Detail*>* details) const {
std::priority_queue<std::pair<string, const Detail*>> sorted_list;
- const int num_nodes = nodes_in_def_order_.size();
+ const int num_nodes = details_.size();
for (const auto& det : details_) {
const Detail* detail = &(det.second);
@@ -208,20 +239,10 @@ void StatSummarizer::OrderNodesByMetric(
stream << std::setw(20) << std::right << std::setprecision(10)
<< std::fixed;
- int definition_index = 0;
- auto it = std::find(nodes_in_def_order_.begin(), nodes_in_def_order_.end(),
- detail->name);
- if (it != nodes_in_def_order_.end()) {
- definition_index = std::distance(nodes_in_def_order_.begin(), it);
- }
-
switch (metric) {
case BY_NAME:
stream << detail->name;
break;
- case BY_DEFINITION_ORDER:
- stream << num_nodes - definition_index;
- break;
case BY_RUN_ORDER:
stream << num_nodes - detail->run_order;
break;
@@ -264,8 +285,7 @@ std::string StatSummarizer::GetStatsByNodeType() const {
int64 num_processed = 0;
- LOG(INFO) << "nodes_in_def_order_ size: " << nodes_in_def_order_.size();
- LOG(INFO) << "timing_details_ size: " << details_.size();
+ LOG(INFO) << "Number of nodes executed: " << details_.size();
for (const auto& det : details_) {
const string node_name = det.first;
const Detail& detail = det.second;
@@ -277,12 +297,7 @@ std::string StatSummarizer::GetStatsByNodeType() const {
int64 curr_memory_val = detail.mem_used.newest();
accumulated_bytes += curr_memory_val;
- string node_type = "<>";
-
- auto node_type_it = node_types_.find(node_name);
- if (node_type_it != node_types_.end()) {
- node_type = node_type_it->second;
- }
+ const string& node_type = detail.type;
node_type_map_count[node_type] += 1;
node_type_map_time[node_type] += curr_time_val;