aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/util/stat_summarizer.cc
diff options
context:
space:
mode:
authorGravatar Andrew Harp <andrewharp@google.com>2016-12-13 22:32:46 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2016-12-13 22:43:54 -0800
commit9466e2c7b6cc7924ae02017686c60209679fb3c4 (patch)
tree45d55686601f25ca276536ef96d263823d212b8b /tensorflow/core/util/stat_summarizer.cc
parent9004714bc4de1c78735bfe8178b441a371df1762 (diff)
Refactor StatSummarizer: use Stats for more things, remove redundant methods, combine mem/time data structures, and overhaul sorting / output formatting.
Change: 141981032
Diffstat (limited to 'tensorflow/core/util/stat_summarizer.cc')
-rw-r--r--tensorflow/core/util/stat_summarizer.cc430
1 files changed, 175 insertions, 255 deletions
diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc
index 65b7039b41..ebe7bebde7 100644
--- a/tensorflow/core/util/stat_summarizer.cc
+++ b/tensorflow/core/util/stat_summarizer.cc
@@ -74,26 +74,35 @@ void StatSummarizer::Validate(const Detail* detail,
}
void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
- int64 curr_total = 0;
+ int64 curr_total_us = 0;
int64 mem_total = 0;
- if (timing_details_.empty() && !step_stats.dev_stats().empty() &&
- !step_stats.dev_stats(0).node_stats().empty()) {
- first_node_start_micros_ =
- step_stats.dev_stats(0).node_stats(0).all_start_micros();
- }
+ int64 first_node_start_us =
+ step_stats.dev_stats(0).node_stats(0).all_start_micros();
+
+ int node_num = 0;
for (const auto& ds : step_stats.dev_stats()) {
- // timing stats
for (const auto& ns : ds.node_stats()) {
+ ++node_num;
const int64 curr_time = ns.all_end_rel_micros();
- curr_total += curr_time;
- auto result = timing_details_.emplace(ns.node_name(), Detail());
+ curr_total_us += curr_time;
+ auto result = details_.emplace(ns.node_name(), Detail());
Detail* detail = &(result.first->second);
+ detail->start_us.UpdateStat(ns.all_start_micros() - first_node_start_us);
+ detail->rel_end_us.UpdateStat(curr_time);
+
+ // If this is the first pass, initialize some values.
if (result.second) {
- detail->total = 0;
- detail->first_rel_end = curr_time;
- detail->first_start = ns.all_start_micros() - first_node_start_micros_;
+ detail->name = ns.node_name();
+
+ auto node_type_it = node_types_.find(detail->name);
+ if (node_type_it != node_types_.end()) {
+ detail->type = node_type_it->second;
+ }
+
+ detail->run_order = node_num;
+
detail->outputs.resize(ns.output_size());
for (const auto& output : ns.output()) {
const int32 slot = output.slot();
@@ -106,251 +115,143 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
}
}
- detail->total += curr_time;
- Validate(detail, ns);
- }
- run_total_micros_.UpdateStat(curr_total);
-
- // memory stats
- for (const auto& ns : ds.node_stats()) {
- auto mem_result = memory_details_.emplace(ns.node_name(), Detail());
- bool first = mem_result.second;
- auto& val = mem_result.first->second;
+ int64 curr_node_mem = 0;
for (const auto& mem : ns.memory()) {
const int64 mem_usage = mem.total_bytes();
- mem_total += mem_usage;
-
- if (first) {
- first = false;
- val.first_start = mem_total - mem_usage;
- val.first_rel_end = mem_usage;
- val.total = 0;
- } else if (mem_result.second) {
- val.first_rel_end += mem_usage;
- }
- val.total += mem_usage;
+ curr_node_mem += mem_usage;
}
+ detail->mem_used.UpdateStat(curr_node_mem);
+ mem_total += curr_node_mem;
+
+ Validate(detail, ns);
}
}
+
+ run_total_us_.UpdateStat(curr_total_us);
memory_.UpdateStat(mem_total);
}
std::string StatSummarizer::ShortSummary() const {
std::stringstream stream;
- stream << run_total_micros_.count() << " runs, avg " << std::setprecision(4)
- << run_total_micros_.avg() / 1000.0 << " ms, " << node_types_.size()
- << " nodes defined " << timing_details_.size() << " nodes observed"
- << std::endl;
- stream << std::setprecision(9) << memory_.avg() / 1000.0 << " avg KB per run."
- << std::endl;
+ stream << "Timings (microseconds): ";
+ run_total_us_.OutputToStream(&stream);
+ stream << std::endl;
+
+ stream << "Memory (bytes): ";
+ memory_.OutputToStream(&stream);
+ stream << std::endl;
+
+ stream << node_types_.size() << " nodes defined " << details_.size()
+ << " nodes observed" << std::endl;
return stream.str();
}
-std::string StatSummarizer::HeaderString() const {
- std::stringstream stream;
- stream << std::setw(9) << "[start]" << std::setw(9) << "[first]"
- << std::setw(9) << "[avg]";
- stream << "\t" << std::setw(8) << "[%]"
- << " ";
- stream << "\t" << std::setw(8) << "[cdf%]"
- << " ";
- stream << "\t" << std::setw(10) << "[Op]";
- stream << "\t"
- << "[Name]";
- return stream.str();
+std::ostream& InitField(std::ostream& stream, int width) {
+ stream << "\t" << std::right << std::setw(width) << std::fixed
+ << std::setprecision(3);
+ return stream;
}
-std::string StatSummarizer::ColumnString(const std::string& name,
- const Detail& detail,
- int64 cumulative_stat_on_node,
- Stat<int64> stat) const {
+std::string StatSummarizer::HeaderString(const string& title) const {
std::stringstream stream;
- stream << std::fixed << std::setprecision(3) << std::setw(9)
- << detail.first_start / 1000.0;
- stream << std::fixed << std::setprecision(3) << std::setw(9)
- << detail.first_rel_end / 1000.0;
-
- double avg_time_ms = detail.total / 1000.0 / num_runs();
- stream << std::fixed << std::setprecision(3) << std::setw(9) << avg_time_ms;
-
- double percentage = detail.total * 100.0 / stat.sum();
- stream << "\t" << std::fixed << std::setprecision(3) << std::setw(7)
- << percentage << "%";
-
- double cdf_percentage = cumulative_stat_on_node * 100.0 / stat.sum();
- stream << "\t" << std::fixed << std::setprecision(3) << std::setw(7)
- << cdf_percentage << "%";
-
- stream << "\t" << std::setw(10);
- auto op_it = node_types_.find(name);
- if (op_it != node_types_.end()) {
- stream << op_it->second;
- } else {
- stream << " ";
- }
- stream << "\t" << name;
+ stream << "============================== " << title
+ << " ==============================" << std::endl;
+ InitField(stream, 24) << "[node type]";
+ InitField(stream, 9) << "[start]";
+ InitField(stream, 9) << "[first]";
+ InitField(stream, 9) << "[avg ms]";
+ InitField(stream, 8) << "[%]";
+ InitField(stream, 8) << "[cdf%]";
+ InitField(stream, 10) << "[mem KB]";
+ stream << "\t"
+ << "[Name]";
return stream.str();
}
-std::string StatSummarizer::GetStatsBySorting(SortingMetric sorting_metric,
- double cdf_cutoff_ratio,
- int num_max_nodes_to_print,
- bool use_memory) const {
- std::map<std::string, Detail> details;
- Stat<int64> stat;
- if (use_memory) {
- details = memory_details_;
- stat = memory_;
- } else {
- details = timing_details_;
- stat = run_total_micros_;
- }
- num_max_nodes_to_print =
- std::min<int>(num_max_nodes_to_print, details.size());
+std::string StatSummarizer::ColumnString(const Detail& detail,
+ const int64 cumulative_stat_on_node,
+ const Stat<int64>& stat) const {
+ const double start_ms = detail.start_us.avg() / 1000.0;
+ const double first_time_ms = detail.rel_end_us.first() / 1000.0;
+ const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
+ const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
+ const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum();
std::stringstream stream;
- string out_opt = "duration";
- string unit = "ms";
- if (use_memory) {
- out_opt = "memory usage";
- unit = "KB";
- }
- if (sorting_metric == SortingMetric::BY_TOTAL) {
- stream << "============ Top by " << out_opt << " in " << unit
- << " =================" << std::endl;
- } else {
- CHECK(sorting_metric == SortingMetric::BY_RUN_ORDER);
- stream << "============ By run order (" << unit
- << ") =================" << std::endl;
- }
- stream << HeaderString() << std::endl;
-
- std::priority_queue<
- std::pair<int64, const std::pair<const std::string, Detail>*>>
- statistics;
- for (const auto& entry : details) {
- statistics.emplace(sorting_metric == SortingMetric::BY_TOTAL
- ? entry.second.total
- : -entry.second.first_start,
- &entry);
- }
-
- const int64 cutoff_point = stat.sum() * cdf_cutoff_ratio;
- int64 accumulated_us = 0;
-
- for (int i = 0; !statistics.empty() && i < num_max_nodes_to_print &&
- accumulated_us <= cutoff_point;
- ++i) {
- accumulated_us += statistics.top().second->second.total;
- stream << ColumnString(statistics.top().second->first,
- statistics.top().second->second, accumulated_us,
- stat)
- << std::endl;
- statistics.pop();
- }
+ InitField(stream, 24) << detail.type;
+ InitField(stream, 9) << start_ms;
+ InitField(stream, 9) << first_time_ms;
+ InitField(stream, 9) << avg_time_ms;
+ InitField(stream, 7) << percentage << "%";
+ InitField(stream, 7) << cdf_percentage << "%";
+ InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
+ stream << "\t" << detail.name;
return stream.str();
}
-std::string StatSummarizer::GetStatsByTopDurations(
- double cdf_cutoff, int num_max_nodes_to_print) const {
- return GetTimingStatsByTopDurations(cdf_cutoff, num_max_nodes_to_print);
-}
-
-std::string StatSummarizer::GetStatsByRunOrder() const {
- return GetTimingStatsByRunOrder();
-}
-
-std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions() const {
- return GetTimingStatsByOrderOfNodeDefinitions();
-}
-
-std::string StatSummarizer::GetTimingStatsByTopDurations(
- double cdf_cutoff, int num_max_nodes_to_print) const {
- return GetStatsBySorting(SortingMetric::BY_TOTAL, cdf_cutoff,
- num_max_nodes_to_print, false);
-}
+void StatSummarizer::OrderNodesByMetric(
+ SortingMetric metric, std::vector<const Detail*>* details) const {
+ std::priority_queue<std::pair<string, const Detail*>> sorted_list;
+ const int num_nodes = nodes_in_def_order_.size();
-std::string StatSummarizer::GetTimingStatsByRunOrder() const {
- return GetStatsBySorting(SortingMetric::BY_RUN_ORDER, 1.0,
- std::numeric_limits<int>::max(), false);
-}
-
-std::string StatSummarizer::GetTimingStatsByOrderOfNodeDefinitions() const {
- return GetStatsByOrderOfNodeDefinitions(false);
-}
-
-std::string StatSummarizer::GetMemoryStatsByUsage(
- double cdf_cutoff, int num_max_nodes_to_print) const {
- return GetStatsBySorting(SortingMetric::BY_TOTAL, cdf_cutoff,
- num_max_nodes_to_print, true);
-}
-
-std::string StatSummarizer::GetMemoryStatsByRunOrder() const {
- return GetStatsBySorting(SortingMetric::BY_RUN_ORDER, 1.0,
- std::numeric_limits<int>::max(), true);
-}
-
-std::string StatSummarizer::GetMemoryStatsByOrderOfNodeDefinitions() const {
- return GetStatsByOrderOfNodeDefinitions(true);
-}
-
-std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions(
- bool use_memory) const {
- string type;
- if (use_memory) {
- type = "Memory Usage";
- } else {
- type = "Timings";
- }
- std::stringstream stream;
- stream << "============ " << type
- << " by order of graph definition =================" << std::endl;
- stream << HeaderString() << std::endl;
-
- int64 accumulated_us = 0;
- auto details = use_memory ? memory_details_ : timing_details_;
- Stat<int64> stat = use_memory ? memory_ : run_total_micros_;
+ for (const auto& det : details_) {
+ const Detail* detail = &(det.second);
+ std::stringstream stream;
+ stream << std::setw(20) << std::right << std::setprecision(10);
- for (const auto& node_name_op : nodes_in_def_order_) {
- auto detail_it = details.find(node_name_op);
- if (detail_it == details.end()) {
- continue;
+ int definition_index = 0;
+ auto it = std::find(nodes_in_def_order_.begin(), nodes_in_def_order_.end(),
+ detail->name);
+ if (it != nodes_in_def_order_.end()) {
+ definition_index = std::distance(nodes_in_def_order_.begin(), it);
}
- accumulated_us += detail_it->second.total;
- stream << ColumnString(detail_it->first, detail_it->second, accumulated_us,
- stat)
- << std::endl;
- details.erase(detail_it);
- }
- if (!details.empty()) {
- stream << "============ "
- << "The rest have different names between NodeExecStats and GraphDef"
- << "============ " << std::endl;
-
- for (const auto& entry : details) {
- // Prints the remaining nodes whose names are different from the name in
- // graph definition.
- accumulated_us += entry.second.total;
- stream << ColumnString(entry.first, entry.second, accumulated_us, stat)
- << std::endl;
+ switch (metric) {
+ case BY_NAME:
+ stream << detail->name;
+ break;
+ case BY_DEFINITION_ORDER:
+ stream << num_nodes - definition_index;
+ break;
+ case BY_RUN_ORDER:
+ stream << num_nodes - detail->run_order;
+ break;
+ case BY_TIME:
+ stream << detail->rel_end_us.avg();
+ break;
+ case BY_MEMORY:
+ stream << detail->mem_used.avg();
+ break;
+ case BY_TYPE:
+ stream << detail->type;
+ break;
+ default:
+ stream << "";
+ break;
}
+
+ sorted_list.emplace(stream.str(), detail);
}
- return stream.str();
+ while (!sorted_list.empty()) {
+ auto entry = sorted_list.top();
+ sorted_list.pop();
+ details->push_back(entry.second);
+ }
}
std::string StatSummarizer::GetStatsByNodeType() const {
std::stringstream stream;
- stream << "================ Summary by node type ================"
+ stream << "============================== Summary by node type "
+ "=============================="
<< std::endl;
int64 accumulated_us = 0;
- int64 accumulated_kb = 0;
+ int64 accumulated_bytes = 0;
std::map<string, int64> node_type_map_count;
std::map<string, int64> node_type_map_time;
std::map<string, int64> node_type_map_memory;
@@ -358,22 +259,17 @@ std::string StatSummarizer::GetStatsByNodeType() const {
int64 num_processed = 0;
LOG(INFO) << "nodes_in_def_order_ size: " << nodes_in_def_order_.size();
- LOG(INFO) << "timing_details_ size: " << timing_details_.size();
- for (const auto& timing_detail : timing_details_) {
- const string node_name = timing_detail.first;
+ LOG(INFO) << "timing_details_ size: " << details_.size();
+ for (const auto& det : details_) {
+ const string node_name = det.first;
+ const Detail& detail = det.second;
- int64 curr_time_val = timing_detail.second.total;
+ int64 curr_time_val = detail.rel_end_us.avg();
accumulated_us += curr_time_val;
- auto memory_details_it = memory_details_.find(node_name);
- if (memory_details_it == memory_details_.end()) {
- LOG(WARNING) << "Timing found but not memory! " << node_name;
- continue;
- }
-
++num_processed;
- int64 curr_memory_val = memory_details_it->second.total;
- accumulated_kb += curr_memory_val;
+ int64 curr_memory_val = detail.mem_used.newest();
+ accumulated_bytes += curr_memory_val;
string node_type = "<>";
@@ -397,12 +293,13 @@ std::string StatSummarizer::GetStatsByNodeType() const {
std::pair<string, int64>(node_type.first, mem_used));
}
- stream << std::setw(24) << std::right << "[Node type]"
- << "\t" << std::setw(9) << std::right << "[count]"
- << "\t" << std::setw(10) << std::right << "[avg ms]"
- << "\t" << std::setw(11) << std::right << "[avg %]"
- << "\t" << std::setw(11) << std::right << "[cdf %]"
- << "\t" << std::setw(10) << std::right << "[mem KB]" << std::endl;
+ InitField(stream, 24) << "[Node type]";
+ InitField(stream, 9) << "[count]";
+ InitField(stream, 10) << "[avg ms]";
+ InitField(stream, 11) << "[avg %]";
+ InitField(stream, 11) << "[cdf %]";
+ InitField(stream, 10) << "[mem KB]";
+ stream << std::endl;
float avg_total_time_ms = 0.0f;
float cdf = 0.0f;
@@ -411,40 +308,63 @@ std::string StatSummarizer::GetStatsByNodeType() const {
timings.pop();
const string node_type = entry.second.first;
- const float memory =
- entry.second.second / 1000.0f / static_cast<float>(num_runs());
- const int64 node_type_total_us = entry.first;
+ const float memory = entry.second.second / 1000.0f;
- const float time_per_run_ms =
- (node_type_total_us / static_cast<float>(num_runs()) / 1000.0f);
+ const int64 node_type_total_us = entry.first;
+ const float time_per_run_ms = node_type_total_us / 1000.0f;
avg_total_time_ms += time_per_run_ms;
const float percentage =
((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
cdf += percentage;
- stream << std::setw(24) << std::right << node_type << "\t" << std::setw(9)
- << std::right << node_type_map_count[node_type] << "\t"
- << std::setw(10) << std::right << std::fixed << std::setprecision(2)
- << time_per_run_ms << "\t" << std::setw(10) << std::right
- << std::fixed << std::setprecision(2) << percentage << "%"
- << "\t" << std::setw(10) << std::right << std::fixed
- << std::setprecision(2) << cdf << "%"
- << "\t" << std::setw(10) << std::right << std::fixed
- << std::setprecision(2) << memory << std::endl;
+ InitField(stream, 24) << node_type;
+ InitField(stream, 9) << node_type_map_count[node_type];
+ InitField(stream, 10) << time_per_run_ms;
+ InitField(stream, 10) << percentage << "%";
+ InitField(stream, 10) << cdf << "%";
+ InitField(stream, 10) << memory;
+ stream << std::endl;
}
+ stream << std::endl;
+ return stream.str();
+}
+
+std::string StatSummarizer::GetStatsByMetric(const string& title,
+ SortingMetric sorting_metric,
+ int num_stats) const {
+ std::vector<const Detail*> details;
+ OrderNodesByMetric(sorting_metric, &details);
+
+ double cumulative_stat_on_node = 0;
+
+ std::stringstream stream;
+ stream << HeaderString(title) << std::endl;
+ int stat_num = 0;
+ for (auto detail : details) {
+ ++stat_num;
+ if (num_stats > 0 && stat_num > num_stats) {
+ break;
+ }
+
+ // TODO(andrewharp): Make this keep track of the particular metric for cdf.
+ cumulative_stat_on_node += detail->rel_end_us.sum();
+ stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
+ << std::endl;
+ }
+ stream << std::endl;
return stream.str();
}
std::string StatSummarizer::GetOutputString() const {
std::stringstream stream;
- stream << "Total time (us): " << run_total_micros_ << std::endl;
- stream << GetTimingStatsByRunOrder();
- stream << GetTimingStatsByTopDurations();
- stream << "Total Memory (bytes): " << memory_ << std::endl;
- stream << GetMemoryStatsByRunOrder();
- stream << GetMemoryStatsByUsage();
+ // TODO(andrewharp): Allow sorting metrics to be specified externally, e.g.
+ // on command line.
+ stream << GetStatsByMetric("Run Order", BY_RUN_ORDER, 0);
+ stream << GetStatsByMetric("Top by Computation Time", BY_TIME, 10);
+ stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY, 10);
+
stream << GetStatsByNodeType();
stream << ShortSummary() << std::endl;
@@ -464,8 +384,8 @@ void StatSummarizer::PrintOutputs() const {
std::priority_queue<
std::pair<int64, const std::pair<const std::string, Detail>*>>
timings;
- for (const auto& entry : timing_details_) {
- timings.emplace(-entry.second.first_start, &entry);
+ for (const auto& entry : details_) {
+ timings.emplace(-entry.second.start_us.avg(), &entry);
}
LOG(INFO) << "============ Node output tensor sizes in run order ========";