aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/util/stat_summarizer.cc
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2016-08-26 14:33:09 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2016-08-26 15:46:35 -0700
commit26475fec68285929bad8fbb903fb1a3f62f1ad75 (patch)
treeda9686066c227d60180f1680a9ee7622b20e5df8 /tensorflow/core/util/stat_summarizer.cc
parent2a73e37e3c1be3886015908169f934096b9d9f9f (diff)
Added the ability to view memory usage for a tensorflow graph execution.
Change: 131449413
Diffstat (limited to 'tensorflow/core/util/stat_summarizer.cc')
-rw-r--r--tensorflow/core/util/stat_summarizer.cc201
1 files changed, 143 insertions, 58 deletions
diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc
index a4c02cb9b2..33cf5fe961 100644
--- a/tensorflow/core/util/stat_summarizer.cc
+++ b/tensorflow/core/util/stat_summarizer.cc
@@ -40,7 +40,8 @@ StatSummarizer::StatSummarizer(const tensorflow::GraphDef& tensorflow_graph) {
void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
int64 curr_total = 0;
- if (details_.empty() && !step_stats.dev_stats().empty() &&
+ int64 mem_total = 0;
+ if (timing_details_.empty() && !step_stats.dev_stats().empty() &&
!step_stats.dev_stats(0).node_stats().empty()) {
first_node_start_micros_ =
step_stats.dev_stats(0).node_stats(0).all_start_micros();
@@ -48,15 +49,15 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
for (const auto& ds : step_stats.dev_stats()) {
for (const auto& ns : ds.node_stats()) {
+ // timing stats
const int64 curr_time = ns.all_end_rel_micros();
curr_total += curr_time;
- auto result = details_.emplace(ns.node_name(), Detail());
+ auto result = timing_details_.emplace(ns.node_name(), Detail());
Detail* detail = &(result.first->second);
if (result.second) {
- detail->first_rel_end_micros = curr_time;
- detail->first_start_micros =
- ns.all_start_micros() - first_node_start_micros_;
- detail->total_micros = curr_time;
+ detail->first_rel_end = curr_time;
+ detail->first_start = ns.all_start_micros() - first_node_start_micros_;
+ detail->total = curr_time;
detail->outputs.resize(ns.output_size());
for (const auto& output : ns.output()) {
const int32 slot = output.slot();
@@ -68,7 +69,7 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
detail->outputs[slot] = output.tensor_description();
}
} else {
- detail->total_micros += curr_time;
+ detail->total += curr_time;
if (detail->outputs.size() != ns.output_size()) {
LOG(WARNING) << "Number of outputs changed between runs for '"
<< ns.node_name() << "' - was " << detail->outputs.size()
@@ -102,16 +103,39 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
}
}
}
+
+ // memory stats
+ auto mem_result = memory_details_.emplace(ns.node_name(), Detail());
+ bool first = mem_result.second;
+ auto& val = mem_result.first->second;
+ for (const auto& mem : ns.memory()) {
+ const int64 mem_usage = mem.total_bytes();
+ mem_total += mem_usage;
+
+ if (first) {
+ first = false;
+ val.first_start = mem_total - mem_usage;
+ val.first_rel_end = mem_usage;
+ val.total = 0;
+ } else if (mem_result.second) {
+ val.first_rel_end += mem_usage;
+ }
+ val.total += mem_usage;
+ }
}
}
run_total_micros_.UpdateStat(curr_total);
+ memory_.UpdateStat(mem_total);
}
std::string StatSummarizer::ShortSummary() const {
std::stringstream stream;
stream << run_total_micros_.count() << " runs, avg " << std::setprecision(4)
<< run_total_micros_.avg() / 1000.0 << " ms, " << node_types_.size()
- << " nodes defined " << details_.size() << " nodes observed";
+ << " nodes defined " << timing_details_.size() << " nodes observed"
+ << std::endl;
+ stream << std::setprecision(9) << memory_.avg() / 1000.0 << " avg KB per run."
+ << std::endl;
return stream.str();
}
@@ -129,24 +153,24 @@ std::string StatSummarizer::HeaderString() const {
return stream.str();
}
-std::string StatSummarizer::ColumnString(
- const std::string& name, const Detail& detail,
- int64 cumulative_time_us_on_node) const {
+std::string StatSummarizer::ColumnString(const std::string& name,
+ const Detail& detail,
+ int64 cumulative_stat_on_node,
+ Stat<int64> stat) const {
std::stringstream stream;
stream << std::fixed << std::setprecision(3) << std::setw(9)
- << detail.first_start_micros / 1000.0;
+ << detail.first_start / 1000.0;
stream << std::fixed << std::setprecision(3) << std::setw(9)
- << detail.first_rel_end_micros / 1000.0;
+ << detail.first_rel_end / 1000.0;
- double avg_time_ms = detail.total_micros / 1000.0 / num_runs();
+ double avg_time_ms = detail.total / 1000.0 / num_runs();
stream << std::fixed << std::setprecision(3) << std::setw(9) << avg_time_ms;
- double percentage = detail.total_micros * 100.0 / run_total_micros_.sum();
+ double percentage = detail.total * 100.0 / stat.sum();
stream << "\t" << std::fixed << std::setprecision(3) << std::setw(7)
<< percentage << "%";
- double cdf_percentage =
- cumulative_time_us_on_node * 100.0 / run_total_micros_.sum();
+ double cdf_percentage = cumulative_stat_on_node * 100.0 / stat.sum();
stream << "\t" << std::fixed << std::setprecision(3) << std::setw(7)
<< cdf_percentage << "%";
@@ -163,43 +187,62 @@ std::string StatSummarizer::ColumnString(
return stream.str();
}
-std::string StatSummarizer::GetStatsBySorting(
- SortingMetric sorting_metric, double cdf_cutoff_ratio,
- int num_max_nodes_to_print) const {
+std::string StatSummarizer::GetStatsBySorting(SortingMetric sorting_metric,
+ double cdf_cutoff_ratio,
+ int num_max_nodes_to_print,
+ bool use_memory) const {
+ std::map<std::string, Detail> details;
+ Stat<int64> stat;
+ if (use_memory) {
+ details = memory_details_;
+ stat = memory_;
+ } else {
+ details = timing_details_;
+ stat = run_total_micros_;
+ }
num_max_nodes_to_print =
- std::min<int>(num_max_nodes_to_print, details_.size());
+ std::min<int>(num_max_nodes_to_print, details.size());
std::stringstream stream;
stream << ShortSummary() << std::endl;
- if (sorting_metric == SortingMetric::BY_TOTAL_DURATION) {
- stream << "============ Top by duration =================" << std::endl;
+ string out_opt = "duration";
+ string unit = "ms";
+ if (use_memory) {
+ out_opt = "memory usage";
+ unit = "KB";
+ }
+ if (sorting_metric == SortingMetric::BY_TOTAL) {
+ stream << "============ Top by " << out_opt << " in " << unit
+ << " =================" << std::endl;
} else {
CHECK(sorting_metric == SortingMetric::BY_RUN_ORDER);
- stream << "============ By run order =================" << std::endl;
+ stream << "============ By run order (" << unit
+ << ") =================" << std::endl;
}
stream << HeaderString() << std::endl;
std::priority_queue<
std::pair<int64, const std::pair<const std::string, Detail>*>>
- timings;
- for (const auto& entry : details_) {
- timings.emplace(sorting_metric == SortingMetric::BY_TOTAL_DURATION
- ? entry.second.total_micros
- : -entry.second.first_start_micros,
- &entry);
+ statistics;
+ for (const auto& entry : details) {
+ statistics.emplace(sorting_metric == SortingMetric::BY_TOTAL
+ ? entry.second.total
+ : -entry.second.first_start,
+ &entry);
}
- const int64 cutoff_point = run_total_micros_.sum() * cdf_cutoff_ratio;
+ const int64 cutoff_point = stat.sum() * cdf_cutoff_ratio;
int64 accumulated_us = 0;
- for (int i = 0; !timings.empty() && i < num_max_nodes_to_print &&
+ for (int i = 0; !statistics.empty() && i < num_max_nodes_to_print &&
accumulated_us <= cutoff_point;
++i) {
- accumulated_us += timings.top().second->second.total_micros;
- stream << ColumnString(timings.top().second->first,
- timings.top().second->second, accumulated_us)
+ accumulated_us += statistics.top().second->second.total;
+ stream << ColumnString(statistics.top().second->first,
+ statistics.top().second->second, accumulated_us,
+ stat)
<< std::endl;
- timings.pop();
+ statistics.pop();
}
return stream.str();
@@ -207,48 +250,87 @@ std::string StatSummarizer::GetStatsBySorting(
std::string StatSummarizer::GetStatsByTopDurations(
double cdf_cutoff, int num_max_nodes_to_print) const {
- return GetStatsBySorting(SortingMetric::BY_TOTAL_DURATION, cdf_cutoff,
- num_max_nodes_to_print);
+ return GetTimingStatsByTopDurations(cdf_cutoff, num_max_nodes_to_print);
}
std::string StatSummarizer::GetStatsByRunOrder() const {
- return GetStatsBySorting(SortingMetric::BY_RUN_ORDER,
- std::numeric_limits<int>::max(),
- std::numeric_limits<int>::max());
+ return GetTimingStatsByRunOrder();
}
std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions() const {
+ return GetTimingStatsByOrderOfNodeDefinitions();
+}
+
+std::string StatSummarizer::GetTimingStatsByTopDurations(
+ double cdf_cutoff, int num_max_nodes_to_print) const {
+ return GetStatsBySorting(SortingMetric::BY_TOTAL, cdf_cutoff,
+ num_max_nodes_to_print, false);
+}
+
+std::string StatSummarizer::GetTimingStatsByRunOrder() const {
+ return GetStatsBySorting(SortingMetric::BY_RUN_ORDER, 1.0,
+ std::numeric_limits<int>::max(), false);
+}
+
+std::string StatSummarizer::GetTimingStatsByOrderOfNodeDefinitions() const {
+ return GetStatsByOrderOfNodeDefinitions(false);
+}
+
+std::string StatSummarizer::GetMemoryStatsByUsage(
+ double cdf_cutoff, int num_max_nodes_to_print) const {
+ return GetStatsBySorting(SortingMetric::BY_TOTAL, cdf_cutoff,
+ num_max_nodes_to_print, true);
+}
+
+std::string StatSummarizer::GetMemoryStatsByRunOrder() const {
+ return GetStatsBySorting(SortingMetric::BY_RUN_ORDER, 1.0,
+ std::numeric_limits<int>::max(), true);
+}
+
+std::string StatSummarizer::GetMemoryStatsByOrderOfNodeDefinitions() const {
+ return GetStatsByOrderOfNodeDefinitions(true);
+}
+
+std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions(
+ bool use_memory) const {
+ string type;
+ if (use_memory) {
+ type = "Memory Usage";
+ } else {
+ type = "Timings";
+ }
std::stringstream stream;
stream << ShortSummary() << std::endl;
- stream << "============ By order of graph definition ================="
- << std::endl;
+ stream << "============ " << type
+ << " by order of graph definition =================" << std::endl;
stream << HeaderString() << std::endl;
int64 accumulated_us = 0;
-
- auto details_us_copy = details_;
+ auto details = use_memory ? memory_details_ : timing_details_;
+ Stat<int64> stat = use_memory ? memory_ : run_total_micros_;
for (const auto& node_name_op : nodes_in_def_order_) {
- auto detail_it = details_us_copy.find(node_name_op);
- if (detail_it == details_us_copy.end()) {
+ auto detail_it = details.find(node_name_op);
+ if (detail_it == details.end()) {
continue;
}
- accumulated_us += detail_it->second.total_micros;
- stream << ColumnString(detail_it->first, detail_it->second, accumulated_us)
+ accumulated_us += detail_it->second.total;
+ stream << ColumnString(detail_it->first, detail_it->second, accumulated_us,
+ stat)
<< std::endl;
- details_us_copy.erase(detail_it);
+ details.erase(detail_it);
}
- if (!details_us_copy.empty()) {
+ if (!details.empty()) {
stream << "============ "
<< "The rest have different names between NodeExecStats and GraphDef"
<< "============ " << std::endl;
- for (const auto& entry : details_us_copy) {
+ for (const auto& entry : details) {
// Prints the remaining nodes whose names are different from the name in
// graph definition.
- accumulated_us += entry.second.total_micros;
- stream << ColumnString(entry.first, entry.second, accumulated_us)
+ accumulated_us += entry.second.total;
+ stream << ColumnString(entry.first, entry.second, accumulated_us, stat)
<< std::endl;
}
}
@@ -258,8 +340,11 @@ std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions() const {
void StatSummarizer::PrintStepStats() const {
LOG(INFO) << "Total time (us): " << run_total_micros_;
- LOG(INFO) << GetStatsByRunOrder();
- LOG(INFO) << GetStatsByTopDurations();
+ LOG(INFO) << GetTimingStatsByRunOrder();
+ LOG(INFO) << GetTimingStatsByTopDurations();
+ LOG(INFO) << "Total Memory (bytes): " << memory_;
+ LOG(INFO) << GetMemoryStatsByRunOrder();
+ LOG(INFO) << GetMemoryStatsByUsage();
LOG(INFO);
}
@@ -267,8 +352,8 @@ void StatSummarizer::PrintOutputs() const {
std::priority_queue<
std::pair<int64, const std::pair<const std::string, Detail>*>>
timings;
- for (const auto& entry : details_) {
- timings.emplace(-entry.second.first_start_micros, &entry);
+ for (const auto& entry : timing_details_) {
+ timings.emplace(-entry.second.first_start, &entry);
}
LOG(INFO) << "============ Node output tensor sizes in run order ========";