diff options
author | 2016-08-26 14:33:09 -0800 | |
---|---|---|
committer | 2016-08-26 15:46:35 -0700 | |
commit | 26475fec68285929bad8fbb903fb1a3f62f1ad75 (patch) | |
tree | da9686066c227d60180f1680a9ee7622b20e5df8 /tensorflow/core/util/stat_summarizer.cc | |
parent | 2a73e37e3c1be3886015908169f934096b9d9f9f (diff) |
Added the ability to view memory usage for a tensorflow graph execution.
Change: 131449413
Diffstat (limited to 'tensorflow/core/util/stat_summarizer.cc')
-rw-r--r-- | tensorflow/core/util/stat_summarizer.cc | 201 |
1 files changed, 143 insertions, 58 deletions
diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc index a4c02cb9b2..33cf5fe961 100644 --- a/tensorflow/core/util/stat_summarizer.cc +++ b/tensorflow/core/util/stat_summarizer.cc @@ -40,7 +40,8 @@ StatSummarizer::StatSummarizer(const tensorflow::GraphDef& tensorflow_graph) { void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { int64 curr_total = 0; - if (details_.empty() && !step_stats.dev_stats().empty() && + int64 mem_total = 0; + if (timing_details_.empty() && !step_stats.dev_stats().empty() && !step_stats.dev_stats(0).node_stats().empty()) { first_node_start_micros_ = step_stats.dev_stats(0).node_stats(0).all_start_micros(); @@ -48,15 +49,15 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { for (const auto& ds : step_stats.dev_stats()) { for (const auto& ns : ds.node_stats()) { + // timing stats const int64 curr_time = ns.all_end_rel_micros(); curr_total += curr_time; - auto result = details_.emplace(ns.node_name(), Detail()); + auto result = timing_details_.emplace(ns.node_name(), Detail()); Detail* detail = &(result.first->second); if (result.second) { - detail->first_rel_end_micros = curr_time; - detail->first_start_micros = - ns.all_start_micros() - first_node_start_micros_; - detail->total_micros = curr_time; + detail->first_rel_end = curr_time; + detail->first_start = ns.all_start_micros() - first_node_start_micros_; + detail->total = curr_time; detail->outputs.resize(ns.output_size()); for (const auto& output : ns.output()) { const int32 slot = output.slot(); @@ -68,7 +69,7 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { detail->outputs[slot] = output.tensor_description(); } } else { - detail->total_micros += curr_time; + detail->total += curr_time; if (detail->outputs.size() != ns.output_size()) { LOG(WARNING) << "Number of outputs changed between runs for '" << ns.node_name() << "' - was " << detail->outputs.size() @@ -102,16 +103,39 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { } } } + + // memory stats + auto mem_result = memory_details_.emplace(ns.node_name(), Detail()); + bool first = mem_result.second; + auto& val = mem_result.first->second; + for (const auto& mem : ns.memory()) { + const int64 mem_usage = mem.total_bytes(); + mem_total += mem_usage; + + if (first) { + first = false; + val.first_start = mem_total - mem_usage; + val.first_rel_end = mem_usage; + val.total = 0; + } else if (mem_result.second) { + val.first_rel_end += mem_usage; + } + val.total += mem_usage; + } } } run_total_micros_.UpdateStat(curr_total); + memory_.UpdateStat(mem_total); } std::string StatSummarizer::ShortSummary() const { std::stringstream stream; stream << run_total_micros_.count() << " runs, avg " << std::setprecision(4) << run_total_micros_.avg() / 1000.0 << " ms, " << node_types_.size() - << " nodes defined " << details_.size() << " nodes observed"; + << " nodes defined " << timing_details_.size() << " nodes observed" + << std::endl; + stream << std::setprecision(9) << memory_.avg() / 1000.0 << " avg KB per run." + << std::endl; return stream.str(); } @@ -129,24 +153,24 @@ std::string StatSummarizer::HeaderString() const { return stream.str(); } -std::string StatSummarizer::ColumnString( - const std::string& name, const Detail& detail, - int64 cumulative_time_us_on_node) const { +std::string StatSummarizer::ColumnString(const std::string& name, + const Detail& detail, + int64 cumulative_stat_on_node, + Stat<int64> stat) const { std::stringstream stream; stream << std::fixed << std::setprecision(3) << std::setw(9) - << detail.first_start_micros / 1000.0; + << detail.first_start / 1000.0; stream << std::fixed << std::setprecision(3) << std::setw(9) - << detail.first_rel_end_micros / 1000.0; + << detail.first_rel_end / 1000.0; - double avg_time_ms = detail.total_micros / 1000.0 / num_runs(); + double avg_time_ms = detail.total / 1000.0 / num_runs(); stream << std::fixed << std::setprecision(3) << std::setw(9) << avg_time_ms; - double percentage = detail.total_micros * 100.0 / run_total_micros_.sum(); + double percentage = detail.total * 100.0 / stat.sum(); stream << "\t" << std::fixed << std::setprecision(3) << std::setw(7) << percentage << "%"; - double cdf_percentage = - cumulative_time_us_on_node * 100.0 / run_total_micros_.sum(); + double cdf_percentage = cumulative_stat_on_node * 100.0 / stat.sum(); stream << "\t" << std::fixed << std::setprecision(3) << std::setw(7) << cdf_percentage << "%"; @@ -163,43 +187,62 @@ std::string StatSummarizer::ColumnString( return stream.str(); } -std::string StatSummarizer::GetStatsBySorting( - SortingMetric sorting_metric, double cdf_cutoff_ratio, - int num_max_nodes_to_print) const { +std::string StatSummarizer::GetStatsBySorting(SortingMetric sorting_metric, + double cdf_cutoff_ratio, + int num_max_nodes_to_print, + bool use_memory) const { + std::map<std::string, Detail> details; + Stat<int64> stat; + if (use_memory) { + details = memory_details_; + stat = memory_; + } else { + details = timing_details_; + stat = run_total_micros_; + } num_max_nodes_to_print = - std::min<int>(num_max_nodes_to_print, details_.size()); + std::min<int>(num_max_nodes_to_print, details.size()); std::stringstream stream; stream << ShortSummary() << std::endl; - if (sorting_metric == SortingMetric::BY_TOTAL_DURATION) { - stream << "============ Top by duration =================" << std::endl; + string out_opt = "duration"; + string unit = "ms"; + if (use_memory) { + out_opt = "memory usage"; + unit = "KB"; + } + if (sorting_metric == SortingMetric::BY_TOTAL) { + stream << "============ Top by " << out_opt << " in " << unit + << " =================" << std::endl; } else { CHECK(sorting_metric == SortingMetric::BY_RUN_ORDER); - stream << "============ By run order =================" << std::endl; + stream << "============ By run order (" << unit + << ") =================" << std::endl; } stream << HeaderString() << std::endl; std::priority_queue< std::pair<int64, const std::pair<const std::string, Detail>*>> - timings; - for (const auto& entry : details_) { - timings.emplace(sorting_metric == SortingMetric::BY_TOTAL_DURATION - ? entry.second.total_micros - : -entry.second.first_start_micros, - &entry); + statistics; + for (const auto& entry : details) { + statistics.emplace(sorting_metric == SortingMetric::BY_TOTAL + ? entry.second.total + : -entry.second.first_start, + &entry); } - const int64 cutoff_point = run_total_micros_.sum() * cdf_cutoff_ratio; + const int64 cutoff_point = stat.sum() * cdf_cutoff_ratio; int64 accumulated_us = 0; - for (int i = 0; !timings.empty() && i < num_max_nodes_to_print && + for (int i = 0; !statistics.empty() && i < num_max_nodes_to_print && accumulated_us <= cutoff_point; ++i) { - accumulated_us += timings.top().second->second.total_micros; - stream << ColumnString(timings.top().second->first, - timings.top().second->second, accumulated_us) + accumulated_us += statistics.top().second->second.total; + stream << ColumnString(statistics.top().second->first, + statistics.top().second->second, accumulated_us, + stat) << std::endl; - timings.pop(); + statistics.pop(); } return stream.str(); @@ -207,48 +250,87 @@ std::string StatSummarizer::GetStatsBySorting( std::string StatSummarizer::GetStatsByTopDurations( double cdf_cutoff, int num_max_nodes_to_print) const { - return GetStatsBySorting(SortingMetric::BY_TOTAL_DURATION, cdf_cutoff, - num_max_nodes_to_print); + return GetTimingStatsByTopDurations(cdf_cutoff, num_max_nodes_to_print); } std::string StatSummarizer::GetStatsByRunOrder() const { - return GetStatsBySorting(SortingMetric::BY_RUN_ORDER, - std::numeric_limits<int>::max(), - std::numeric_limits<int>::max()); + return GetTimingStatsByRunOrder(); } std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions() const { + return GetTimingStatsByOrderOfNodeDefinitions(); +} + +std::string StatSummarizer::GetTimingStatsByTopDurations( + double cdf_cutoff, int num_max_nodes_to_print) const { + return GetStatsBySorting(SortingMetric::BY_TOTAL, cdf_cutoff, + num_max_nodes_to_print, false); +} + +std::string StatSummarizer::GetTimingStatsByRunOrder() const { + return GetStatsBySorting(SortingMetric::BY_RUN_ORDER, 1.0, + std::numeric_limits<int>::max(), false); +} + +std::string StatSummarizer::GetTimingStatsByOrderOfNodeDefinitions() const { + return GetStatsByOrderOfNodeDefinitions(false); +} + +std::string StatSummarizer::GetMemoryStatsByUsage( + double cdf_cutoff, int num_max_nodes_to_print) const { + return GetStatsBySorting(SortingMetric::BY_TOTAL, cdf_cutoff, + num_max_nodes_to_print, true); +} + +std::string StatSummarizer::GetMemoryStatsByRunOrder() const { + return GetStatsBySorting(SortingMetric::BY_RUN_ORDER, 1.0, + std::numeric_limits<int>::max(), true); +} + +std::string StatSummarizer::GetMemoryStatsByOrderOfNodeDefinitions() const { + return GetStatsByOrderOfNodeDefinitions(true); +} + +std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions( + bool use_memory) const { + string type; + if (use_memory) { + type = "Memory Usage"; + } else { + type = "Timings"; + } std::stringstream stream; stream << ShortSummary() << std::endl; - stream << "============ By order of graph definition =================" - << std::endl; + stream << "============ " << type + << " by order of graph definition =================" << std::endl; stream << HeaderString() << std::endl; int64 accumulated_us = 0; - - auto details_us_copy = details_; + auto details = use_memory ? memory_details_ : timing_details_; + Stat<int64> stat = use_memory ? memory_ : run_total_micros_; for (const auto& node_name_op : nodes_in_def_order_) { - auto detail_it = details_us_copy.find(node_name_op); - if (detail_it == details_us_copy.end()) { + auto detail_it = details.find(node_name_op); + if (detail_it == details.end()) { continue; } - accumulated_us += detail_it->second.total_micros; - stream << ColumnString(detail_it->first, detail_it->second, accumulated_us) + accumulated_us += detail_it->second.total; + stream << ColumnString(detail_it->first, detail_it->second, accumulated_us, + stat) << std::endl; - details_us_copy.erase(detail_it); + details.erase(detail_it); } - if (!details_us_copy.empty()) { + if (!details.empty()) { stream << "============ " << "The rest have different names between NodeExecStats and GraphDef" << "============ " << std::endl; - for (const auto& entry : details_us_copy) { + for (const auto& entry : details) { // Prints the remaining nodes whose names are different from the name in // graph definition. - accumulated_us += entry.second.total_micros; - stream << ColumnString(entry.first, entry.second, accumulated_us) + accumulated_us += entry.second.total; + stream << ColumnString(entry.first, entry.second, accumulated_us, stat) << std::endl; } } @@ -258,8 +340,11 @@ std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions() const { void StatSummarizer::PrintStepStats() const { LOG(INFO) << "Total time (us): " << run_total_micros_; - LOG(INFO) << GetStatsByRunOrder(); - LOG(INFO) << GetStatsByTopDurations(); + LOG(INFO) << GetTimingStatsByRunOrder(); + LOG(INFO) << GetTimingStatsByTopDurations(); + LOG(INFO) << "Total Memory (bytes): " << memory_; + LOG(INFO) << GetMemoryStatsByRunOrder(); + LOG(INFO) << GetMemoryStatsByUsage(); LOG(INFO); } @@ -267,8 +352,8 @@ void StatSummarizer::PrintOutputs() const { std::priority_queue< std::pair<int64, const std::pair<const std::string, Detail>*>> timings; - for (const auto& entry : details_) { - timings.emplace(-entry.second.first_start_micros, &entry); + for (const auto& entry : timing_details_) { + timings.emplace(-entry.second.first_start, &entry); } LOG(INFO) << "============ Node output tensor sizes in run order ========"; |