diff options
author | Andrew Harp <andrewharp@google.com> | 2016-12-13 22:32:46 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-12-13 22:43:54 -0800 |
commit | 9466e2c7b6cc7924ae02017686c60209679fb3c4 (patch) | |
tree | 45d55686601f25ca276536ef96d263823d212b8b /tensorflow/core/util/stat_summarizer.cc | |
parent | 9004714bc4de1c78735bfe8178b441a371df1762 (diff) |
Refactor StatSummarizer: use Stats for more things, remove redundant methods, combine mem/time data structures, and overhaul sorting / output formatting.
Change: 141981032
Diffstat (limited to 'tensorflow/core/util/stat_summarizer.cc')
-rw-r--r-- | tensorflow/core/util/stat_summarizer.cc | 430 |
1 files changed, 175 insertions, 255 deletions
diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc index 65b7039b41..ebe7bebde7 100644 --- a/tensorflow/core/util/stat_summarizer.cc +++ b/tensorflow/core/util/stat_summarizer.cc @@ -74,26 +74,35 @@ void StatSummarizer::Validate(const Detail* detail, } void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { - int64 curr_total = 0; + int64 curr_total_us = 0; int64 mem_total = 0; - if (timing_details_.empty() && !step_stats.dev_stats().empty() && - !step_stats.dev_stats(0).node_stats().empty()) { - first_node_start_micros_ = - step_stats.dev_stats(0).node_stats(0).all_start_micros(); - } + int64 first_node_start_us = + step_stats.dev_stats(0).node_stats(0).all_start_micros(); + + int node_num = 0; for (const auto& ds : step_stats.dev_stats()) { - // timing stats for (const auto& ns : ds.node_stats()) { + ++node_num; const int64 curr_time = ns.all_end_rel_micros(); - curr_total += curr_time; - auto result = timing_details_.emplace(ns.node_name(), Detail()); + curr_total_us += curr_time; + auto result = details_.emplace(ns.node_name(), Detail()); Detail* detail = &(result.first->second); + detail->start_us.UpdateStat(ns.all_start_micros() - first_node_start_us); + detail->rel_end_us.UpdateStat(curr_time); + + // If this is the first pass, initialize some values. if (result.second) { - detail->total = 0; - detail->first_rel_end = curr_time; - detail->first_start = ns.all_start_micros() - first_node_start_micros_; + detail->name = ns.node_name(); + + auto node_type_it = node_types_.find(detail->name); + if (node_type_it != node_types_.end()) { + detail->type = node_type_it->second; + } + + detail->run_order = node_num; + detail->outputs.resize(ns.output_size()); for (const auto& output : ns.output()) { const int32 slot = output.slot(); @@ -106,251 +115,143 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { } } - detail->total += curr_time; - Validate(detail, ns); - } - run_total_micros_.UpdateStat(curr_total); - - // memory stats - for (const auto& ns : ds.node_stats()) { - auto mem_result = memory_details_.emplace(ns.node_name(), Detail()); - bool first = mem_result.second; - auto& val = mem_result.first->second; + int64 curr_node_mem = 0; for (const auto& mem : ns.memory()) { const int64 mem_usage = mem.total_bytes(); - mem_total += mem_usage; - - if (first) { - first = false; - val.first_start = mem_total - mem_usage; - val.first_rel_end = mem_usage; - val.total = 0; - } else if (mem_result.second) { - val.first_rel_end += mem_usage; - } - val.total += mem_usage; + curr_node_mem += mem_usage; } + detail->mem_used.UpdateStat(curr_node_mem); + mem_total += curr_node_mem; + + Validate(detail, ns); } } + + run_total_us_.UpdateStat(curr_total_us); memory_.UpdateStat(mem_total); } std::string StatSummarizer::ShortSummary() const { std::stringstream stream; - stream << run_total_micros_.count() << " runs, avg " << std::setprecision(4) - << run_total_micros_.avg() / 1000.0 << " ms, " << node_types_.size() - << " nodes defined " << timing_details_.size() << " nodes observed" - << std::endl; - stream << std::setprecision(9) << memory_.avg() / 1000.0 << " avg KB per run." - << std::endl; + stream << "Timings (microseconds): "; + run_total_us_.OutputToStream(&stream); + stream << std::endl; + + stream << "Memory (bytes): "; + memory_.OutputToStream(&stream); + stream << std::endl; + + stream << node_types_.size() << " nodes defined " << details_.size() + << " nodes observed" << std::endl; return stream.str(); } -std::string StatSummarizer::HeaderString() const { - std::stringstream stream; - stream << std::setw(9) << "[start]" << std::setw(9) << "[first]" - << std::setw(9) << "[avg]"; - stream << "\t" << std::setw(8) << "[%]" - << " "; - stream << "\t" << std::setw(8) << "[cdf%]" - << " "; - stream << "\t" << std::setw(10) << "[Op]"; - stream << "\t" - << "[Name]"; - return stream.str(); +std::ostream& InitField(std::ostream& stream, int width) { + stream << "\t" << std::right << std::setw(width) << std::fixed + << std::setprecision(3); + return stream; } -std::string StatSummarizer::ColumnString(const std::string& name, - const Detail& detail, - int64 cumulative_stat_on_node, - Stat<int64> stat) const { +std::string StatSummarizer::HeaderString(const string& title) const { std::stringstream stream; - stream << std::fixed << std::setprecision(3) << std::setw(9) - << detail.first_start / 1000.0; - stream << std::fixed << std::setprecision(3) << std::setw(9) - << detail.first_rel_end / 1000.0; - - double avg_time_ms = detail.total / 1000.0 / num_runs(); - stream << std::fixed << std::setprecision(3) << std::setw(9) << avg_time_ms; - - double percentage = detail.total * 100.0 / stat.sum(); - stream << "\t" << std::fixed << std::setprecision(3) << std::setw(7) - << percentage << "%"; - - double cdf_percentage = cumulative_stat_on_node * 100.0 / stat.sum(); - stream << "\t" << std::fixed << std::setprecision(3) << std::setw(7) - << cdf_percentage << "%"; - - stream << "\t" << std::setw(10); - auto op_it = node_types_.find(name); - if (op_it != node_types_.end()) { - stream << op_it->second; - } else { - stream << " "; - } - stream << "\t" << name; + stream << "============================== " << title + << " ==============================" << std::endl; + InitField(stream, 24) << "[node type]"; + InitField(stream, 9) << "[start]"; + InitField(stream, 9) << "[first]"; + InitField(stream, 9) << "[avg ms]"; + InitField(stream, 8) << "[%]"; + InitField(stream, 8) << "[cdf%]"; + InitField(stream, 10) << "[mem KB]"; + stream << "\t" + << "[Name]"; return stream.str(); } -std::string StatSummarizer::GetStatsBySorting(SortingMetric sorting_metric, - double cdf_cutoff_ratio, - int num_max_nodes_to_print, - bool use_memory) const { - std::map<std::string, Detail> details; - Stat<int64> stat; - if (use_memory) { - details = memory_details_; - stat = memory_; - } else { - details = timing_details_; - stat = run_total_micros_; - } - num_max_nodes_to_print = - std::min<int>(num_max_nodes_to_print, details.size()); +std::string StatSummarizer::ColumnString(const Detail& detail, + const int64 cumulative_stat_on_node, + const Stat<int64>& stat) const { + const double start_ms = detail.start_us.avg() / 1000.0; + const double first_time_ms = detail.rel_end_us.first() / 1000.0; + const double avg_time_ms = detail.rel_end_us.avg() / 1000.0; + const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum(); + const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum(); std::stringstream stream; - string out_opt = "duration"; - string unit = "ms"; - if (use_memory) { - out_opt = "memory usage"; - unit = "KB"; - } - if (sorting_metric == SortingMetric::BY_TOTAL) { - stream << "============ Top by " << out_opt << " in " << unit - << " =================" << std::endl; - } else { - CHECK(sorting_metric == SortingMetric::BY_RUN_ORDER); - stream << "============ By run order (" << unit - << ") =================" << std::endl; - } - stream << HeaderString() << std::endl; - - std::priority_queue< - std::pair<int64, const std::pair<const std::string, Detail>*>> - statistics; - for (const auto& entry : details) { - statistics.emplace(sorting_metric == SortingMetric::BY_TOTAL - ? entry.second.total - : -entry.second.first_start, - &entry); - } - - const int64 cutoff_point = stat.sum() * cdf_cutoff_ratio; - int64 accumulated_us = 0; - - for (int i = 0; !statistics.empty() && i < num_max_nodes_to_print && - accumulated_us <= cutoff_point; - ++i) { - accumulated_us += statistics.top().second->second.total; - stream << ColumnString(statistics.top().second->first, - statistics.top().second->second, accumulated_us, - stat) - << std::endl; - statistics.pop(); - } + InitField(stream, 24) << detail.type; + InitField(stream, 9) << start_ms; + InitField(stream, 9) << first_time_ms; + InitField(stream, 9) << avg_time_ms; + InitField(stream, 7) << percentage << "%"; + InitField(stream, 7) << cdf_percentage << "%"; + InitField(stream, 10) << detail.mem_used.newest() / 1000.0; + stream << "\t" << detail.name; return stream.str(); } -std::string StatSummarizer::GetStatsByTopDurations( - double cdf_cutoff, int num_max_nodes_to_print) const { - return GetTimingStatsByTopDurations(cdf_cutoff, num_max_nodes_to_print); -} - -std::string StatSummarizer::GetStatsByRunOrder() const { - return GetTimingStatsByRunOrder(); -} - -std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions() const { - return GetTimingStatsByOrderOfNodeDefinitions(); -} - -std::string StatSummarizer::GetTimingStatsByTopDurations( - double cdf_cutoff, int num_max_nodes_to_print) const { - return GetStatsBySorting(SortingMetric::BY_TOTAL, cdf_cutoff, - num_max_nodes_to_print, false); -} +void StatSummarizer::OrderNodesByMetric( + SortingMetric metric, std::vector<const Detail*>* details) const { + std::priority_queue<std::pair<string, const Detail*>> sorted_list; + const int num_nodes = nodes_in_def_order_.size(); -std::string StatSummarizer::GetTimingStatsByRunOrder() const { - return GetStatsBySorting(SortingMetric::BY_RUN_ORDER, 1.0, - std::numeric_limits<int>::max(), false); -} - -std::string StatSummarizer::GetTimingStatsByOrderOfNodeDefinitions() const { - return GetStatsByOrderOfNodeDefinitions(false); -} - -std::string StatSummarizer::GetMemoryStatsByUsage( - double cdf_cutoff, int num_max_nodes_to_print) const { - return GetStatsBySorting(SortingMetric::BY_TOTAL, cdf_cutoff, - num_max_nodes_to_print, true); -} - -std::string StatSummarizer::GetMemoryStatsByRunOrder() const { - return GetStatsBySorting(SortingMetric::BY_RUN_ORDER, 1.0, - std::numeric_limits<int>::max(), true); -} - -std::string StatSummarizer::GetMemoryStatsByOrderOfNodeDefinitions() const { - return GetStatsByOrderOfNodeDefinitions(true); -} - -std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions( - bool use_memory) const { - string type; - if (use_memory) { - type = "Memory Usage"; - } else { - type = "Timings"; - } - std::stringstream stream; - stream << "============ " << type - << " by order of graph definition =================" << std::endl; - stream << HeaderString() << std::endl; - - int64 accumulated_us = 0; - auto details = use_memory ? memory_details_ : timing_details_; - Stat<int64> stat = use_memory ? memory_ : run_total_micros_; + for (const auto& det : details_) { + const Detail* detail = &(det.second); + std::stringstream stream; + stream << std::setw(20) << std::right << std::setprecision(10); - for (const auto& node_name_op : nodes_in_def_order_) { - auto detail_it = details.find(node_name_op); - if (detail_it == details.end()) { - continue; + int definition_index = 0; + auto it = std::find(nodes_in_def_order_.begin(), nodes_in_def_order_.end(), + detail->name); + if (it != nodes_in_def_order_.end()) { + definition_index = std::distance(nodes_in_def_order_.begin(), it); } - accumulated_us += detail_it->second.total; - stream << ColumnString(detail_it->first, detail_it->second, accumulated_us, - stat) - << std::endl; - details.erase(detail_it); - } - if (!details.empty()) { - stream << "============ " - << "The rest have different names between NodeExecStats and GraphDef" - << "============ " << std::endl; - - for (const auto& entry : details) { - // Prints the remaining nodes whose names are different from the name in - // graph definition. - accumulated_us += entry.second.total; - stream << ColumnString(entry.first, entry.second, accumulated_us, stat) - << std::endl; + switch (metric) { + case BY_NAME: + stream << detail->name; + break; + case BY_DEFINITION_ORDER: + stream << num_nodes - definition_index; + break; + case BY_RUN_ORDER: + stream << num_nodes - detail->run_order; + break; + case BY_TIME: + stream << detail->rel_end_us.avg(); + break; + case BY_MEMORY: + stream << detail->mem_used.avg(); + break; + case BY_TYPE: + stream << detail->type; + break; + default: + stream << ""; + break; } + + sorted_list.emplace(stream.str(), detail); } - return stream.str(); + while (!sorted_list.empty()) { + auto entry = sorted_list.top(); + sorted_list.pop(); + details->push_back(entry.second); + } } std::string StatSummarizer::GetStatsByNodeType() const { std::stringstream stream; - stream << "================ Summary by node type ================" + stream << "============================== Summary by node type " + "==============================" << std::endl; int64 accumulated_us = 0; - int64 accumulated_kb = 0; + int64 accumulated_bytes = 0; std::map<string, int64> node_type_map_count; std::map<string, int64> node_type_map_time; std::map<string, int64> node_type_map_memory; @@ -358,22 +259,17 @@ std::string StatSummarizer::GetStatsByNodeType() const { int64 num_processed = 0; LOG(INFO) << "nodes_in_def_order_ size: " << nodes_in_def_order_.size(); - LOG(INFO) << "timing_details_ size: " << timing_details_.size(); - for (const auto& timing_detail : timing_details_) { - const string node_name = timing_detail.first; + LOG(INFO) << "timing_details_ size: " << details_.size(); + for (const auto& det : details_) { + const string node_name = det.first; + const Detail& detail = det.second; - int64 curr_time_val = timing_detail.second.total; + int64 curr_time_val = detail.rel_end_us.avg(); accumulated_us += curr_time_val; - auto memory_details_it = memory_details_.find(node_name); - if (memory_details_it == memory_details_.end()) { - LOG(WARNING) << "Timing found but not memory! " << node_name; - continue; - } - ++num_processed; - int64 curr_memory_val = memory_details_it->second.total; - accumulated_kb += curr_memory_val; + int64 curr_memory_val = detail.mem_used.newest(); + accumulated_bytes += curr_memory_val; string node_type = "<>"; @@ -397,12 +293,13 @@ std::string StatSummarizer::GetStatsByNodeType() const { std::pair<string, int64>(node_type.first, mem_used)); } - stream << std::setw(24) << std::right << "[Node type]" - << "\t" << std::setw(9) << std::right << "[count]" - << "\t" << std::setw(10) << std::right << "[avg ms]" - << "\t" << std::setw(11) << std::right << "[avg %]" - << "\t" << std::setw(11) << std::right << "[cdf %]" - << "\t" << std::setw(10) << std::right << "[mem KB]" << std::endl; + InitField(stream, 24) << "[Node type]"; + InitField(stream, 9) << "[count]"; + InitField(stream, 10) << "[avg ms]"; + InitField(stream, 11) << "[avg %]"; + InitField(stream, 11) << "[cdf %]"; + InitField(stream, 10) << "[mem KB]"; + stream << std::endl; float avg_total_time_ms = 0.0f; float cdf = 0.0f; @@ -411,40 +308,63 @@ std::string StatSummarizer::GetStatsByNodeType() const { timings.pop(); const string node_type = entry.second.first; - const float memory = - entry.second.second / 1000.0f / static_cast<float>(num_runs()); - const int64 node_type_total_us = entry.first; + const float memory = entry.second.second / 1000.0f; - const float time_per_run_ms = - (node_type_total_us / static_cast<float>(num_runs()) / 1000.0f); + const int64 node_type_total_us = entry.first; + const float time_per_run_ms = node_type_total_us / 1000.0f; avg_total_time_ms += time_per_run_ms; const float percentage = ((entry.first / static_cast<float>(accumulated_us)) * 100.0f); cdf += percentage; - stream << std::setw(24) << std::right << node_type << "\t" << std::setw(9) - << std::right << node_type_map_count[node_type] << "\t" - << std::setw(10) << std::right << std::fixed << std::setprecision(2) - << time_per_run_ms << "\t" << std::setw(10) << std::right - << std::fixed << std::setprecision(2) << percentage << "%" - << "\t" << std::setw(10) << std::right << std::fixed - << std::setprecision(2) << cdf << "%" - << "\t" << std::setw(10) << std::right << std::fixed - << std::setprecision(2) << memory << std::endl; + InitField(stream, 24) << node_type; + InitField(stream, 9) << node_type_map_count[node_type]; + InitField(stream, 10) << time_per_run_ms; + InitField(stream, 10) << percentage << "%"; + InitField(stream, 10) << cdf << "%"; + InitField(stream, 10) << memory; + stream << std::endl; } + stream << std::endl; + return stream.str(); +} + +std::string StatSummarizer::GetStatsByMetric(const string& title, + SortingMetric sorting_metric, + int num_stats) const { + std::vector<const Detail*> details; + OrderNodesByMetric(sorting_metric, &details); + + double cumulative_stat_on_node = 0; + + std::stringstream stream; + stream << HeaderString(title) << std::endl; + int stat_num = 0; + for (auto detail : details) { + ++stat_num; + if (num_stats > 0 && stat_num > num_stats) { + break; + } + + // TODO(andrewharp): Make this keep track of the particular metric for cdf. + cumulative_stat_on_node += detail->rel_end_us.sum(); + stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_) + << std::endl; + } + stream << std::endl; return stream.str(); } std::string StatSummarizer::GetOutputString() const { std::stringstream stream; - stream << "Total time (us): " << run_total_micros_ << std::endl; - stream << GetTimingStatsByRunOrder(); - stream << GetTimingStatsByTopDurations(); - stream << "Total Memory (bytes): " << memory_ << std::endl; - stream << GetMemoryStatsByRunOrder(); - stream << GetMemoryStatsByUsage(); + // TODO(andrewharp): Allow sorting metrics to be specified externally, e.g. + // on command line. + stream << GetStatsByMetric("Run Order", BY_RUN_ORDER, 0); + stream << GetStatsByMetric("Top by Computation Time", BY_TIME, 10); + stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY, 10); + stream << GetStatsByNodeType(); stream << ShortSummary() << std::endl; @@ -464,8 +384,8 @@ void StatSummarizer::PrintOutputs() const { std::priority_queue< std::pair<int64, const std::pair<const std::string, Detail>*>> timings; - for (const auto& entry : timing_details_) { - timings.emplace(-entry.second.first_start, &entry); + for (const auto& entry : details_) { + timings.emplace(-entry.second.start_us.avg(), &entry); } LOG(INFO) << "============ Node output tensor sizes in run order ========"; |