diff options
author | Andrew Harp <andrewharp@google.com> | 2016-12-12 21:15:49 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-12-12 21:25:00 -0800 |
commit | b714503af6beb11a3f370c21e8a830b00d0b130e (patch) | |
tree | 26238f0d27e6b76222902b8530c67d15e310fe75 /tensorflow/core/util/stat_summarizer.cc | |
parent | f6585e1210eb5f48554d2de38e4e5ca09f734096 (diff) |
Refactor StatSummarizer and make it print a summary by node type.
Change: 141848201
Diffstat (limited to 'tensorflow/core/util/stat_summarizer.cc')
-rw-r--r-- | tensorflow/core/util/stat_summarizer.cc | 190 |
1 files changed, 148 insertions, 42 deletions
diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc index 6bd3d9c780..65b7039b41 100644 --- a/tensorflow/core/util/stat_summarizer.cc +++ b/tensorflow/core/util/stat_summarizer.cc @@ -38,6 +38,41 @@ StatSummarizer::StatSummarizer(const tensorflow::GraphDef& tensorflow_graph) { } } +void StatSummarizer::Validate(const Detail* detail, + const NodeExecStats& ns) const { + if (detail->outputs.size() != ns.output_size()) { + LOG(WARNING) << "Number of outputs changed between runs for '" + << ns.node_name() << "' - was " << detail->outputs.size() + << ", now " << ns.output_size(); + } else { + for (const auto& output : ns.output()) { + const int32 slot = output.slot(); + if ((slot < 0) || (slot >= ns.output_size())) { + LOG(ERROR) << "Bad output slot '" << slot << "' for '" << ns.node_name() + << "'"; + return; + } + const auto& stored = detail->outputs[slot]; + const auto& current = output.tensor_description(); + bool do_shapes_match = true; + if (stored.shape().dim_size() != current.shape().dim_size()) { + do_shapes_match = false; + } else { + for (int i = 0; i < stored.shape().dim_size(); ++i) { + if (stored.shape().dim(i).size() != current.shape().dim(i).size()) { + do_shapes_match = false; + } + } + + if ((stored.dtype() != current.dtype()) || !do_shapes_match) { + LOG(WARNING) << "Output tensor changed between runs for '" + << ns.node_name(); + } + } + } + } +} + void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { int64 curr_total = 0; int64 mem_total = 0; @@ -48,16 +83,17 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { } for (const auto& ds : step_stats.dev_stats()) { + // timing stats for (const auto& ns : ds.node_stats()) { - // timing stats const int64 curr_time = ns.all_end_rel_micros(); curr_total += curr_time; auto result = timing_details_.emplace(ns.node_name(), Detail()); Detail* detail = &(result.first->second); + if (result.second) { + detail->total = 0; detail->first_rel_end = curr_time; detail->first_start = ns.all_start_micros() - first_node_start_micros_; - detail->total = curr_time; detail->outputs.resize(ns.output_size()); for (const auto& output : ns.output()) { const int32 slot = output.slot(); @@ -68,43 +104,15 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { } detail->outputs[slot] = output.tensor_description(); } - } else { - detail->total += curr_time; - if (detail->outputs.size() != ns.output_size()) { - LOG(WARNING) << "Number of outputs changed between runs for '" - << ns.node_name() << "' - was " << detail->outputs.size() - << ", now " << ns.output_size(); - } else { - for (const auto& output : ns.output()) { - const int32 slot = output.slot(); - if ((slot < 0) || (slot >= ns.output_size())) { - LOG(ERROR) << "Bad output slot '" << slot << "' for '" - << ns.node_name() << "'"; - continue; - } - const auto& stored = detail->outputs[slot]; - const auto& current = output.tensor_description(); - bool do_shapes_match = true; - if (stored.shape().dim_size() != current.shape().dim_size()) { - do_shapes_match = false; - } else { - for (int i = 0; i < stored.shape().dim_size(); ++i) { - if (stored.shape().dim(i).size() != - current.shape().dim(i).size()) { - do_shapes_match = false; - } - } - - if ((stored.dtype() != current.dtype()) || !do_shapes_match) { - LOG(WARNING) << "Output tensor changed between runs for '" - << ns.node_name(); - } - } - } - } } - // memory stats + detail->total += curr_time; + Validate(detail, ns); + } + run_total_micros_.UpdateStat(curr_total); + + // memory stats + for (const auto& ns : ds.node_stats()) { auto mem_result = memory_details_.emplace(ns.node_name(), Detail()); bool first = mem_result.second; auto& val = mem_result.first->second; @@ -124,7 +132,6 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { } } } - run_total_micros_.UpdateStat(curr_total); memory_.UpdateStat(mem_total); } @@ -204,7 +211,6 @@ std::string StatSummarizer::GetStatsBySorting(SortingMetric sorting_metric, std::min<int>(num_max_nodes_to_print, details.size()); std::stringstream stream; - stream << ShortSummary() << std::endl; string out_opt = "duration"; string unit = "ms"; if (use_memory) { @@ -300,7 +306,6 @@ std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions( type = "Timings"; } std::stringstream stream; - stream << ShortSummary() << std::endl; stream << "============ " << type << " by order of graph definition =================" << std::endl; stream << HeaderString() << std::endl; @@ -338,20 +343,121 @@ std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions( return stream.str(); } +std::string StatSummarizer::GetStatsByNodeType() const { + std::stringstream stream; + + stream << "================ Summary by node type ================" + << std::endl; + + int64 accumulated_us = 0; + int64 accumulated_kb = 0; + std::map<string, int64> node_type_map_count; + std::map<string, int64> node_type_map_time; + std::map<string, int64> node_type_map_memory; + + int64 num_processed = 0; + + LOG(INFO) << "nodes_in_def_order_ size: " << nodes_in_def_order_.size(); + LOG(INFO) << "timing_details_ size: " << timing_details_.size(); + for (const auto& timing_detail : timing_details_) { + const string node_name = timing_detail.first; + + int64 curr_time_val = timing_detail.second.total; + accumulated_us += curr_time_val; + + auto memory_details_it = memory_details_.find(node_name); + if (memory_details_it == memory_details_.end()) { + LOG(WARNING) << "Timing found but not memory! " << node_name; + continue; + } + + ++num_processed; + int64 curr_memory_val = memory_details_it->second.total; + accumulated_kb += curr_memory_val; + + string node_type = "<>"; + + auto node_type_it = node_types_.find(node_name); + if (node_type_it != node_types_.end()) { + node_type = node_type_it->second; + } + + node_type_map_count[node_type] += 1; + node_type_map_time[node_type] += curr_time_val; + node_type_map_memory[node_type] += curr_memory_val; + } + + LOG(INFO) << "Processed " << num_processed << " nodes"; + + // Sort them. + std::priority_queue<std::pair<int64, std::pair<string, int64>>> timings; + for (const auto& node_type : node_type_map_time) { + const int64 mem_used = node_type_map_memory[node_type.first]; + timings.emplace(node_type.second, + std::pair<string, int64>(node_type.first, mem_used)); + } + + stream << std::setw(24) << std::right << "[Node type]" + << "\t" << std::setw(9) << std::right << "[count]" + << "\t" << std::setw(10) << std::right << "[avg ms]" + << "\t" << std::setw(11) << std::right << "[avg %]" + << "\t" << std::setw(11) << std::right << "[cdf %]" + << "\t" << std::setw(10) << std::right << "[mem KB]" << std::endl; + + float avg_total_time_ms = 0.0f; + float cdf = 0.0f; + while (!timings.empty()) { + auto entry = timings.top(); + timings.pop(); + + const string node_type = entry.second.first; + const float memory = + entry.second.second / 1000.0f / static_cast<float>(num_runs()); + const int64 node_type_total_us = entry.first; + + const float time_per_run_ms = + (node_type_total_us / static_cast<float>(num_runs()) / 1000.0f); + + avg_total_time_ms += time_per_run_ms; + const float percentage = + ((entry.first / static_cast<float>(accumulated_us)) * 100.0f); + cdf += percentage; + + stream << std::setw(24) << std::right << node_type << "\t" << std::setw(9) + << std::right << node_type_map_count[node_type] << "\t" + << std::setw(10) << std::right << std::fixed << std::setprecision(2) + << time_per_run_ms << "\t" << std::setw(10) << std::right + << std::fixed << std::setprecision(2) << percentage << "%" + << "\t" << std::setw(10) << std::right << std::fixed + << std::setprecision(2) << cdf << "%" + << "\t" << std::setw(10) << std::right << std::fixed + << std::setprecision(2) << memory << std::endl; + } + return stream.str(); +} + std::string StatSummarizer::GetOutputString() const { std::stringstream stream; + stream << "Total time (us): " << run_total_micros_ << std::endl; stream << GetTimingStatsByRunOrder(); stream << GetTimingStatsByTopDurations(); stream << "Total Memory (bytes): " << memory_ << std::endl; stream << GetMemoryStatsByRunOrder(); stream << GetMemoryStatsByUsage(); + stream << GetStatsByNodeType(); + + stream << ShortSummary() << std::endl; + return stream.str(); } void StatSummarizer::PrintStepStats() const { - LOG(INFO) << GetOutputString(); - LOG(INFO); + string output = GetOutputString(); + std::istringstream iss(output); + for (std::string line; std::getline(iss, line);) { + LOG(INFO) << line; + } } void StatSummarizer::PrintOutputs() const { |