Refactor StatSummarizer: use Stats for more things, remove redundant methods, combine mem/time data structures, and overhaul sorting / output formatting.

Change: 141981032
author: Andrew Harp <andrewharp@google.com> 2016-12-13 22:32:46 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-12-13 22:43:54 -0800
commit: 9466e2c7b6cc7924ae02017686c60209679fb3c4 (patch)
tree: 45d55686601f25ca276536ef96d263823d212b8b /tensorflow/core/util/stat_summarizer.cc
parent: 9004714bc4de1c78735bfe8178b441a371df1762 (diff)
1 files changed, 175 insertions, 255 deletions
diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc
index 65b7039b41..ebe7bebde7 100644
--- a/tensorflow/core/util/stat_summarizer.cc
+++ b/tensorflow/core/util/stat_summarizer.cc
@@ -74,26 +74,35 @@ void StatSummarizer::Validate(const Detail* detail,
 }
 
 void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
-  int64 curr_total = 0;
+  int64 curr_total_us = 0;
   int64 mem_total = 0;
-  if (timing_details_.empty() && !step_stats.dev_stats().empty() &&
-      !step_stats.dev_stats(0).node_stats().empty()) {
-    first_node_start_micros_ =
-        step_stats.dev_stats(0).node_stats(0).all_start_micros();
-  }
 
+  int64 first_node_start_us =
+      step_stats.dev_stats(0).node_stats(0).all_start_micros();
+
+  int node_num = 0;
   for (const auto& ds : step_stats.dev_stats()) {
-    // timing stats
     for (const auto& ns : ds.node_stats()) {
+      ++node_num;
       const int64 curr_time = ns.all_end_rel_micros();
-      curr_total += curr_time;
-      auto result = timing_details_.emplace(ns.node_name(), Detail());
+      curr_total_us += curr_time;
+      auto result = details_.emplace(ns.node_name(), Detail());
       Detail* detail = &(result.first->second);
 
+      detail->start_us.UpdateStat(ns.all_start_micros() - first_node_start_us);
+      detail->rel_end_us.UpdateStat(curr_time);
+
+      // If this is the first pass, initialize some values.
       if (result.second) {
-        detail->total = 0;
-        detail->first_rel_end = curr_time;
-        detail->first_start = ns.all_start_micros() - first_node_start_micros_;
+        detail->name = ns.node_name();
+
+        auto node_type_it = node_types_.find(detail->name);
+        if (node_type_it != node_types_.end()) {
+          detail->type = node_type_it->second;
+        }
+
+        detail->run_order = node_num;
+
         detail->outputs.resize(ns.output_size());
         for (const auto& output : ns.output()) {
           const int32 slot = output.slot();
@@ -106,251 +115,143 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
         }
       }
 
-      detail->total += curr_time;
-      Validate(detail, ns);
-    }
-    run_total_micros_.UpdateStat(curr_total);
-
-    // memory stats
-    for (const auto& ns : ds.node_stats()) {
-      auto mem_result = memory_details_.emplace(ns.node_name(), Detail());
-      bool first = mem_result.second;
-      auto& val = mem_result.first->second;
+      int64 curr_node_mem = 0;
       for (const auto& mem : ns.memory()) {
         const int64 mem_usage = mem.total_bytes();
-        mem_total += mem_usage;
-
-        if (first) {
-          first = false;
-          val.first_start = mem_total - mem_usage;
-          val.first_rel_end = mem_usage;
-          val.total = 0;
-        } else if (mem_result.second) {
-          val.first_rel_end += mem_usage;
-        }
-        val.total += mem_usage;
+        curr_node_mem += mem_usage;
       }
+      detail->mem_used.UpdateStat(curr_node_mem);
+      mem_total += curr_node_mem;
+
+      Validate(detail, ns);
     }
   }
+
+  run_total_us_.UpdateStat(curr_total_us);
   memory_.UpdateStat(mem_total);
 }
 
 std::string StatSummarizer::ShortSummary() const {
   std::stringstream stream;
-  stream << run_total_micros_.count() << " runs, avg " << std::setprecision(4)
-         << run_total_micros_.avg() / 1000.0 << " ms, " << node_types_.size()
-         << " nodes defined " << timing_details_.size() << " nodes observed"
-         << std::endl;
-  stream << std::setprecision(9) << memory_.avg() / 1000.0 << " avg KB per run."
-         << std::endl;
+  stream << "Timings (microseconds): ";
+  run_total_us_.OutputToStream(&stream);
+  stream << std::endl;
+
+  stream << "Memory (bytes): ";
+  memory_.OutputToStream(&stream);
+  stream << std::endl;
+
+  stream << node_types_.size() << " nodes defined " << details_.size()
+         << " nodes observed" << std::endl;
   return stream.str();
 }
 
-std::string StatSummarizer::HeaderString() const {
-  std::stringstream stream;
-  stream << std::setw(9) << "[start]" << std::setw(9) << "[first]"
-         << std::setw(9) << "[avg]";
-  stream << "\t" << std::setw(8) << "[%]"
-         << " ";
-  stream << "\t" << std::setw(8) << "[cdf%]"
-         << " ";
-  stream << "\t" << std::setw(10) << "[Op]";
-  stream << "\t"
-         << "[Name]";
-  return stream.str();
+std::ostream& InitField(std::ostream& stream, int width) {
+  stream << "\t" << std::right << std::setw(width) << std::fixed
+         << std::setprecision(3);
+  return stream;
 }
 
-std::string StatSummarizer::ColumnString(const std::string& name,
-                                         const Detail& detail,
-                                         int64 cumulative_stat_on_node,
-                                         Stat<int64> stat) const {
+std::string StatSummarizer::HeaderString(const string& title) const {
   std::stringstream stream;
-  stream << std::fixed << std::setprecision(3) << std::setw(9)
-         << detail.first_start / 1000.0;
-  stream << std::fixed << std::setprecision(3) << std::setw(9)
-         << detail.first_rel_end / 1000.0;
-
-  double avg_time_ms = detail.total / 1000.0 / num_runs();
-  stream << std::fixed << std::setprecision(3) << std::setw(9) << avg_time_ms;
-
-  double percentage = detail.total * 100.0 / stat.sum();
-  stream << "\t" << std::fixed << std::setprecision(3) << std::setw(7)
-         << percentage << "%";
-
-  double cdf_percentage = cumulative_stat_on_node * 100.0 / stat.sum();
-  stream << "\t" << std::fixed << std::setprecision(3) << std::setw(7)
-         << cdf_percentage << "%";
-
-  stream << "\t" << std::setw(10);
-  auto op_it = node_types_.find(name);
-  if (op_it != node_types_.end()) {
-    stream << op_it->second;
-  } else {
-    stream << " ";
-  }
 
-  stream << "\t" << name;
+  stream << "============================== " << title
+         << " ==============================" << std::endl;
 
+  InitField(stream, 24) << "[node type]";
+  InitField(stream, 9) << "[start]";
+  InitField(stream, 9) << "[first]";
+  InitField(stream, 9) << "[avg ms]";
+  InitField(stream, 8) << "[%]";
+  InitField(stream, 8) << "[cdf%]";
+  InitField(stream, 10) << "[mem KB]";
+  stream << "\t"
+         << "[Name]";
   return stream.str();
 }
 
-std::string StatSummarizer::GetStatsBySorting(SortingMetric sorting_metric,
-                                              double cdf_cutoff_ratio,
-                                              int num_max_nodes_to_print,
-                                              bool use_memory) const {
-  std::map<std::string, Detail> details;
-  Stat<int64> stat;
-  if (use_memory) {
-    details = memory_details_;
-    stat = memory_;
-  } else {
-    details = timing_details_;
-    stat = run_total_micros_;
-  }
-  num_max_nodes_to_print =
-      std::min<int>(num_max_nodes_to_print, details.size());
+std::string StatSummarizer::ColumnString(const Detail& detail,
+                                         const int64 cumulative_stat_on_node,
+                                         const Stat<int64>& stat) const {
+  const double start_ms = detail.start_us.avg() / 1000.0;
+  const double first_time_ms = detail.rel_end_us.first() / 1000.0;
+  const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
+  const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
+  const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum();
 
   std::stringstream stream;
-  string out_opt = "duration";
-  string unit = "ms";
-  if (use_memory) {
-    out_opt = "memory usage";
-    unit = "KB";
-  }
-  if (sorting_metric == SortingMetric::BY_TOTAL) {
-    stream << "============ Top by " << out_opt << " in " << unit
-           << " =================" << std::endl;
-  } else {
-    CHECK(sorting_metric == SortingMetric::BY_RUN_ORDER);
-    stream << "============ By run order (" << unit
-           << ") =================" << std::endl;
-  }
-  stream << HeaderString() << std::endl;
-
-  std::priority_queue<
-      std::pair<int64, const std::pair<const std::string, Detail>*>>
-      statistics;
-  for (const auto& entry : details) {
-    statistics.emplace(sorting_metric == SortingMetric::BY_TOTAL
-                           ? entry.second.total
-                           : -entry.second.first_start,
-                       &entry);
-  }
-
-  const int64 cutoff_point = stat.sum() * cdf_cutoff_ratio;
-  int64 accumulated_us = 0;
-
-  for (int i = 0; !statistics.empty() && i < num_max_nodes_to_print &&
-                  accumulated_us <= cutoff_point;
-       ++i) {
-    accumulated_us += statistics.top().second->second.total;
-    stream << ColumnString(statistics.top().second->first,
-                           statistics.top().second->second, accumulated_us,
-                           stat)
-           << std::endl;
-    statistics.pop();
-  }
+  InitField(stream, 24) << detail.type;
+  InitField(stream, 9) << start_ms;
+  InitField(stream, 9) << first_time_ms;
+  InitField(stream, 9) << avg_time_ms;
+  InitField(stream, 7) << percentage << "%";
+  InitField(stream, 7) << cdf_percentage << "%";
+  InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
+  stream << "\t" << detail.name;
 
   return stream.str();
 }
 
-std::string StatSummarizer::GetStatsByTopDurations(
-    double cdf_cutoff, int num_max_nodes_to_print) const {
-  return GetTimingStatsByTopDurations(cdf_cutoff, num_max_nodes_to_print);
-}
-
-std::string StatSummarizer::GetStatsByRunOrder() const {
-  return GetTimingStatsByRunOrder();
-}
-
-std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions() const {
-  return GetTimingStatsByOrderOfNodeDefinitions();
-}
-
-std::string StatSummarizer::GetTimingStatsByTopDurations(
-    double cdf_cutoff, int num_max_nodes_to_print) const {
-  return GetStatsBySorting(SortingMetric::BY_TOTAL, cdf_cutoff,
-                           num_max_nodes_to_print, false);
-}
+void StatSummarizer::OrderNodesByMetric(
+    SortingMetric metric, std::vector<const Detail*>* details) const {
+  std::priority_queue<std::pair<string, const Detail*>> sorted_list;
+  const int num_nodes = nodes_in_def_order_.size();
 
-std::string StatSummarizer::GetTimingStatsByRunOrder() const {
-  return GetStatsBySorting(SortingMetric::BY_RUN_ORDER, 1.0,
-                           std::numeric_limits<int>::max(), false);
-}
-
-std::string StatSummarizer::GetTimingStatsByOrderOfNodeDefinitions() const {
-  return GetStatsByOrderOfNodeDefinitions(false);
-}
-
-std::string StatSummarizer::GetMemoryStatsByUsage(
-    double cdf_cutoff, int num_max_nodes_to_print) const {
-  return GetStatsBySorting(SortingMetric::BY_TOTAL, cdf_cutoff,
-                           num_max_nodes_to_print, true);
-}
-
-std::string StatSummarizer::GetMemoryStatsByRunOrder() const {
-  return GetStatsBySorting(SortingMetric::BY_RUN_ORDER, 1.0,
-                           std::numeric_limits<int>::max(), true);
-}
-
-std::string StatSummarizer::GetMemoryStatsByOrderOfNodeDefinitions() const {
-  return GetStatsByOrderOfNodeDefinitions(true);
-}
-
-std::string StatSummarizer::GetStatsByOrderOfNodeDefinitions(
-    bool use_memory) const {
-  string type;
-  if (use_memory) {
-    type = "Memory Usage";
-  } else {
-    type = "Timings";
-  }
-  std::stringstream stream;
-  stream << "============ " << type
-         << " by order of graph definition =================" << std::endl;
-  stream << HeaderString() << std::endl;
-
-  int64 accumulated_us = 0;
-  auto details = use_memory ? memory_details_ : timing_details_;
-  Stat<int64> stat = use_memory ? memory_ : run_total_micros_;
+  for (const auto& det : details_) {
+    const Detail* detail = &(det.second);
+    std::stringstream stream;
+    stream << std::setw(20) << std::right << std::setprecision(10);
 
-  for (const auto& node_name_op : nodes_in_def_order_) {
-    auto detail_it = details.find(node_name_op);
-    if (detail_it == details.end()) {
-      continue;
+    int definition_index = 0;
+    auto it = std::find(nodes_in_def_order_.begin(), nodes_in_def_order_.end(),
+                        detail->name);
+    if (it != nodes_in_def_order_.end()) {
+      definition_index = std::distance(nodes_in_def_order_.begin(), it);
     }
-    accumulated_us += detail_it->second.total;
-    stream << ColumnString(detail_it->first, detail_it->second, accumulated_us,
-                           stat)
-           << std::endl;
-    details.erase(detail_it);
-  }
 
-  if (!details.empty()) {
-    stream << "============ "
-           << "The rest have different names between NodeExecStats and GraphDef"
-           << "============ " << std::endl;
-
-    for (const auto& entry : details) {
-      // Prints the remaining nodes whose names are different from the name in
-      // graph definition.
-      accumulated_us += entry.second.total;
-      stream << ColumnString(entry.first, entry.second, accumulated_us, stat)
-             << std::endl;
+    switch (metric) {
+      case BY_NAME:
+        stream << detail->name;
+        break;
+      case BY_DEFINITION_ORDER:
+        stream << num_nodes - definition_index;
+        break;
+      case BY_RUN_ORDER:
+        stream << num_nodes - detail->run_order;
+        break;
+      case BY_TIME:
+        stream << detail->rel_end_us.avg();
+        break;
+      case BY_MEMORY:
+        stream << detail->mem_used.avg();
+        break;
+      case BY_TYPE:
+        stream << detail->type;
+        break;
+      default:
+        stream << "";
+        break;
     }
+
+    sorted_list.emplace(stream.str(), detail);
   }
 
-  return stream.str();
+  while (!sorted_list.empty()) {
+    auto entry = sorted_list.top();
+    sorted_list.pop();
+    details->push_back(entry.second);
+  }
 }
 
 std::string StatSummarizer::GetStatsByNodeType() const {
   std::stringstream stream;
 
-  stream << "================ Summary by node type ================"
+  stream << "============================== Summary by node type "
+            "=============================="
          << std::endl;
 
   int64 accumulated_us = 0;
-  int64 accumulated_kb = 0;
+  int64 accumulated_bytes = 0;
   std::map<string, int64> node_type_map_count;
   std::map<string, int64> node_type_map_time;
   std::map<string, int64> node_type_map_memory;
@@ -358,22 +259,17 @@ std::string StatSummarizer::GetStatsByNodeType() const {
   int64 num_processed = 0;
 
   LOG(INFO) << "nodes_in_def_order_ size: " << nodes_in_def_order_.size();
-  LOG(INFO) << "timing_details_ size: " << timing_details_.size();
-  for (const auto& timing_detail : timing_details_) {
-    const string node_name = timing_detail.first;
+  LOG(INFO) << "timing_details_ size: " << details_.size();
+  for (const auto& det : details_) {
+    const string node_name = det.first;
+    const Detail& detail = det.second;
 
-    int64 curr_time_val = timing_detail.second.total;
+    int64 curr_time_val = detail.rel_end_us.avg();
     accumulated_us += curr_time_val;
 
-    auto memory_details_it = memory_details_.find(node_name);
-    if (memory_details_it == memory_details_.end()) {
-      LOG(WARNING) << "Timing found but not memory! " << node_name;
-      continue;
-    }
-
     ++num_processed;
-    int64 curr_memory_val = memory_details_it->second.total;
-    accumulated_kb += curr_memory_val;
+    int64 curr_memory_val = detail.mem_used.newest();
+    accumulated_bytes += curr_memory_val;
 
     string node_type = "<>";
 
@@ -397,12 +293,13 @@ std::string StatSummarizer::GetStatsByNodeType() const {
                     std::pair<string, int64>(node_type.first, mem_used));
   }
 
-  stream << std::setw(24) << std::right << "[Node type]"
-         << "\t" << std::setw(9) << std::right << "[count]"
-         << "\t" << std::setw(10) << std::right << "[avg ms]"
-         << "\t" << std::setw(11) << std::right << "[avg %]"
-         << "\t" << std::setw(11) << std::right << "[cdf %]"
-         << "\t" << std::setw(10) << std::right << "[mem KB]" << std::endl;
+  InitField(stream, 24) << "[Node type]";
+  InitField(stream, 9) << "[count]";
+  InitField(stream, 10) << "[avg ms]";
+  InitField(stream, 11) << "[avg %]";
+  InitField(stream, 11) << "[cdf %]";
+  InitField(stream, 10) << "[mem KB]";
+  stream << std::endl;
 
   float avg_total_time_ms = 0.0f;
   float cdf = 0.0f;
@@ -411,40 +308,63 @@ std::string StatSummarizer::GetStatsByNodeType() const {
     timings.pop();
 
     const string node_type = entry.second.first;
-    const float memory =
-        entry.second.second / 1000.0f / static_cast<float>(num_runs());
-    const int64 node_type_total_us = entry.first;
+    const float memory = entry.second.second / 1000.0f;
 
-    const float time_per_run_ms =
-        (node_type_total_us / static_cast<float>(num_runs()) / 1000.0f);
+    const int64 node_type_total_us = entry.first;
+    const float time_per_run_ms = node_type_total_us / 1000.0f;
 
     avg_total_time_ms += time_per_run_ms;
     const float percentage =
         ((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
     cdf += percentage;
 
-    stream << std::setw(24) << std::right << node_type << "\t" << std::setw(9)
-           << std::right << node_type_map_count[node_type] << "\t"
-           << std::setw(10) << std::right << std::fixed << std::setprecision(2)
-           << time_per_run_ms << "\t" << std::setw(10) << std::right
-           << std::fixed << std::setprecision(2) << percentage << "%"
-           << "\t" << std::setw(10) << std::right << std::fixed
-           << std::setprecision(2) << cdf << "%"
-           << "\t" << std::setw(10) << std::right << std::fixed
-           << std::setprecision(2) << memory << std::endl;
+    InitField(stream, 24) << node_type;
+    InitField(stream, 9) << node_type_map_count[node_type];
+    InitField(stream, 10) << time_per_run_ms;
+    InitField(stream, 10) << percentage << "%";
+    InitField(stream, 10) << cdf << "%";
+    InitField(stream, 10) << memory;
+    stream << std::endl;
   }
+  stream << std::endl;
+  return stream.str();
+}
+
+std::string StatSummarizer::GetStatsByMetric(const string& title,
+                                             SortingMetric sorting_metric,
+                                             int num_stats) const {
+  std::vector<const Detail*> details;
+  OrderNodesByMetric(sorting_metric, &details);
+
+  double cumulative_stat_on_node = 0;
+
+  std::stringstream stream;
+  stream << HeaderString(title) << std::endl;
+  int stat_num = 0;
+  for (auto detail : details) {
+    ++stat_num;
+    if (num_stats > 0 && stat_num > num_stats) {
+      break;
+    }
+
+    // TODO(andrewharp): Make this keep track of the particular metric for cdf.
+    cumulative_stat_on_node += detail->rel_end_us.sum();
+    stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
+           << std::endl;
+  }
+  stream << std::endl;
   return stream.str();
 }
 
 std::string StatSummarizer::GetOutputString() const {
   std::stringstream stream;
 
-  stream << "Total time (us): " << run_total_micros_ << std::endl;
-  stream << GetTimingStatsByRunOrder();
-  stream << GetTimingStatsByTopDurations();
-  stream << "Total Memory (bytes): " << memory_ << std::endl;
-  stream << GetMemoryStatsByRunOrder();
-  stream << GetMemoryStatsByUsage();
+  // TODO(andrewharp): Allow sorting metrics to be specified externally, e.g.
+  // on command line.
+  stream << GetStatsByMetric("Run Order", BY_RUN_ORDER, 0);
+  stream << GetStatsByMetric("Top by Computation Time", BY_TIME, 10);
+  stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY, 10);
+
   stream << GetStatsByNodeType();
 
   stream << ShortSummary() << std::endl;
@@ -464,8 +384,8 @@ void StatSummarizer::PrintOutputs() const {
   std::priority_queue<
       std::pair<int64, const std::pair<const std::string, Detail>*>>
       timings;
-  for (const auto& entry : timing_details_) {
-    timings.emplace(-entry.second.first_start, &entry);
+  for (const auto& entry : details_) {
+    timings.emplace(-entry.second.start_us.avg(), &entry);
   }
 
   LOG(INFO) << "============ Node output tensor sizes in run order ========";
author	Andrew Harp <andrewharp@google.com>	2016-12-13 22:32:46 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2016-12-13 22:43:54 -0800
commit	9466e2c7b6cc7924ae02017686c60209679fb3c4 (patch)
tree	45d55686601f25ca276536ef96d263823d212b8b /tensorflow/core/util/stat_summarizer.cc
parent	9004714bc4de1c78735bfe8178b441a371df1762 (diff)