aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/util/stats_calculator.h
blob: e191737bb2c8eb85518e51b3a06884a7983a392e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
#define TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_

#include <stdlib.h>

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include "tensorflow/core/util/stat_summarizer_options.h"

namespace tensorflow {

// Accumulates running statistics over a sequence of samples supplied one at a
// time through UpdateStat(): first and newest sample, minimum, maximum, count,
// sum and sum of squares.
//
// ValueType is the type of the samples (and of first/newest/min/max/sum).
// HighPrecisionValueType is the type used for the sum of squares and for the
// average.
template <typename ValueType, typename HighPrecisionValueType = double>
class Stat {
 public:
  // Folds one more sample `v` into the running statistics.
  void UpdateStat(ValueType v) {
    if (count_ == 0) {
      first_ = v;
    }

    newest_ = v;
    max_ = std::max(v, max_);
    min_ = std::min(v, min_);
    ++count_;
    sum_ += v;
    // Promote before multiplying so the square is computed in the
    // high-precision type rather than in ValueType.
    squared_sum_ += static_cast<HighPrecisionValueType>(v) * v;
  }

  // Returns the object to its freshly default-constructed state.
  void Reset() { *this = Stat(); }

  // True when no sample has been recorded yet.
  bool empty() const { return count_ == 0; }

  // First sample recorded (0 when empty).
  ValueType first() const { return first_; }

  // Most recent sample recorded (0 when empty).
  ValueType newest() const { return newest_; }

  // Largest sample recorded. When empty this is the sentinel
  // std::numeric_limits<ValueType>::lowest().
  ValueType max() const { return max_; }

  // Smallest sample recorded. When empty this is the sentinel
  // std::numeric_limits<ValueType>::max().
  ValueType min() const { return min_; }

  // Number of samples recorded.
  int64_t count() const { return count_; }

  // Sum of all samples, accumulated in ValueType.
  ValueType sum() const { return sum_; }

  // Sum of the squares of all samples.
  HighPrecisionValueType squared_sum() const { return squared_sum_; }

  // True when empty or when every recorded sample had the same value.
  bool all_same() const { return (count_ == 0 || min_ == max_); }

  // Arithmetic mean of the samples.
  //
  // For an empty Stat the result is std::numeric_limits<ValueType>::quiet_NaN()
  // converted to HighPrecisionValueType: a NaN for floating-point ValueType,
  // but 0 for integral ValueType (which has no NaN representation).
  HighPrecisionValueType avg() const {
    return empty() ? std::numeric_limits<ValueType>::quiet_NaN()
                   : static_cast<HighPrecisionValueType>(sum_) / count_;
  }

  // Population standard deviation, computed as sqrt(E[x^2] - E[x]^2) and
  // converted to ValueType (so it is truncated for integral ValueType).
  // Returns 0 when empty or when all samples are equal.
  ValueType std_deviation() const {
    if (all_same()) {
      return static_cast<ValueType>(0);
    }
    const HighPrecisionValueType mean = avg();
    const HighPrecisionValueType variance =
        squared_sum_ / count_ - mean * mean;
    // Rounding in the subtraction above can leave a marginally negative
    // value; clamp it so the square root never produces NaN.
    const HighPrecisionValueType non_negative_variance =
        std::max(variance, static_cast<HighPrecisionValueType>(0));
    return static_cast<ValueType>(std::sqrt(non_negative_variance));
  }

  // Writes a one-line, human-readable summary to `*stream`. The amount of
  // detail depends on whether the Stat is empty, holds identical samples, or
  // holds differing samples.
  void OutputToStream(std::ostream* stream) const {
    if (empty()) {
      *stream << "count=0";
    } else if (all_same()) {
      *stream << "count=" << count_ << " curr=" << newest_;
      if (count_ > 1) *stream << "(all same)";
    } else {
      *stream << "count=" << count_ << " first=" << first_
              << " curr=" << newest_ << " min=" << min_ << " max=" << max_
              << " avg=" << avg() << " std=" << std_deviation();
    }
  }

  // Stream insertion; forwards to OutputToStream(). Takes the current
  // specialization (`Stat`) so it also works when HighPrecisionValueType is
  // not the default.
  friend std::ostream& operator<<(std::ostream& stream, const Stat& stat) {
    stat.OutputToStream(&stream);
    return stream;
  }

 private:
  ValueType first_ = 0;
  ValueType newest_ = 0;
  // lowest() rather than min(): for floating-point types min() is the
  // smallest positive value, which would hide all-negative samples.
  ValueType max_ = std::numeric_limits<ValueType>::lowest();
  ValueType min_ = std::numeric_limits<ValueType>::max();
  int64_t count_ = 0;
  ValueType sum_ = 0;
  HighPrecisionValueType squared_sum_ = 0;
};

// A StatsCalculator assists in performance analysis of Graph executions.
//
// It summarizes time spent executing (on GPU/CPU), memory used etc for
// graph execution.
//
// For example usage see StatsSummarizer.
//
// Only the inline accessors are defined in this header; the remaining member
// functions are defined out of line.
class StatsCalculator {
 public:
  // Key by which per-node details are ordered (see OrderNodesByMetric and
  // GetStatsByMetric).
  enum SortingMetric {
    BY_NAME,
    BY_RUN_ORDER,
    BY_TIME,
    BY_MEMORY,
    BY_TYPE,
  };

  // Constructs a calculator configured by `options`.
  explicit StatsCalculator(const StatSummarizerOptions& options);

  // Returns a string detailing the accumulated runtime stats in a tab-separated
  // format which can be pasted into a spreadsheet for further analysis.
  std::string GetOutputString() const;

  // Returns a short textual summary.
  // NOTE(review): exact contents are defined out of line — confirm there.
  std::string GetShortSummary() const;

  // Aggregates statistics per node type. All five arguments are pointers the
  // function is declared to receive; NOTE(review): presumably they are output
  // parameters that must be non-null — confirm against the implementation.
  void ComputeStatsByType(
      std::map<std::string, int64_t>* node_type_map_count,
      std::map<std::string, int64_t>* node_type_map_time,
      std::map<std::string, int64_t>* node_type_map_memory,
      std::map<std::string, int64_t>* node_type_map_times_called,
      int64_t* accumulated_us) const;

  // Returns a textual report of statistics grouped by node type.
  std::string GetStatsByNodeType() const;

  // Returns a textual report titled `title`, ordered by `sorting_metric`.
  // NOTE(review): `num_stats` presumably limits the number of rows — confirm.
  std::string GetStatsByMetric(const std::string& title,
                               SortingMetric sorting_metric,
                               int num_stats) const;

  // Returns number of runs.
  int num_runs() const { return static_cast<int>(run_total_us_.count()); }

  // Returns stats of total microseconds spent by all nodes in each run.
  const Stat<int64_t>& run_total_us() const { return run_total_us_; }

  // Records the total time of one run; each call adds one sample, which is
  // what num_runs() counts.
  void UpdateRunTotalUs(int64_t run_total_us) {
    run_total_us_.UpdateStat(run_total_us);
  }

  // Records one sample of memory used.
  void UpdateMemoryUsed(int64_t memory) { memory_.UpdateStat(memory); }

  // Statistics kept for a single node.
  struct Detail {
    std::string name;
    std::string type;
    // Zero-initialized so that a default-initialized Detail never exposes an
    // indeterminate value.
    int64_t run_order = 0;
    Stat<int64_t> start_us;
    Stat<int64_t> rel_end_us;
    Stat<int64_t> mem_used;
    // Zero-initialized for the same reason as run_order.
    int64_t times_called = 0;
  };

  // Returns the per-node details, keyed by string.
  // NOTE(review): the key is presumably the node name — confirm.
  const std::map<std::string, Detail>& GetDetails() const { return details_; }

  // Records one observation for node `name` of type `type`.
  // NOTE(review): presumably creates or updates the matching entry in the
  // details map — confirm against the out-of-line definition.
  void AddNodeStats(const std::string& name, const std::string& type,
                    int64_t run_order, int64_t start_us, int64_t rel_end_us,
                    int64_t mem_used);

 private:
  // Orders node details according to `sorting_metric`, using `*details`.
  void OrderNodesByMetric(SortingMetric sorting_metric,
                          std::vector<const Detail*>* details) const;

  // Formatting helpers for the textual reports.
  std::string HeaderString(const std::string& title) const;
  std::string ColumnString(const Detail& detail,
                           const int64_t cumulative_stat_on_node,
                           const Stat<int64_t>& stat) const;

  // One sample per run (see UpdateRunTotalUs).
  Stat<int64_t> run_total_us_;
  // Samples recorded through UpdateMemoryUsed.
  Stat<int64_t> memory_;

  std::map<std::string, Detail> details_;
  StatSummarizerOptions options_;
};

}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_