aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--tensorflow/core/util/stat_summarizer.cc102
-rw-r--r--tensorflow/core/util/stat_summarizer.h125
-rw-r--r--tensorflow/examples/android/jni/tensorflow_jni.cc45
3 files changed, 267 insertions, 5 deletions
diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc
new file mode 100644
index 0000000000..bd3ec11837
--- /dev/null
+++ b/tensorflow/core/util/stat_summarizer.cc
@@ -0,0 +1,102 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/util/stat_summarizer.h"
+
+#include <hash_map>
+#include <iomanip>
+#include <map>
+#include <queue>
+#include <sstream>
+#include <string>
+
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/step_stats.pb.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/public/session.h"
+
+using namespace tensorflow;
+
+void StatSummarizer::ProcessStepStats(const StepStats& step_stats) {
+ ++num_runs_;
+ int64 curr_total = 0;
+ for (const auto& ds : step_stats.dev_stats()) {
+ for (const auto& ns : ds.node_stats()) {
+ const string name = ns.node_name();
+ const int64 curr_time = ns.all_end_rel_micros();
+ curr_total += curr_time;
+ int64 accum_time = timing_totals_[name];
+ timing_totals_[name] = accum_time + curr_time;
+ }
+ }
+ run_total_us_.UpdateStat(curr_total);
+
+ timing_total_us_ += curr_total;
+}
+
+void StatSummarizer::PrintHeaders() {
+ std::stringstream stream;
+ stream << std::setw(40) << "[Name]"
+ << "\t" << std::fixed << std::setprecision(2) << std::setw(7) << "[ms]"
+ << "\t" << std::fixed << std::setprecision(2) << std::setw(6) << "[%]";
+ LOG(INFO) << stream.str();
+}
+
+void StatSummarizer::PrintColumns(const char* name, const double time_ms,
+ const double percentage) {
+ std::stringstream stream;
+ stream << std::setw(40) << name << "\t" << std::fixed << std::setprecision(2)
+ << std::setw(7) << time_ms << "\t" << std::fixed
+ << std::setprecision(2) << std::setw(6) << percentage;
+ LOG(INFO) << stream.str();
+}
+
+void StatSummarizer::PrintStepStats() {
+ const double avg_total_ms =
+ timing_total_us_ / static_cast<double>(num_runs_) / 1000.0;
+
+ LOG(INFO) << "Total time (us): " << run_total_us_;
+
+ std::priority_queue<std::pair<double, string> > timings;
+ LOG(INFO) << "========== Sorted by run order (ms) ==========";
+ PrintHeaders();
+ for (auto entry : timing_totals_) {
+ const double avg_time_ms =
+ entry.second / static_cast<double>(num_runs_) / 1000.0;
+
+ const double overall_percentage = 100.0 * avg_time_ms / avg_total_ms;
+
+ PrintColumns(entry.first.c_str(), avg_time_ms, overall_percentage);
+ timings.push(std::pair<double, string>(avg_time_ms, entry.first));
+ }
+ LOG(INFO);
+
+ LOG(INFO) << "============ Top by duration =================";
+ PrintHeaders();
+ int num_printed = 0;
+ while (!timings.empty() && num_printed < 10) {
+ auto entry = timings.top();
+ timings.pop();
+
+ const double overall_percentage = 100.0 * entry.first / avg_total_ms;
+ PrintColumns(entry.second.c_str(), entry.first, overall_percentage);
+ ++num_printed;
+ }
+ LOG(INFO);
+}
diff --git a/tensorflow/core/util/stat_summarizer.h b/tensorflow/core/util/stat_summarizer.h
new file mode 100644
index 0000000000..fe04279bea
--- /dev/null
+++ b/tensorflow/core/util/stat_summarizer.h
@@ -0,0 +1,125 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_UTIL_STAT_SUMMARIZER_H_
+#define TENSORFLOW_UTIL_STAT_SUMMARIZER_H_
+
+#include <string.h>
+#include <cmath>
+#include <ios>
+#include <limits>
+#include <list>
+#include <map>
+
+#include <stdlib.h>
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+class GraphDef;
+class StepStats;
+
+static const int kNumValues = 1000;
+
+template <typename T>
+class Stat {
+ public:
+ Stat<T>() { Reset(); }
+
+ void Reset() {
+ values_.clear();
+ min_value_ = std::numeric_limits<T>::max();
+ max_value_ = std::numeric_limits<T>::min();
+ avg_value_ = 0;
+ current_value_ = 0;
+ std_deviation_ = 0;
+ }
+
+ std::list<T> values_;
+ T min_value_;
+ T max_value_;
+ double avg_value_;
+ T current_value_;
+ double std_deviation_;
+
+ void UpdateStat(const T val) {
+ current_value_ = val;
+ values_.push_front(val);
+ while (values_.size() > kNumValues) {
+ values_.pop_back();
+ }
+
+ T total = 0;
+ for (const T curr_val : values_) {
+ min_value_ = std::min(min_value_, curr_val);
+ max_value_ = std::max(max_value_, curr_val);
+ total += curr_val;
+ }
+ avg_value_ = static_cast<double>(total) / values_.size();
+
+ double sqr_total = 0.0;
+ for (const T curr_val : values_) {
+ const double delta = avg_value_ - curr_val;
+ sqr_total += delta * delta;
+ }
+ std_deviation_ = std::sqrt(sqr_total / values_.size());
+ }
+
+ friend std::ostream& operator<<(std::ostream& stream, const Stat<T>& stat) {
+ stream << std::string("curr=") << stat.current_value_
+ << std::string(" min=") << stat.min_value_ << std::string(" max=")
+ << stat.max_value_ << std::string(" avg=")
+ << static_cast<int64>(stat.avg_value_) << std::string(" stddev=")
+ << static_cast<int64>(stat.std_deviation_);
+ return stream;
+ }
+};
+
+// A class intended to make performance analysis easier by collecting StepStats
+// and showing in an easily understandable format where CPU time is being spent.
+// See tensorflow/examples/android/jni/tensorflow_jni.cc for an example usage.
+class StatSummarizer {
+ public:
+ // Adds another run's StepStats output to the aggregate counts.
+ void ProcessStepStats(const StepStats& step_stats);
+
+ // Prints all the accumulated runtime stats in a tab-separated format which
+ // can be pasted into a spreadsheet for further analysis.
+ void PrintStepStats();
+
+ void Reset() {
+ num_runs_ = 0;
+
+ run_total_us_.Reset();
+
+ timing_total_us_ = 0;
+ timing_totals_.clear();
+ }
+
+ private:
+ void PrintHeaders();
+ void PrintColumns(const char* name, const double time_ms,
+ const double percentage);
+
+ Stat<int64> run_total_us_;
+
+ int num_runs_ = 0;
+ int64 timing_total_us_ = 0;
+ std::map<string, int64> timing_totals_;
+};
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_UTIL_STAT_SUMMARIZER_H_
diff --git a/tensorflow/examples/android/jni/tensorflow_jni.cc b/tensorflow/examples/android/jni/tensorflow_jni.cc
index ae0634f4fd..a6f6174664 100644
--- a/tensorflow/examples/android/jni/tensorflow_jni.cc
+++ b/tensorflow/examples/android/jni/tensorflow_jni.cc
@@ -36,6 +36,7 @@ limitations under the License.
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session.h"
+#include "tensorflow/core/util/stat_summarizer.h"
#include "tensorflow/examples/android/jni/jni_utils.h"
using namespace tensorflow;
@@ -49,10 +50,19 @@ static bool g_compute_graph_initialized = false;
static int g_tensorflow_input_size; // The image size for the mognet input.
static int g_image_mean; // The image mean.
+static StatSummarizer g_stats;
// For basic benchmarking.
static int g_num_runs = 0;
static int64 g_timing_total_us = 0;
+static Stat<int64> g_frequency_start;
+static Stat<int64> g_frequency_end;
+
+#ifdef LOG_DETAILED_STATS
+static const bool kLogDetailedStats = true;
+#else
+static const bool kLogDetailedStats = false;
+#endif
// Improve benchmarking by limiting runs to predefined amount.
// 0 (default) denotes infinite runs.
@@ -79,6 +89,9 @@ TENSORFLOW_METHOD(initializeTensorflow)(
jint num_classes, jint mognet_input_size, jint image_mean) {
g_num_runs = 0;
g_timing_total_us = 0;
+ g_stats.Reset();
+ g_frequency_start.Reset();
+ g_frequency_end.Reset();
//MutexLock input_lock(&g_compute_graph_mutex);
if (g_compute_graph_initialized) {
@@ -179,6 +192,17 @@ static void GetTopN(
std::reverse(top_results->begin(), top_results->end());
}
+static int64 GetCpuSpeed() {
+ string scaling_contents;
+ ReadFileToString(nullptr,
+ "/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq",
+ &scaling_contents);
+ std::stringstream ss(scaling_contents);
+ int64 result;
+ ss >> result;
+ return result;
+}
+
static std::string ClassifyImage(const RGBA* const bitmap_src,
const int in_stride,
const int width, const int height) {
@@ -227,22 +251,33 @@ static std::string ClassifyImage(const RGBA* const bitmap_src,
tensorflow::Status s;
int64 start_time, end_time;
- if (kSaveStepStats) {
+ if (kLogDetailedStats || kSaveStepStats) {
RunOptions run_options;
run_options.set_trace_level(RunOptions::FULL_TRACE);
RunMetadata run_metadata;
+ g_frequency_start.UpdateStat(GetCpuSpeed());
start_time = CurrentThreadTimeUs();
s = session->Run(run_options, input_tensors, output_names, {},
&output_tensors, &run_metadata);
end_time = CurrentThreadTimeUs();
+ g_frequency_end.UpdateStat(GetCpuSpeed());
assert(run_metadata.has_step_stats());
const StepStats& stats = run_metadata.step_stats();
- mkdir("/sdcard/tf/", 0755);
- const string filename =
- strings::Printf("/sdcard/tf/stepstats%05d.pb", g_num_runs);
- WriteProtoToFile(filename.c_str(), stats);
+ if (kLogDetailedStats) {
+ LOG(INFO) << "CPU frequency start: " << g_frequency_start;
+ LOG(INFO) << "CPU frequency end: " << g_frequency_end;
+ g_stats.ProcessStepStats(stats);
+ g_stats.PrintStepStats();
+ }
+
+ if (kSaveStepStats) {
+ mkdir("/sdcard/tf/", 0755);
+ const string filename =
+ strings::Printf("/sdcard/tf/stepstats%05d.pb", g_num_runs);
+ WriteProtoToFile(filename.c_str(), stats);
+ }
} else {
start_time = CurrentThreadTimeUs();
s = session->Run(input_tensors, output_names, {}, &output_tensors);