diff options
-rw-r--r-- | tensorflow/core/util/stat_summarizer.cc | 102 | ||||
-rw-r--r-- | tensorflow/core/util/stat_summarizer.h | 125 | ||||
-rw-r--r-- | tensorflow/examples/android/jni/tensorflow_jni.cc | 45 |
3 files changed, 267 insertions, 5 deletions
diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc new file mode 100644 index 0000000000..bd3ec11837 --- /dev/null +++ b/tensorflow/core/util/stat_summarizer.cc @@ -0,0 +1,102 @@ +/* Copyright 2016 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/util/stat_summarizer.h" + +#include <hash_map> +#include <iomanip> +#include <map> +#include <queue> +#include <sstream> +#include <string> + +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/step_stats.pb.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/public/session.h" + +using namespace tensorflow; + +void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { + ++num_runs_; + int64 curr_total = 0; + for (const auto& ds : step_stats.dev_stats()) { + for (const auto& ns : ds.node_stats()) { + const string name = ns.node_name(); + const int64 curr_time = ns.all_end_rel_micros(); + curr_total += curr_time; + int64 accum_time = timing_totals_[name]; + timing_totals_[name] = accum_time + curr_time; + } + } + run_total_us_.UpdateStat(curr_total); + + timing_total_us_ += curr_total; +} + +void StatSummarizer::PrintHeaders() { + std::stringstream stream; + stream << std::setw(40) << "[Name]" + << "\t" << std::fixed << std::setprecision(2) << std::setw(7) << "[ms]" + << "\t" << std::fixed << std::setprecision(2) << std::setw(6) << "[%]"; + LOG(INFO) << stream.str(); +} + +void StatSummarizer::PrintColumns(const char* name, const double time_ms, + const double percentage) { + std::stringstream stream; + stream << std::setw(40) << name << "\t" << std::fixed << std::setprecision(2) + << std::setw(7) << time_ms << "\t" << std::fixed + << std::setprecision(2) << std::setw(6) << percentage; + LOG(INFO) << stream.str(); +} + +void StatSummarizer::PrintStepStats() { + const double avg_total_ms = + timing_total_us_ / static_cast<double>(num_runs_) / 1000.0; + + LOG(INFO) << "Total time (us): " << run_total_us_; + + std::priority_queue<std::pair<double, string> > timings; + LOG(INFO) << "========== Sorted by run order (ms) =========="; + PrintHeaders(); + for (auto entry : timing_totals_) { + const double avg_time_ms = + entry.second / static_cast<double>(num_runs_) / 1000.0; + + const double overall_percentage = 100.0 * avg_time_ms / avg_total_ms; + + PrintColumns(entry.first.c_str(), avg_time_ms, overall_percentage); + timings.push(std::pair<double, string>(avg_time_ms, entry.first)); + } + LOG(INFO); + + LOG(INFO) << "============ Top by duration ================="; + PrintHeaders(); + int num_printed = 0; + while (!timings.empty() && num_printed < 10) { + auto entry = timings.top(); + timings.pop(); + + const double overall_percentage = 100.0 * entry.first / avg_total_ms; + PrintColumns(entry.second.c_str(), entry.first, overall_percentage); + ++num_printed; + } + LOG(INFO); +} diff --git a/tensorflow/core/util/stat_summarizer.h b/tensorflow/core/util/stat_summarizer.h new file mode 100644 index 0000000000..fe04279bea --- /dev/null +++ b/tensorflow/core/util/stat_summarizer.h @@ -0,0 +1,125 @@ +/* Copyright 2016 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_UTIL_STAT_SUMMARIZER_H_ +#define TENSORFLOW_UTIL_STAT_SUMMARIZER_H_ + +#include <string.h> +#include <cmath> +#include <ios> +#include <limits> +#include <list> +#include <map> + +#include <stdlib.h> +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { + +class GraphDef; +class StepStats; + +static const int kNumValues = 1000; + +template <typename T> +class Stat { + public: + Stat<T>() { Reset(); } + + void Reset() { + values_.clear(); + min_value_ = std::numeric_limits<T>::max(); + max_value_ = std::numeric_limits<T>::min(); + avg_value_ = 0; + current_value_ = 0; + std_deviation_ = 0; + } + + std::list<T> values_; + T min_value_; + T max_value_; + double avg_value_; + T current_value_; + double std_deviation_; + + void UpdateStat(const T val) { + current_value_ = val; + values_.push_front(val); + while (values_.size() > kNumValues) { + values_.pop_back(); + } + + T total = 0; + for (const T curr_val : values_) { + min_value_ = std::min(min_value_, curr_val); + max_value_ = std::max(max_value_, curr_val); + total += curr_val; + } + avg_value_ = static_cast<double>(total) / values_.size(); + + double sqr_total = 0.0; + for (const T curr_val : values_) { + const double delta = avg_value_ - curr_val; + sqr_total += delta * delta; + } + std_deviation_ = std::sqrt(sqr_total / values_.size()); + } + + friend std::ostream& operator<<(std::ostream& stream, const Stat<T>& stat) { + stream << std::string("curr=") << stat.current_value_ + << std::string(" min=") << stat.min_value_ << std::string(" max=") + << stat.max_value_ << std::string(" avg=") + << static_cast<int64>(stat.avg_value_) << std::string(" stddev=") + << static_cast<int64>(stat.std_deviation_); + return stream; + } +}; + +// A class intended to make performance analysis easier by collecting StepStats +// and showing in an easily understandable format where CPU time is being spent. +// See tensorflow/examples/android/jni/tensorflow_jni.cc for an example usage. +class StatSummarizer { + public: + // Adds another run's StepStats output to the aggregate counts. + void ProcessStepStats(const StepStats& step_stats); + + // Prints all the accumulated runtime stats in a tab-separated format which + // can be pasted into a spreadsheet for further analysis. + void PrintStepStats(); + + void Reset() { + num_runs_ = 0; + + run_total_us_.Reset(); + + timing_total_us_ = 0; + timing_totals_.clear(); + } + + private: + void PrintHeaders(); + void PrintColumns(const char* name, const double time_ms, + const double percentage); + + Stat<int64> run_total_us_; + + int num_runs_ = 0; + int64 timing_total_us_ = 0; + std::map<string, int64> timing_totals_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_UTIL_STAT_SUMMARIZER_H_ diff --git a/tensorflow/examples/android/jni/tensorflow_jni.cc b/tensorflow/examples/android/jni/tensorflow_jni.cc index ae0634f4fd..a6f6174664 100644 --- a/tensorflow/examples/android/jni/tensorflow_jni.cc +++ b/tensorflow/examples/android/jni/tensorflow_jni.cc @@ -36,6 +36,7 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/public/session.h" +#include "tensorflow/core/util/stat_summarizer.h" #include "tensorflow/examples/android/jni/jni_utils.h" using namespace tensorflow; @@ -49,10 +50,19 @@ static bool g_compute_graph_initialized = false; static int g_tensorflow_input_size; // The image size for the mognet input. static int g_image_mean; // The image mean. +static StatSummarizer g_stats; // For basic benchmarking. static int g_num_runs = 0; static int64 g_timing_total_us = 0; +static Stat<int64> g_frequency_start; +static Stat<int64> g_frequency_end; + +#ifdef LOG_DETAILED_STATS +static const bool kLogDetailedStats = true; +#else +static const bool kLogDetailedStats = false; +#endif // Improve benchmarking by limiting runs to predefined amount. // 0 (default) denotes infinite runs. @@ -79,6 +89,9 @@ TENSORFLOW_METHOD(initializeTensorflow)( jint num_classes, jint mognet_input_size, jint image_mean) { g_num_runs = 0; g_timing_total_us = 0; + g_stats.Reset(); + g_frequency_start.Reset(); + g_frequency_end.Reset(); //MutexLock input_lock(&g_compute_graph_mutex); if (g_compute_graph_initialized) { @@ -179,6 +192,17 @@ static void GetTopN( std::reverse(top_results->begin(), top_results->end()); } +static int64 GetCpuSpeed() { + string scaling_contents; + ReadFileToString(nullptr, + "/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", + &scaling_contents); + std::stringstream ss(scaling_contents); + int64 result; + ss >> result; + return result; +} + static std::string ClassifyImage(const RGBA* const bitmap_src, const int in_stride, const int width, const int height) { @@ -227,22 +251,33 @@ static std::string ClassifyImage(const RGBA* const bitmap_src, tensorflow::Status s; int64 start_time, end_time; - if (kSaveStepStats) { + if (kLogDetailedStats || kSaveStepStats) { RunOptions run_options; run_options.set_trace_level(RunOptions::FULL_TRACE); RunMetadata run_metadata; + g_frequency_start.UpdateStat(GetCpuSpeed()); start_time = CurrentThreadTimeUs(); s = session->Run(run_options, input_tensors, output_names, {}, &output_tensors, &run_metadata); end_time = CurrentThreadTimeUs(); + g_frequency_end.UpdateStat(GetCpuSpeed()); assert(run_metadata.has_step_stats()); const StepStats& stats = run_metadata.step_stats(); - mkdir("/sdcard/tf/", 0755); - const string filename = - strings::Printf("/sdcard/tf/stepstats%05d.pb", g_num_runs); - WriteProtoToFile(filename.c_str(), stats); + if (kLogDetailedStats) { + LOG(INFO) << "CPU frequency start: " << g_frequency_start; + LOG(INFO) << "CPU frequency end: " << g_frequency_end; + g_stats.ProcessStepStats(stats); + g_stats.PrintStepStats(); + } + + if (kSaveStepStats) { + mkdir("/sdcard/tf/", 0755); + const string filename = + strings::Printf("/sdcard/tf/stepstats%05d.pb", g_num_runs); + WriteProtoToFile(filename.c_str(), stats); + } } else { start_time = CurrentThreadTimeUs(); s = session->Run(input_tensors, output_names, {}, &output_tensors); |