author    Andrew Harp <andrew.harp@gmail.com>  2016-04-12 18:44:27 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>  2016-04-12 19:51:42 -0700
commit    dc62e16d428d52ec2ffd069bb066004681ddcb81
tree      3153c2c42cd9c9e9fb2cc04734d24563990f64d9
parent    1b06f75201ccf92159ed52125363b3f26684cf8b
Standalone benchmark for TensorFlow models that runs on desktop and Android.
Change: 119706066
-rw-r--r--  tensorflow/tools/benchmark/BUILD                66
-rw-r--r--  tensorflow/tools/benchmark/README.md            57
-rw-r--r--  tensorflow/tools/benchmark/benchmark_model.cc  225
3 files changed, 348 insertions, 0 deletions
diff --git a/tensorflow/tools/benchmark/BUILD b/tensorflow/tools/benchmark/BUILD
new file mode 100644
index 0000000000..364197bebd
--- /dev/null
+++ b/tensorflow/tools/benchmark/BUILD
@@ -0,0 +1,66 @@
+# Description:
+# Benchmark utility that can run on desktop and Android.
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"]) # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "tf_copts")
+
+exports_files(["LICENSE"])
+
+cc_library(
+    name = "benchmark_model_lib",
+    srcs = [
+        "benchmark_model.cc",
+    ],
+    copts = tf_copts(),
+    visibility = ["//visibility:public"],
+    deps = select({
+        "//tensorflow:android": [
+            "//tensorflow/core:android_tensorflow_lib",
+        ],
+        "//conditions:default": [
+            "//tensorflow/core:core_cpu",
+            "//tensorflow/core:lib",
+            "//tensorflow/core:framework",
+            "//tensorflow/core:framework_internal",
+            "//tensorflow/core:protos_all_cc",
+            "//tensorflow/core:tensorflow",
+        ],
+    }),
+)
+
+# This binary may be built for either desktop or Android.
+# A typical Android build command will look like the following:
+# bazel build -c opt tensorflow/tools/benchmark:benchmark_model \
+#   --crosstool_top=//external:android/crosstool \
+#   --cpu=armeabi-v7a \
+#   --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
+#
+# NOTE: currently '-pthread' must be removed from the LINK_OPTS variable
+# in google/protobuf/BUILD to successfully build for Android. This is
+# temporary, pending an update of the version of the protobuf library that
+# TensorFlow uses.
+cc_binary(
+    name = "benchmark_model",
+    copts = tf_copts(),
+    linkopts = select({
+        "//tensorflow:android": [
+            "-pie",
+            "-s",
+            "-landroid",
+            "-ljnigraphics",
+            "-llog",
+            "-lm",
+            "-z defs",
+            "-Wl,--icf=all",  # Identical Code Folding
+            "-Wl,--exclude-libs,ALL",  # Exclude syms in all libs from auto export
+        ],
+        "//conditions:default": [],
+    }),
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+    deps = [":benchmark_model_lib"],
+)
diff --git a/tensorflow/tools/benchmark/README.md b/tensorflow/tools/benchmark/README.md
new file mode 100644
index 0000000000..bcfed4ff14
--- /dev/null
+++ b/tensorflow/tools/benchmark/README.md
@@ -0,0 +1,57 @@
+# TensorFlow Model Benchmark Tool
+
+## Description
+
+A simple C++ binary to benchmark a compute graph and its individual operators,
+both on desktop machines and on Android.
+
+## To build/install/run
+
+### On Android:
+
+(1) Build for your specific platform, e.g.:
+```bash
+$ bazel build -c opt \
+    --crosstool_top=//external:android/crosstool \
+    --cpu=armeabi-v7a \
+    --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
+    tensorflow/tools/benchmark:benchmark_model
+```
+
+(2) Connect your phone. Push the binary to your phone with adb push
+    (creating the target directory first if required; see below):
+```bash
+$ adb push bazel-bin/tensorflow/tools/benchmark/benchmark_model /data/local/tmp
+```
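+
+If /data/local/tmp does not already exist on the device, it can be created
+first, for example:
+```bash
+$ adb shell mkdir -p /data/local/tmp
+```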
+
+(3) Push the compute graph that you need to test. For example:
+```bash
+$ adb push tensorflow_inception_graph.pb /data/local/tmp
+```
+
+(4) Run the benchmark. For example:
+```bash
+$ adb shell "/data/local/tmp/benchmark_model \
+    --graph=/data/local/tmp/tensorflow_inception_graph.pb \
+    --input_layer=input:0 \
+    --input_layer_shape=1,224,224,3 \
+    --input_layer_type=float \
+    --output_layer=output:0"
+```
+
+### On desktop:
+
+(1) Build the binary:
+```bash
+$ bazel build -c opt tensorflow/tools/benchmark:benchmark_model
+```
+
+(2) Run on your compute graph, similar to the Android case but without the
+need for adb shell. For example:
+```bash
+$ bazel-bin/tensorflow/tools/benchmark/benchmark_model \
+    --graph=tensorflow_inception_graph.pb \
+    --input_layer="input:0" \
+    --input_layer_shape="1,224,224,3" \
+    --input_layer_type="float" \
+    --output_layer="output:0"
+```
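+
+The binary also accepts the optional --num_runs, --run_delay and --num_threads
+flags defined in benchmark_model.cc. For example, to average over 100 runs
+with a half-second pause between them (the values here are illustrative):
+```bash
+$ bazel-bin/tensorflow/tools/benchmark/benchmark_model \
+    --graph=tensorflow_inception_graph.pb \
+    --input_layer="input:0" \
+    --input_layer_shape="1,224,224,3" \
+    --input_layer_type="float" \
+    --output_layer="output:0" \
+    --num_runs=100 \
+    --run_delay=0.5
+```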
+
+The Inception graph used as an example here may be downloaded from
+https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip
\ No newline at end of file
diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc
new file mode 100644
index 0000000000..556f702fed
--- /dev/null
+++ b/tensorflow/tools/benchmark/benchmark_model.cc
@@ -0,0 +1,225 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// A C++ binary to benchmark a compute graph and its individual operators,
+// both on desktop machines and on Android.
+//
+// See README.md for usage instructions.
+
+#include <time.h>  // for nanosleep() and timespec
+
+#include <cstdlib>
+#include <memory>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/graph/algorithm.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/graph/graph_constructor.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/public/session.h"
+#include "tensorflow/core/util/command_line_flags.h"
+#include "tensorflow/core/util/stat_summarizer.h"
+
+namespace tensorflow {
+
+// Global variables that hold the TensorFlow session and aggregated run stats.
+static std::unique_ptr<tensorflow::Session> session;
+
+static StatSummarizer g_stats;
+
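+// Command-line flags; the defaults match the Inception example in README.md.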
+struct Flags {
+  string graph = "/data/local/tmp/tensorflow_inception_graph.pb";
+  string input_layer = "input:0";
+  string input_layer_shape = "1,224,224,3";
+  string input_layer_type = "float";
+  string output_layer = "output:0";
+  int num_runs = 50;
+  string run_delay = "-1.0";
+  int num_threads = -1;
+};
+
+static Flags* flags; // Filled in by main()
+
+static bool InitializeBenchmark() {
+  g_stats.Reset();
+
+  LOG(INFO) << "Loading TensorFlow.";
+
+  tensorflow::SessionOptions options;
+  tensorflow::ConfigProto& config = options.config;
+  if (flags->num_threads > 0) {
+    config.set_intra_op_parallelism_threads(flags->num_threads);
+  }
+  LOG(INFO) << "Got config, " << config.device_count_size() << " devices";
+
+  session.reset(tensorflow::NewSession(options));
+  tensorflow::GraphDef tensorflow_graph;
+  Status s = ReadBinaryProto(Env::Default(), flags->graph, &tensorflow_graph);
+  if (!s.ok()) {
+    LOG(ERROR) << "Could not load TensorFlow Graph: " << s;
+    return false;
+  }
+
+  s = session->Create(tensorflow_graph);
+  if (!s.ok()) {
+    LOG(ERROR) << "Could not create TensorFlow Session: " << s;
+    return false;
+  }
+
+  // Clear the proto to save memory space.
+  tensorflow_graph.Clear();
+  return true;
+}
+
+static bool RunBenchmark() {
+  DataType input_data_type;
+  CHECK(DataTypeFromString(flags->input_layer_type, &input_data_type))
+      << flags->input_layer_type << " was an invalid type";
+
+  std::vector<int32> sizes;
+  CHECK(str_util::SplitAndParseAsInts(flags->input_layer_shape, ',', &sizes))
+      << "Incorrect size string specified: " << flags->input_layer_shape;
+  TensorShape input_shape;
+  for (int i = 0; i < sizes.size(); ++i) {
+    input_shape.AddDim(sizes[i]);
+  }
+
+  Tensor input_tensor(input_data_type, input_shape);
+
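+  // Fill the input tensor with zeros; the benchmark measures execution time
+  // only, so the actual input values do not matter.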
+  switch (input_data_type) {
+    case DT_INT32: {
+      auto int_tensor = input_tensor.flat<int32>();
+      int_tensor = int_tensor.constant(0);
+      break;
+    }
+    case DT_FLOAT: {
+      auto float_tensor = input_tensor.flat<float>();
+      float_tensor = float_tensor.constant(0.0);
+      break;
+    }
+    case DT_QUINT8: {
+      auto int_tensor = input_tensor.flat<quint8>();
+      int_tensor = int_tensor.constant(0);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unsupported input type: " << flags->input_layer_type;
+  }
+
+  std::vector<std::pair<string, tensorflow::Tensor> > input_tensors(
+      {{flags->input_layer, input_tensor}});
+
+  std::vector<tensorflow::Tensor> output_tensors;
+  std::vector<string> output_names({flags->output_layer});
+
+  tensorflow::Status s;
+
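+  // Request full tracing so that the RunMetadata returned by Session::Run
+  // is populated with the per-node StepStats that the StatSummarizer
+  // aggregates.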
+  RunOptions run_options;
+  run_options.set_trace_level(RunOptions::FULL_TRACE);
+  RunMetadata run_metadata;
+
+  s = session->Run(run_options, input_tensors, output_names, {},
+                   &output_tensors, &run_metadata);
+
+  if (!s.ok()) {
+    LOG(ERROR) << "Error during inference: " << s;
+    return false;
+  }
+
+  CHECK(run_metadata.has_step_stats());
+
+  const StepStats& stats = run_metadata.step_stats();
+
+  g_stats.ProcessStepStats(stats);
+
+  return true;
+}
+
+} // namespace tensorflow
+
+int main(int argc, char** argv) {
+  tensorflow::flags = new tensorflow::Flags();
+
+  const bool parse_result = tensorflow::ParseFlags(
+      &argc, argv,
+      {
+          tensorflow::Flag("graph", &tensorflow::flags->graph),
+          tensorflow::Flag("input_layer", &tensorflow::flags->input_layer),
+          tensorflow::Flag("input_layer_shape",
+                           &tensorflow::flags->input_layer_shape),
+          tensorflow::Flag("input_layer_type",
+                           &tensorflow::flags->input_layer_type),
+          tensorflow::Flag("output_layer", &tensorflow::flags->output_layer),
+          tensorflow::Flag("num_runs", &tensorflow::flags->num_runs),
+          tensorflow::Flag("run_delay", &tensorflow::flags->run_delay),
+          tensorflow::Flag("num_threads", &tensorflow::flags->num_threads),
+      });
+
+  if (!parse_result) {
+    LOG(ERROR) << "Error parsing command-line flags.";
+    return -1;
+  }
+
+  ::tensorflow::port::InitMain(argv[0], &argc, &argv);
+  if (argc > 1) {
+    LOG(ERROR) << "Unknown argument " << argv[1];
+    return -1;
+  }
+
+ LOG(INFO) << "Graph: [" << tensorflow::flags->graph << "]";
+ LOG(INFO) << "Input layer: [" << tensorflow::flags->input_layer << "]";
+ LOG(INFO) << "Input shape: [" << tensorflow::flags->input_layer_shape << "]";
+ LOG(INFO) << "Input type: [" << tensorflow::flags->input_layer_type << "]";
+ LOG(INFO) << "Output layer: [" << tensorflow::flags->output_layer << "]";
+ LOG(INFO) << "Num runs: [" << tensorflow::flags->num_runs << "]";
+ LOG(INFO) << "Inter-run delay (seconds): [" << tensorflow::flags->run_delay
+ << "]";
+ LOG(INFO) << "Num threads: [" << tensorflow::flags->num_threads << "]";
+
+ if (!tensorflow::InitializeBenchmark()) {
+ return -1;
+ }
+
+  // Convert the run_delay string into a timespec.
+  const double sleep_seconds =
+      std::strtod(tensorflow::flags->run_delay.c_str(), nullptr);
+  timespec req;
+  req.tv_sec = static_cast<time_t>(sleep_seconds);
+  req.tv_nsec = static_cast<long>((sleep_seconds - req.tv_sec) * 1000000000);
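+  // e.g. --run_delay=0.5 yields tv_sec = 0 and tv_nsec = 500000000.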
+
+ LOG(INFO) << "Running benchmark";
+ for (int i = 0; i < tensorflow::flags->num_runs; ++i) {
+ if (!tensorflow::RunBenchmark()) {
+ LOG(INFO) << "Failed on run " << i;
+ return -1;
+ }
+
+ // If requested, sleep between runs for an arbitrary amount of time.
+ // This can be helpful to determine the effect of mobile processor
+ // scaling and thermal throttling.
+ if (sleep_seconds > 0.0) {
+ nanosleep(&req, nullptr);
+ }
+ }
+
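+  // Print the per-op timing statistics accumulated over all runs.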
+  tensorflow::g_stats.PrintStepStats();
+  return 0;
+}