From dc62e16d428d52ec2ffd069bb066004681ddcb81 Mon Sep 17 00:00:00 2001 From: Andrew Harp Date: Tue, 12 Apr 2016 18:44:27 -0800 Subject: Standalone benchmark for Tensorflow models that runs on desktop and Android. Change: 119706066 --- tensorflow/tools/benchmark/BUILD | 66 ++++++++ tensorflow/tools/benchmark/README.md | 57 +++++++ tensorflow/tools/benchmark/benchmark_model.cc | 225 ++++++++++++++++++++++++++ 3 files changed, 348 insertions(+) create mode 100644 tensorflow/tools/benchmark/BUILD create mode 100644 tensorflow/tools/benchmark/README.md create mode 100644 tensorflow/tools/benchmark/benchmark_model.cc diff --git a/tensorflow/tools/benchmark/BUILD b/tensorflow/tools/benchmark/BUILD new file mode 100644 index 0000000000..364197bebd --- /dev/null +++ b/tensorflow/tools/benchmark/BUILD @@ -0,0 +1,66 @@ +# Description: +# Benchmark utility that can run on desktop and Android. + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +load("//tensorflow:tensorflow.bzl", "tf_copts") + +exports_files(["LICENSE"]) + +cc_library( + name = "benchmark_model_lib", + srcs = [ + "benchmark_model.cc", + ], + copts = tf_copts(), + visibility = ["//visibility:public"], + deps = select({ + "//tensorflow:android": [ + "//tensorflow/core:android_tensorflow_lib", + ], + "//conditions:default": [ + "//tensorflow/core:core_cpu", + "//tensorflow/core:lib", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:tensorflow", + ], + }), +) + +# This binary may be built for either desktop or Android. 
+# A typical Android build command will look like the following:
+# bazel build -c opt tensorflow/tools/benchmark:benchmark_model \
+# --crosstool_top=//external:android/crosstool \
+# --cpu=armeabi-v7a \
+# --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
+#
+# NOTE: currently '-pthread' must be removed from the LINK_OPTS variable
+# in google/protobuf/BUILD to successfully build for Android. This is temporary
+# pending an update of the version of the protobuf library that Tensorflow
+# uses.
+cc_binary(
+ name = "benchmark_model",
+ copts = tf_copts(),
+ linkopts = select({
+ "//tensorflow:android": [
+ "-pie",
+ "-s",
+ "-landroid",
+ "-ljnigraphics",
+ "-llog",
+ "-lm",
+ "-z defs",
+ "-s",
+ "-Wl,--icf=all", # Identical Code Folding
+ "-Wl,--exclude-libs,ALL", # Exclude syms in all libs from auto export
+ ],
+ "//conditions:default": [],
+ }),
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+ deps = [":benchmark_model_lib"],
+) diff --git a/tensorflow/tools/benchmark/README.md b/tensorflow/tools/benchmark/README.md new file mode 100644 index 0000000000..bcfed4ff14 --- /dev/null +++ b/tensorflow/tools/benchmark/README.md @@ -0,0 +1,57 @@ +# Tensorflow Model Benchmark Tool + +## Description + +A simple C++ binary to benchmark a compute graph and its individual operators, +both on desktop machines and on Android. + +## To build/install/run + +### On Android: + +(1) build for your specific platform, e.g.: +```bash +$bazel build -c opt \ + --crosstool_top=//external:android/crosstool \ + --cpu=armeabi-v7a \ + --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ + tensorflow/tools/benchmark:benchmark_model +``` + +(2) Connect your phone. Push the binary to your phone with adb push + (make the directory if required): +```bash +$adb push bazel-bin/tensorflow/tools/benchmark/benchmark_model /data/local/tmp +``` + +(3) Push the compute graph that you need to test.
For example: + adb push tensorflow_inception_graph.pb /data/local/tmp + +(4) Run the benchmark. For example: +```bash +$adb shell "/data/local/tmp/benchmark_model \ + --graph=/data/local/tmp/tensorflow_inception_graph.pb \ + --input_layer="input:0" \ + --input_layer_shape="1,224,224,3" \ + --input_layer_type="float" \ + --output_layer="output:0" +``` +### On desktop: +(1) build the binary +```bash +$bazel build -c opt tensorflow/tools/benchmark:benchmark_model +``` + +(2) Run on your compute graph, similar to the Android case but without the need of adb shell. +For example: +```bash +$bazel-bin/tensorflow/tools/benchmark/benchmark_model \ + --graph=tensorflow_inception_graph.pb \ + --input_layer="input:0" \ + --input_layer_shape="1,224,224,3" \ + --input_layer_type="float" \ + --output_layer="output:0" +``` + +The Inception graph used as an example here may be downloaded from +https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip \ No newline at end of file diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc new file mode 100644 index 0000000000..556f702fed --- /dev/null +++ b/tensorflow/tools/benchmark/benchmark_model.cc @@ -0,0 +1,225 @@ +/* Copyright 2016 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +// A C++ binary to benchmark a compute graph and its individual operators, +// both on desktop machines and on Android. +// +// See README.md for usage instructions. + +#include +#include +#include +#include +#include + +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/graph_constructor.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/init_main.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/public/session.h" +#include "tensorflow/core/util/command_line_flags.h" +#include "tensorflow/core/util/stat_summarizer.h" + +namespace tensorflow { + +// Global variables that holds the Tensorflow classifier. 
+static std::unique_ptr session; + +static StatSummarizer g_stats; + +struct Flags { + string graph = "/data/local/tmp/tensorflow_inception_graph.pb"; + string input_layer = "input:0"; + string input_layer_shape = "1,224,224,3"; + string input_layer_type = "float"; + string output_layer = "output:0"; + int num_runs = 50; + string run_delay = "-1.0"; + int num_threads = -1; +}; + +static Flags* flags; // Filled in by main() + +static bool InitializeBenchmark() { + g_stats.Reset(); + + LOG(INFO) << "Loading Tensorflow."; + + tensorflow::SessionOptions options; + tensorflow::ConfigProto& config = options.config; + if (flags->num_threads > 0) { + config.set_intra_op_parallelism_threads(flags->num_threads); + } + LOG(INFO) << "Got config, " << config.device_count_size() << " devices"; + + session.reset(tensorflow::NewSession(options)); + tensorflow::GraphDef tensorflow_graph; + Status s = ReadBinaryProto(Env::Default(), flags->graph, &tensorflow_graph); + if (!s.ok()) { + LOG(ERROR) << "Could not create Tensorflow Graph: " << s; + return false; + } + + s = session->Create(tensorflow_graph); + if (!s.ok()) { + LOG(ERROR) << "Could not create Tensorflow Session: " << s; + return false; + } + + // Clear the proto to save memory space. 
+ tensorflow_graph.Clear(); + return true; +} + +static bool RunBenchmark() { + DataType input_data_type; + CHECK(DataTypeFromString(flags->input_layer_type, &input_data_type)) + << flags->input_layer_type << " was an invalid type"; + + std::vector sizes; + CHECK(str_util::SplitAndParseAsInts(flags->input_layer_shape, ',', &sizes)) + << "Incorrect size string specified: " << flags->input_layer_shape; + TensorShape input_shape; + for (int i = 0; i < sizes.size(); ++i) { + input_shape.AddDim(sizes[i]); + } + + Tensor input_tensor(input_data_type, input_shape); + + switch (input_data_type) { + case DT_INT32: { + auto int_tensor = input_tensor.flat(); + int_tensor = int_tensor.constant(0.0); + break; + } + case DT_FLOAT: { + auto float_tensor = input_tensor.flat(); + float_tensor = float_tensor.constant(0.0); + break; + } + case DT_QUINT8: { + auto int_tensor = input_tensor.flat(); + int_tensor = int_tensor.constant(0.0); + break; + } + default: + LOG(FATAL) << "Unsupported input type: " << flags->input_layer_type; + } + + std::vector > input_tensors( + {{flags->input_layer, input_tensor}}); + + std::vector output_tensors; + std::vector output_names({flags->output_layer}); + + tensorflow::Status s; + + RunOptions run_options; + run_options.set_trace_level(RunOptions::FULL_TRACE); + RunMetadata run_metadata; + + s = session->Run(run_options, input_tensors, output_names, {}, + &output_tensors, &run_metadata); + + assert(run_metadata.has_step_stats()); + + const StepStats& stats = run_metadata.step_stats(); + + g_stats.ProcessStepStats(stats); + + if (!s.ok()) { + LOG(ERROR) << "Error during inference: " << s; + return false; + } + return true; +} + +} // namespace tensorflow + +int main(int argc, char** argv) { + tensorflow::flags = new tensorflow::Flags(); + + const bool parse_result = tensorflow::ParseFlags( + &argc, argv, + { + tensorflow::Flag("graph", &tensorflow::flags->graph), + tensorflow::Flag("input_layer", &tensorflow::flags->input_layer), + 
tensorflow::Flag("input_layer_shape", + &tensorflow::flags->input_layer_shape), + tensorflow::Flag("input_layer_type", + &tensorflow::flags->input_layer_type), + tensorflow::Flag("output_layer", &tensorflow::flags->output_layer), + tensorflow::Flag("num_runs", &tensorflow::flags->num_runs), + tensorflow::Flag("run_delay", &tensorflow::flags->run_delay), + tensorflow::Flag("num_threads", &tensorflow::flags->num_threads), + }); + + if (!parse_result) { + LOG(ERROR) << "Error parsing command-line flags."; + return -1; + } + + ::tensorflow::port::InitMain(argv[0], &argc, &argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1]; + return -1; + } + + LOG(INFO) << "Graph: [" << tensorflow::flags->graph << "]"; + LOG(INFO) << "Input layer: [" << tensorflow::flags->input_layer << "]"; + LOG(INFO) << "Input shape: [" << tensorflow::flags->input_layer_shape << "]"; + LOG(INFO) << "Input type: [" << tensorflow::flags->input_layer_type << "]"; + LOG(INFO) << "Output layer: [" << tensorflow::flags->output_layer << "]"; + LOG(INFO) << "Num runs: [" << tensorflow::flags->num_runs << "]"; + LOG(INFO) << "Inter-run delay (seconds): [" << tensorflow::flags->run_delay + << "]"; + LOG(INFO) << "Num threads: [" << tensorflow::flags->num_threads << "]"; + + if (!tensorflow::InitializeBenchmark()) { + return -1; + } + + // Convert the run_delay string into a timespec. + const double sleep_seconds = + std::strtod(tensorflow::flags->run_delay.c_str(), nullptr); + timespec req; + req.tv_sec = static_cast(sleep_seconds); + req.tv_nsec = (sleep_seconds - req.tv_sec) * 1000000000; + + LOG(INFO) << "Running benchmark"; + for (int i = 0; i < tensorflow::flags->num_runs; ++i) { + if (!tensorflow::RunBenchmark()) { + LOG(INFO) << "Failed on run " << i; + return -1; + } + + // If requested, sleep between runs for an arbitrary amount of time. + // This can be helpful to determine the effect of mobile processor + // scaling and thermal throttling. 
+ if (sleep_seconds > 0.0) { + nanosleep(&req, nullptr); + } + } + + tensorflow::g_stats.PrintStepStats(); + return 0; +} -- cgit v1.2.3