From dc62e16d428d52ec2ffd069bb066004681ddcb81 Mon Sep 17 00:00:00 2001 From: Andrew Harp Date: Tue, 12 Apr 2016 18:44:27 -0800 Subject: Standalone benchmark for Tensorflow models that runs on desktop and Android. Change: 119706066 --- tensorflow/tools/benchmark/BUILD | 66 ++++++++ tensorflow/tools/benchmark/README.md | 57 +++++++ tensorflow/tools/benchmark/benchmark_model.cc | 225 ++++++++++++++++++++++++++ 3 files changed, 348 insertions(+) create mode 100644 tensorflow/tools/benchmark/BUILD create mode 100644 tensorflow/tools/benchmark/README.md create mode 100644 tensorflow/tools/benchmark/benchmark_model.cc diff --git a/tensorflow/tools/benchmark/BUILD b/tensorflow/tools/benchmark/BUILD new file mode 100644 index 0000000000..364197bebd --- /dev/null +++ b/tensorflow/tools/benchmark/BUILD @@ -0,0 +1,66 @@ +# Description: +# Benchmark utility that can run on desktop and Android. + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +load("//tensorflow:tensorflow.bzl", "tf_copts") + +exports_files(["LICENSE"]) + +cc_library( + name = "benchmark_model_lib", + srcs = [ + "benchmark_model.cc", + ], + copts = tf_copts(), + visibility = ["//visibility:public"], + deps = select({ + "//tensorflow:android": [ + "//tensorflow/core:android_tensorflow_lib", + ], + "//conditions:default": [ + "//tensorflow/core:core_cpu", + "//tensorflow/core:lib", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:tensorflow", + ], + }), +) + +# This binary may be built for either desktop or Android. 
+# A typical Android build command will look like the following:
+# bazel build -c opt tensorflow/tools/benchmark:benchmark_model \
+# --crosstool_top=//external:android/crosstool \
+# --cpu=armeabi-v7a \
+# --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
+#
+# NOTE: currently '-pthread' must be removed from the LINK_OPTS variable
+# in google/protobuf/BUILD to successfully build for Android. This is temporary
+# pending an update of the version of the protobuf library that Tensorflow
+# uses.
+cc_binary(
+ name = "benchmark_model",
+ copts = tf_copts(),
+ linkopts = select({
+ "//tensorflow:android": [
+ "-pie",
+ "-s",
+ "-landroid",
+ "-ljnigraphics",
+ "-llog",
+ "-lm",
+ "-z defs",
+ "-s",
+ "-Wl,--icf=all", # Identical Code Folding
+ "-Wl,--exclude-libs,ALL", # Exclude syms in all libs from auto export
+ ],
+ "//conditions:default": [],
+ }),
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+ deps = [":benchmark_model_lib"],
+) diff --git a/tensorflow/tools/benchmark/README.md b/tensorflow/tools/benchmark/README.md new file mode 100644 index 0000000000..bcfed4ff14 --- /dev/null +++ b/tensorflow/tools/benchmark/README.md @@ -0,0 +1,57 @@ +# Tensorflow Model Benchmark Tool + +## Description + +A simple C++ binary to benchmark a compute graph and its individual operators, +both on desktop machines and on Android. + +## To build/install/run + +### On Android: + +(1) build for your specific platform, e.g.: +```bash +$bazel build -c opt \ + --crosstool_top=//external:android/crosstool \ + --cpu=armeabi-v7a \ + --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ + tensorflow/tools/benchmark:benchmark_model +``` + +(2) Connect your phone. Push the binary to your phone with adb push + (make the directory if required): +```bash +$adb push bazel-bin/tensorflow/tools/benchmark/benchmark_model /data/local/tmp +``` + +(3) Push the compute graph that you need to test.
For example: + adb push tensorflow_inception_graph.pb /data/local/tmp + +(4) Run the benchmark. For example: +```bash +$adb shell "/data/local/tmp/benchmark_model \ + --graph=/data/local/tmp/tensorflow_inception_graph.pb \ + --input_layer="input:0" \ + --input_layer_shape="1,224,224,3" \ + --input_layer_type="float" \ + --output_layer="output:0" +``` +### On desktop: +(1) build the binary +```bash +$bazel build -c opt tensorflow/tools/benchmark:benchmark_model +``` + +(2) Run on your compute graph, similar to the Android case but without the need of adb shell. +For example: +```bash +$bazel-bin/tensorflow/tools/benchmark/benchmark_model \ + --graph=tensorflow_inception_graph.pb \ + --input_layer="input:0" \ + --input_layer_shape="1,224,224,3" \ + --input_layer_type="float" \ + --output_layer="output:0" +``` + +The Inception graph used as an example here may be downloaded from +https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip \ No newline at end of file diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc new file mode 100644 index 0000000000..556f702fed --- /dev/null +++ b/tensorflow/tools/benchmark/benchmark_model.cc @@ -0,0 +1,225 @@ +/* Copyright 2016 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +// A C++ binary to benchmark a compute graph and its individual operators, +// both on desktop machines and on Android. +// +// See README.md for usage instructions. + +#include +#include +#include +#include +#include + +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/graph_constructor.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/init_main.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/public/session.h" +#include "tensorflow/core/util/command_line_flags.h" +#include "tensorflow/core/util/stat_summarizer.h" + +namespace tensorflow { + +// Global variables that holds the Tensorflow classifier. 
+static std::unique_ptr session; + +static StatSummarizer g_stats; + +struct Flags { + string graph = "/data/local/tmp/tensorflow_inception_graph.pb"; + string input_layer = "input:0"; + string input_layer_shape = "1,224,224,3"; + string input_layer_type = "float"; + string output_layer = "output:0"; + int num_runs = 50; + string run_delay = "-1.0"; + int num_threads = -1; +}; + +static Flags* flags; // Filled in by main() + +static bool InitializeBenchmark() { + g_stats.Reset(); + + LOG(INFO) << "Loading Tensorflow."; + + tensorflow::SessionOptions options; + tensorflow::ConfigProto& config = options.config; + if (flags->num_threads > 0) { + config.set_intra_op_parallelism_threads(flags->num_threads); + } + LOG(INFO) << "Got config, " << config.device_count_size() << " devices"; + + session.reset(tensorflow::NewSession(options)); + tensorflow::GraphDef tensorflow_graph; + Status s = ReadBinaryProto(Env::Default(), flags->graph, &tensorflow_graph); + if (!s.ok()) { + LOG(ERROR) << "Could not create Tensorflow Graph: " << s; + return false; + } + + s = session->Create(tensorflow_graph); + if (!s.ok()) { + LOG(ERROR) << "Could not create Tensorflow Session: " << s; + return false; + } + + // Clear the proto to save memory space. 
+ tensorflow_graph.Clear(); + return true; +} + +static bool RunBenchmark() { + DataType input_data_type; + CHECK(DataTypeFromString(flags->input_layer_type, &input_data_type)) + << flags->input_layer_type << " was an invalid type"; + + std::vector sizes; + CHECK(str_util::SplitAndParseAsInts(flags->input_layer_shape, ',', &sizes)) + << "Incorrect size string specified: " << flags->input_layer_shape; + TensorShape input_shape; + for (int i = 0; i < sizes.size(); ++i) { + input_shape.AddDim(sizes[i]); + } + + Tensor input_tensor(input_data_type, input_shape); + + switch (input_data_type) { + case DT_INT32: { + auto int_tensor = input_tensor.flat(); + int_tensor = int_tensor.constant(0.0); + break; + } + case DT_FLOAT: { + auto float_tensor = input_tensor.flat(); + float_tensor = float_tensor.constant(0.0); + break; + } + case DT_QUINT8: { + auto int_tensor = input_tensor.flat(); + int_tensor = int_tensor.constant(0.0); + break; + } + default: + LOG(FATAL) << "Unsupported input type: " << flags->input_layer_type; + } + + std::vector > input_tensors( + {{flags->input_layer, input_tensor}}); + + std::vector output_tensors; + std::vector output_names({flags->output_layer}); + + tensorflow::Status s; + + RunOptions run_options; + run_options.set_trace_level(RunOptions::FULL_TRACE); + RunMetadata run_metadata; + + s = session->Run(run_options, input_tensors, output_names, {}, + &output_tensors, &run_metadata); + + assert(run_metadata.has_step_stats()); + + const StepStats& stats = run_metadata.step_stats(); + + g_stats.ProcessStepStats(stats); + + if (!s.ok()) { + LOG(ERROR) << "Error during inference: " << s; + return false; + } + return true; +} + +} // namespace tensorflow + +int main(int argc, char** argv) { + tensorflow::flags = new tensorflow::Flags(); + + const bool parse_result = tensorflow::ParseFlags( + &argc, argv, + { + tensorflow::Flag("graph", &tensorflow::flags->graph), + tensorflow::Flag("input_layer", &tensorflow::flags->input_layer), + 
tensorflow::Flag("input_layer_shape", + &tensorflow::flags->input_layer_shape), + tensorflow::Flag("input_layer_type", + &tensorflow::flags->input_layer_type), + tensorflow::Flag("output_layer", &tensorflow::flags->output_layer), + tensorflow::Flag("num_runs", &tensorflow::flags->num_runs), + tensorflow::Flag("run_delay", &tensorflow::flags->run_delay), + tensorflow::Flag("num_threads", &tensorflow::flags->num_threads), + }); + + if (!parse_result) { + LOG(ERROR) << "Error parsing command-line flags."; + return -1; + } + + ::tensorflow::port::InitMain(argv[0], &argc, &argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1]; + return -1; + } + + LOG(INFO) << "Graph: [" << tensorflow::flags->graph << "]"; + LOG(INFO) << "Input layer: [" << tensorflow::flags->input_layer << "]"; + LOG(INFO) << "Input shape: [" << tensorflow::flags->input_layer_shape << "]"; + LOG(INFO) << "Input type: [" << tensorflow::flags->input_layer_type << "]"; + LOG(INFO) << "Output layer: [" << tensorflow::flags->output_layer << "]"; + LOG(INFO) << "Num runs: [" << tensorflow::flags->num_runs << "]"; + LOG(INFO) << "Inter-run delay (seconds): [" << tensorflow::flags->run_delay + << "]"; + LOG(INFO) << "Num threads: [" << tensorflow::flags->num_threads << "]"; + + if (!tensorflow::InitializeBenchmark()) { + return -1; + } + + // Convert the run_delay string into a timespec. + const double sleep_seconds = + std::strtod(tensorflow::flags->run_delay.c_str(), nullptr); + timespec req; + req.tv_sec = static_cast(sleep_seconds); + req.tv_nsec = (sleep_seconds - req.tv_sec) * 1000000000; + + LOG(INFO) << "Running benchmark"; + for (int i = 0; i < tensorflow::flags->num_runs; ++i) { + if (!tensorflow::RunBenchmark()) { + LOG(INFO) << "Failed on run " << i; + return -1; + } + + // If requested, sleep between runs for an arbitrary amount of time. + // This can be helpful to determine the effect of mobile processor + // scaling and thermal throttling. 
+ if (sleep_seconds > 0.0) { + nanosleep(&req, nullptr); + } + } + + tensorflow::g_stats.PrintStepStats(); + return 0; +} -- cgit v1.2.3