diff options
author | Andrew Harp <andrewharp@google.com> | 2017-01-25 16:01:16 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-01-25 17:09:37 -0800 |
commit | d7478ece1254e5a8f2382064213364f53ace1413 (patch) | |
tree | a5a5ec435991345e2b7d22049442c51bb3824045 /tensorflow/examples/multibox_detector | |
parent | 554e5f29878b2c89b4ff5357784027aa74dec72e (diff) |
Adding MultiBox person detector standalone example.
Change: 145608840
Diffstat (limited to 'tensorflow/examples/multibox_detector')
-rw-r--r-- | tensorflow/examples/multibox_detector/BUILD | 41 | ||||
-rw-r--r-- | tensorflow/examples/multibox_detector/README.md | 71 | ||||
-rwxr-xr-x | tensorflow/examples/multibox_detector/data/surfers.jpg | bin | 0 -> 53834 bytes | |||
-rw-r--r-- | tensorflow/examples/multibox_detector/main.cc | 428 |
4 files changed, 540 insertions, 0 deletions
diff --git a/tensorflow/examples/multibox_detector/BUILD b/tensorflow/examples/multibox_detector/BUILD new file mode 100644 index 0000000000..5d0c769007 --- /dev/null +++ b/tensorflow/examples/multibox_detector/BUILD @@ -0,0 +1,41 @@ +# Description: +# TensorFlow C++ inference example for detecting objects in images. + +package( + default_visibility = ["//tensorflow:internal"], + features = [ + "-layering_check", + "-parse_headers", + ], +) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +cc_binary( + name = "detect_objects", + srcs = [ + "main.cc", + ], + linkopts = ["-lm"], + deps = [ + "//tensorflow/cc:cc_ops", + "//tensorflow/core:framework_internal", + "//tensorflow/core:tensorflow", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + "bin/**", + "gen/**", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/examples/multibox_detector/README.md b/tensorflow/examples/multibox_detector/README.md new file mode 100644 index 0000000000..42c63a5167 --- /dev/null +++ b/tensorflow/examples/multibox_detector/README.md @@ -0,0 +1,71 @@ +# TensorFlow C++ MultiBox Object Detection Demo + +This example shows how you can load a pre-trained TensorFlow network and use it +to detect objects in images in C++. For an alternate implementation see the +[Android TensorFlow demo](https://tensorflow.org/tutorials/image_recognition/). + +## Description + +This demo uses a model based on [Scalable Object Detection using Deep Neural Networks](https://arxiv.org/abs/1312.2249) to detect people in images passed in from +the command line. This is the same model also used in the Android TensorFlow +demo for real-time person detection and tracking in the camera preview. + +## To build/install/run + +The TensorFlow `GraphDef` that contains the model definition and weights is not +packaged in the repo because of its size. 
Instead, you must first download the +file to the `data` directory in the source tree: + +```bash +$ wget https://storage.googleapis.com/download.tensorflow.org/models/mobile_multibox_v1a.zip -O tensorflow/examples/multibox_detector/data/mobile_multibox_v1a.zip + +$ unzip tensorflow/examples/multibox_detector/data/mobile_multibox_v1a.zip -d tensorflow/examples/multibox_detector/data/ +``` + +Then, as long as you've managed to build the main TensorFlow framework, you +should have everything you need to run this example installed already. + +Once extracted, see the box priors file in the data directory. This file +contains means and standard deviations for all 784 possible detections, +normalized from 0-1 in left top right bottom order. + +To build it, run this command: + +```bash +$ bazel build -c opt tensorflow/examples/multibox_detector/... +``` + +That should build a binary executable that you can then run like this: + +```bash +$ bazel-bin/tensorflow/examples/multibox_detector/detect_objects --image_out=$HOME/surfers_labeled.png +``` + +This uses the default example image that ships with the framework, and should +output something similar to this: + +``` +I0125 18:24:13.804047 8677 main.cc:293] ===== Top 5 Detections ====== +I0125 18:24:13.804058 8677 main.cc:307] Detection 0: L:324.542 T:76.5764 R:373.26 B:214.957 (635) score: 0.267425 +I0125 18:24:13.804077 8677 main.cc:307] Detection 1: L:332.896 T:76.2751 R:372.116 B:204.614 (523) score: 0.245334 +I0125 18:24:13.804087 8677 main.cc:307] Detection 2: L:306.605 T:76.2228 R:371.356 B:217.32 (634) score: 0.216121 +I0125 18:24:13.804096 8677 main.cc:307] Detection 3: L:143.918 T:86.0909 R:187.333 B:195.885 (387) score: 0.171368 +I0125 18:24:13.804104 8677 main.cc:307] Detection 4: L:144.915 T:86.2675 R:185.243 B:165.246 (219) score: 0.169244 +``` + +In this case, we're using a public domain stock image of surfers walking on the +beach, and the top few detections are of the two on the right. 
Adding more +detections with --num_detections=N will also include the surfer on the left, +and eventually non-person boxes below a certain threshold. + +You can visually inspect the detections by viewing the resulting png file +'~/surfers_labeled.png'. + +Next, try it out on your own images by supplying the --image= argument, e.g. + +```bash +$ bazel-bin/tensorflow/examples/multibox_detector/detect_objects --image=my_image.png +``` + +For another implementation of this work, you can check out the [Android +TensorFlow demo](https://tensorflow.org/tutorials/image_recognition/). diff --git a/tensorflow/examples/multibox_detector/data/surfers.jpg b/tensorflow/examples/multibox_detector/data/surfers.jpg Binary files differnew file mode 100755 index 0000000000..940cf234d4 --- /dev/null +++ b/tensorflow/examples/multibox_detector/data/surfers.jpg diff --git a/tensorflow/examples/multibox_detector/main.cc b/tensorflow/examples/multibox_detector/main.cc new file mode 100644 index 0000000000..42972078e5 --- /dev/null +++ b/tensorflow/examples/multibox_detector/main.cc @@ -0,0 +1,428 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <setjmp.h> +#include <stdio.h> +#include <string.h> +#include <fstream> +#include <vector> + +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/image_ops.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/graph/default_device.h" +#include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/stringprintf.h" +#include "tensorflow/core/platform/init_main.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/public/session.h" +#include "tensorflow/core/util/command_line_flags.h" + +// These are all common classes it's handy to reference with no namespace. +using tensorflow::Flag; +using tensorflow::Tensor; +using tensorflow::Status; +using tensorflow::string; +using tensorflow::int32; +using tensorflow::uint8; + +// Takes a file name, and loads a list of comma-separated box priors from it, +// one per line, and returns a vector of the values. 
+Status ReadLocationsFile(const string& file_name, std::vector<float>* result, + size_t* found_label_count) { + std::ifstream file(file_name); + if (!file) { + return tensorflow::errors::NotFound("Labels file ", file_name, + " not found."); + } + result->clear(); + string line; + while (std::getline(file, line)) { + std::vector<float> tokens; + CHECK(tensorflow::str_util::SplitAndParseAsFloats(line, ',', &tokens)); + for (auto number : tokens) { + result->push_back(number); + } + } + *found_label_count = result->size(); + return Status::OK(); +} + +// Given an image file name, read in the data, try to decode it as an image, +// resize it to the requested size, and then scale the values as desired. +Status ReadTensorFromImageFile(const string& file_name, const int input_height, + const int input_width, const float input_mean, + const float input_std, + std::vector<Tensor>* out_tensors) { + auto root = tensorflow::Scope::NewRootScope(); + using namespace ::tensorflow::ops; // NOLINT(build/namespaces) + + string input_name = "file_reader"; + string original_name = "identity"; + string output_name = "normalized"; + auto file_reader = + tensorflow::ops::ReadFile(root.WithOpName(input_name), file_name); + // Now try to figure out what kind of file it is and decode it. + const int wanted_channels = 3; + tensorflow::Output image_reader; + if (tensorflow::StringPiece(file_name).ends_with(".png")) { + image_reader = DecodePng(root.WithOpName("png_reader"), file_reader, + DecodePng::Channels(wanted_channels)); + } else if (tensorflow::StringPiece(file_name).ends_with(".gif")) { + image_reader = DecodeGif(root.WithOpName("gif_reader"), file_reader); + } else { + // Assume if it's neither a PNG nor a GIF then it must be a JPEG. 
+ image_reader = DecodeJpeg(root.WithOpName("jpeg_reader"), file_reader, + DecodeJpeg::Channels(wanted_channels)); + } + + // Also return identity so that we can know the original dimensions and + // optionally save the image out with bounding boxes overlaid. + auto original_image = Identity(root.WithOpName(original_name), image_reader); + + // Now cast the image data to float so we can do normal math on it. + auto float_caster = Cast(root.WithOpName("float_caster"), original_image, + tensorflow::DT_FLOAT); + // The convention for image ops in TensorFlow is that all images are expected + // to be in batches, so that they're four-dimensional arrays with indices of + // [batch, height, width, channel]. Because we only have a single image, we + // have to add a batch dimension of 1 to the start with ExpandDims(). + auto dims_expander = ExpandDims(root, float_caster, 0); + + // Bilinearly resize the image to fit the required dimensions. + auto resized = ResizeBilinear( + root, dims_expander, + Const(root.WithOpName("size"), {input_height, input_width})); + // Subtract the mean and divide by the scale. + Div(root.WithOpName(output_name), Sub(root, resized, {input_mean}), + {input_std}); + + // This runs the GraphDef network definition that we've just constructed, and + // returns the results in the output tensor. 
+ tensorflow::GraphDef graph; + TF_RETURN_IF_ERROR(root.ToGraphDef(&graph)); + + std::unique_ptr<tensorflow::Session> session( + tensorflow::NewSession(tensorflow::SessionOptions())); + TF_RETURN_IF_ERROR(session->Create(graph)); + TF_RETURN_IF_ERROR( + session->Run({}, {output_name, original_name}, {}, out_tensors)); + return Status::OK(); +} + +Status SaveImage(const Tensor& tensor, const string& file_path) { + LOG(INFO) << "Saving image to " << file_path; + CHECK(tensorflow::StringPiece(file_path).ends_with(".png")) + << "Only saving of png files is supported."; + + auto root = tensorflow::Scope::NewRootScope(); + using namespace ::tensorflow::ops; // NOLINT(build/namespaces) + + string encoder_name = "encode"; + string output_name = "file_writer"; + + tensorflow::Output image_encoder = + EncodePng(root.WithOpName(encoder_name), tensor); + tensorflow::ops::WriteFile file_saver = tensorflow::ops::WriteFile( + root.WithOpName(output_name), file_path, image_encoder); + + tensorflow::GraphDef graph; + TF_RETURN_IF_ERROR(root.ToGraphDef(&graph)); + + std::unique_ptr<tensorflow::Session> session( + tensorflow::NewSession(tensorflow::SessionOptions())); + TF_RETURN_IF_ERROR(session->Create(graph)); + std::vector<Tensor> outputs; + TF_RETURN_IF_ERROR(session->Run({}, {}, {output_name}, &outputs)); + + return Status::OK(); +} + +// Reads a model graph definition from disk, and creates a session object you +// can use to run it. 
+Status LoadGraph(string graph_file_name, + std::unique_ptr<tensorflow::Session>* session) { + tensorflow::GraphDef graph_def; + Status load_graph_status = + ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &graph_def); + if (!load_graph_status.ok()) { + return tensorflow::errors::NotFound("Failed to load compute graph at '", + graph_file_name, "'"); + } + session->reset(tensorflow::NewSession(tensorflow::SessionOptions())); + Status session_create_status = (*session)->Create(graph_def); + if (!session_create_status.ok()) { + return session_create_status; + } + return Status::OK(); +} + +// Analyzes the output of the MultiBox graph to retrieve the highest scores and +// their positions in the tensor, which correspond to individual box detections. +Status GetTopDetections(const std::vector<Tensor>& outputs, int how_many_labels, + Tensor* indices, Tensor* scores) { + auto root = tensorflow::Scope::NewRootScope(); + using namespace ::tensorflow::ops; // NOLINT(build/namespaces) + + string output_name = "top_k"; + TopKV2(root.WithOpName(output_name), outputs[0], how_many_labels); + // This runs the GraphDef network definition that we've just constructed, and + // returns the results in the output tensors. + tensorflow::GraphDef graph; + TF_RETURN_IF_ERROR(root.ToGraphDef(&graph)); + + std::unique_ptr<tensorflow::Session> session( + tensorflow::NewSession(tensorflow::SessionOptions())); + TF_RETURN_IF_ERROR(session->Create(graph)); + // The TopK node returns two outputs, the scores and their original indices, + // so we have to append :0 and :1 to specify them both. + std::vector<Tensor> out_tensors; + TF_RETURN_IF_ERROR(session->Run({}, {output_name + ":0", output_name + ":1"}, + {}, &out_tensors)); + *scores = out_tensors[0]; + *indices = out_tensors[1]; + return Status::OK(); +} + +// Converts an encoded location to an actual box placement with the provided +// box priors. 
+void DecodeLocation(const float* encoded_location, const float* box_priors, + float* decoded_location) { + bool non_zero = false; + for (int i = 0; i < 4; ++i) { + const float curr_encoding = encoded_location[i]; + non_zero = non_zero || curr_encoding != 0.0f; + + const float mean = box_priors[i * 2]; + const float std_dev = box_priors[i * 2 + 1]; + + float currentLocation = curr_encoding * std_dev + mean; + + currentLocation = std::max(currentLocation, 0.0f); + currentLocation = std::min(currentLocation, 1.0f); + decoded_location[i] = currentLocation; + } + + if (!non_zero) { + LOG(WARNING) << "No non-zero encodings; check log for inference errors."; + } +} + +float DecodeScore(float encoded_score) { return 1 / (1 + exp(-encoded_score)); } + +void DrawBox(const int image_width, const int image_height, int left, int top, + int right, int bottom, tensorflow::TTypes<uint8>::Flat* image) { + tensorflow::TTypes<uint8>::Flat image_ref = *image; + + top = std::max(0, std::min(image_height - 1, top)); + bottom = std::max(0, std::min(image_height - 1, bottom)); + + left = std::max(0, std::min(image_width - 1, left)); + right = std::max(0, std::min(image_width - 1, right)); + + for (int i = 0; i < 3; ++i) { + uint8 val = i == 2 ? 255 : 0; + for (int x = left; x <= right; ++x) { + image_ref((top * image_width + x) * 3 + i) = val; + image_ref((bottom * image_width + x) * 3 + i) = val; + } + for (int y = top; y <= bottom; ++y) { + image_ref((y * image_width + left) * 3 + i) = val; + image_ref((y * image_width + right) * 3 + i) = val; + } + } +} + +// Given the output of a model run, and the name of a file containing the labels +// this prints out the top five highest-scoring values. 
+Status PrintTopDetections(const std::vector<Tensor>& outputs, + const string& labels_file_name, + const int num_boxes, + const int num_detections, + const string& image_file_name, + Tensor* original_tensor) { + std::vector<float> locations; + size_t label_count; + Status read_labels_status = + ReadLocationsFile(labels_file_name, &locations, &label_count); + if (!read_labels_status.ok()) { + LOG(ERROR) << read_labels_status; + return read_labels_status; + } + CHECK_EQ(label_count, num_boxes * 8); + + const int how_many_labels = + std::min(num_detections, static_cast<int>(label_count)); + Tensor indices; + Tensor scores; + TF_RETURN_IF_ERROR( + GetTopDetections(outputs, how_many_labels, &indices, &scores)); + + tensorflow::TTypes<float>::Flat scores_flat = scores.flat<float>(); + + tensorflow::TTypes<int32>::Flat indices_flat = indices.flat<int32>(); + + const Tensor& encoded_locations = outputs[1]; + auto locations_encoded = encoded_locations.flat<float>(); + + LOG(INFO) << original_tensor->DebugString(); + const int image_width = original_tensor->shape().dim_size(1); + const int image_height = original_tensor->shape().dim_size(0); + + tensorflow::TTypes<uint8>::Flat image_flat = original_tensor->flat<uint8>(); + + LOG(INFO) << "===== Top " << how_many_labels << " Detections ======"; + for (int pos = 0; pos < how_many_labels; ++pos) { + const int label_index = indices_flat(pos); + const float score = scores_flat(pos); + + float decoded_location[4]; + DecodeLocation(&locations_encoded(label_index * 4), + &locations[label_index * 8], decoded_location); + + float left = decoded_location[0] * image_width; + float top = decoded_location[1] * image_height; + float right = decoded_location[2] * image_width; + float bottom = decoded_location[3] * image_height; + + LOG(INFO) << "Detection " << pos << ": " + << "L:" << left << " " + << "T:" << top << " " + << "R:" << right << " " + << "B:" << bottom << " " + << "(" << label_index << ") score: " << DecodeScore(score); + + 
DrawBox(image_width, image_height, left, top, right, bottom, &image_flat); + } + + if (!image_file_name.empty()) { + return SaveImage(*original_tensor, image_file_name); + } + return Status::OK(); +} + +int main(int argc, char* argv[]) { + // These are the command-line flags the program can understand. + // They define where the graph and input data is located, and what kind of + // input the model expects. If you train your own model, or use something + // other than multibox_model you'll need to update these. + string image = + "tensorflow/examples/multibox_detector/data/surfers.jpg"; + string graph = + "tensorflow/examples/multibox_detector/data/" + "multibox_model.pb"; + string box_priors = + "tensorflow/examples/multibox_detector/data/" + "multibox_location_priors.txt"; + int32 input_width = 224; + int32 input_height = 224; + int32 input_mean = 128; + int32 input_std = 128; + int32 num_detections = 5; + int32 num_boxes = 784; + string input_layer = "ResizeBilinear"; + string output_location_layer = "output_locations/Reshape"; + string output_score_layer = "output_scores/Reshape"; + string root_dir = ""; + string image_out = ""; + + std::vector<Flag> flag_list = { + Flag("image", &image, "image to be processed"), + Flag("image_out", &image_out, + "location to save output image, if desired"), + Flag("graph", &graph, "graph to be executed"), + Flag("box_priors", &box_priors, "name of file containing box priors"), + Flag("input_width", &input_width, "resize image to this width in pixels"), + Flag("input_height", &input_height, + "resize image to this height in pixels"), + Flag("input_mean", &input_mean, "scale pixel values to this mean"), + Flag("input_std", &input_std, "scale pixel values to this std deviation"), + Flag("num_detections", &num_detections, + "number of top detections to return"), + Flag("num_boxes", &num_boxes, + "number of boxes defined by the location file"), + Flag("input_layer", &input_layer, "name of input layer"), + 
Flag("output_location_layer", &output_location_layer, + "name of location output layer"), + Flag("output_score_layer", &output_score_layer, + "name of score output layer"), + Flag("root_dir", &root_dir, + "interpret image and graph file names relative to this directory"), + }; + + string usage = tensorflow::Flags::Usage(argv[0], flag_list); + const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parse_result) { + LOG(ERROR) << usage; + return -1; + } + + // We need to call this to set up global state for TensorFlow. + tensorflow::port::InitMain(argv[0], &argc, &argv); + if (argc > 1) { + LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; + return -1; + } + + // First we load and initialize the model. + std::unique_ptr<tensorflow::Session> session; + string graph_path = tensorflow::io::JoinPath(root_dir, graph); + Status load_graph_status = LoadGraph(graph_path, &session); + if (!load_graph_status.ok()) { + LOG(ERROR) << load_graph_status; + return -1; + } + + // Get the image from disk as a float array of numbers, resized and normalized + // to the specifications the main graph expects. + std::vector<Tensor> image_tensors; + string image_path = tensorflow::io::JoinPath(root_dir, image); + + Status read_tensor_status = + ReadTensorFromImageFile(image_path, input_height, input_width, input_mean, + input_std, &image_tensors); + if (!read_tensor_status.ok()) { + LOG(ERROR) << read_tensor_status; + return -1; + } + const Tensor& resized_tensor = image_tensors[0]; + + // Actually run the image through the model. 
+ std::vector<Tensor> outputs; + Status run_status = + session->Run({{input_layer, resized_tensor}}, + {output_score_layer, output_location_layer}, {}, &outputs); + if (!run_status.ok()) { + LOG(ERROR) << "Running model failed: " << run_status; + return -1; + } + + Status print_status = PrintTopDetections(outputs, box_priors, num_boxes, + num_detections, image_out, + &image_tensors[1]); + + if (!print_status.ok()) { + LOG(ERROR) << "Running print failed: " << print_status; + return -1; + } + return 0; +} |