diff options
Diffstat (limited to 'tensorflow/contrib')
81 files changed, 1403 insertions, 340 deletions
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 0451f00629..3ed8cef56c 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -50,6 +50,7 @@ py_library( "//tensorflow/contrib/image:single_image_random_dot_stereograms_py", "//tensorflow/contrib/input_pipeline:input_pipeline_py", "//tensorflow/contrib/integrate:integrate_py", + "//tensorflow/contrib/kafka", "//tensorflow/contrib/keras", "//tensorflow/contrib/kernel_methods", "//tensorflow/contrib/kfac", @@ -142,6 +143,7 @@ cc_library( "//tensorflow/contrib/factorization:all_ops", "//tensorflow/contrib/framework:all_ops", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_op_lib", + "//tensorflow/contrib/kafka:kafka_ops_op_lib", "//tensorflow/contrib/layers:sparse_feature_cross_op_op_lib", "//tensorflow/contrib/nccl:nccl_ops_op_lib", "//tensorflow/contrib/nearest_neighbor:nearest_neighbor_ops_op_lib", diff --git a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java index dc5b9fb887..abddadac5b 100644 --- a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java +++ b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java @@ -194,6 +194,11 @@ public class TensorFlowInferenceInterface { * @param outputNames A list of output nodes which should be filled by the inference pass. */ public void run(String[] outputNames, boolean enableStats) { + run(outputNames, enableStats, new String[] {}); + } + + /** An overloaded version of runInference that allows supplying targetNodeNames as well */ + public void run(String[] outputNames, boolean enableStats, String[] targetNodeNames) { // Release any Tensors from the previous run calls. closeFetches(); @@ -204,6 +209,11 @@ public class TensorFlowInferenceInterface { runner.fetch(tid.name, tid.outputIndex); } + // Add targets. + for (String t : targetNodeNames) { + runner.addTarget(t); + } + // Run the session. try { if (enableStats) { diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index ad8c995eef..57a52bf4ca 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -6,6 +6,7 @@ tensorflow/core/example tensorflow/core/framework tensorflow/core/lib tensorflow/core/lib/core +tensorflow/core/profiler tensorflow/core/protobuf tensorflow/core/util tensorflow/examples @@ -219,6 +220,8 @@ tensorflow/contrib/input_pipeline/python/ops tensorflow/contrib/integrate tensorflow/contrib/integrate/python tensorflow/contrib/integrate/python/ops +tensorflow/contrib/kafka/python +tensorflow/contrib/kafka/python/ops tensorflow/contrib/keras tensorflow/contrib/keras/api tensorflow/contrib/keras/api/keras diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake index 138993db35..c42bc35ce7 100644 --- a/tensorflow/contrib/cmake/tf_core_ops.cmake +++ b/tensorflow/contrib/cmake/tf_core_ops.cmake @@ -30,6 +30,7 @@ set(tf_op_lib_names "list_ops" "lookup_ops" "logging_ops" + "manip_ops" "math_ops" "nn_ops" "no_op" diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 294b9c5941..34c466fa01 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -335,6 +335,7 @@ GENERATE_PYTHON_OP_LIB("list_ops") GENERATE_PYTHON_OP_LIB("logging_ops") GENERATE_PYTHON_OP_LIB("lookup_ops") GENERATE_PYTHON_OP_LIB("nn_ops") +GENERATE_PYTHON_OP_LIB("manip_ops") GENERATE_PYTHON_OP_LIB("parsing_ops") GENERATE_PYTHON_OP_LIB("random_ops") GENERATE_PYTHON_OP_LIB("remote_fused_graph_ops" diff --git a/tensorflow/contrib/cmake/tools/create_def_file.py b/tensorflow/contrib/cmake/tools/create_def_file.py index f67698eb99..53c2285699 100644 --- a/tensorflow/contrib/cmake/tools/create_def_file.py +++ b/tensorflow/contrib/cmake/tools/create_def_file.py @@ -31,7 +31,7 @@ from __future__ import division from __future__ import print_function import argparse -import io +import codecs import os import re import subprocess @@ -103,7 +103,7 @@ def main(): for lib_path in args.input: proc = subprocess.Popen([DUMPBIN, "/nologo", "/linkermember:1", lib_path], stdout=subprocess.PIPE) - for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"): + for line in codecs.getreader("utf-8")(proc.stdout): cols = line.split() if len(cols) < 2: continue @@ -131,7 +131,7 @@ def main(): # We compare on undname but use the decorated name from candidates. dupes = 0 proc = subprocess.Popen([UNDNAME, tmpfile.name], stdout=subprocess.PIPE) - for idx, line in enumerate(io.TextIOWrapper(proc.stdout, encoding="utf-8")): + for idx, line in enumerate(codecs.getreader("utf-8")(proc.stdout)): decorated = candidates[idx] if decorated in taken: # Symbol is already in output, done. diff --git a/tensorflow/contrib/coder/README.md b/tensorflow/contrib/coder/README.md index e1e867db5a..c6c379c458 100644 --- a/tensorflow/contrib/coder/README.md +++ b/tensorflow/contrib/coder/README.md @@ -30,7 +30,7 @@ following sense: around, - The number of CDF axes does not extend, i.e., `CDF.ndim == data.ndim + 1`. -In the previous example where data has shape (10, 10), the followings are +In the previous example where data has shape (10, 10), the following are acceptable CDF shapes: - (10, 10, 65) diff --git a/tensorflow/contrib/coder/kernels/range_coder.cc b/tensorflow/contrib/coder/kernels/range_coder.cc index f4f076b6c4..21b35155ff 100644 --- a/tensorflow/contrib/coder/kernels/range_coder.cc +++ b/tensorflow/contrib/coder/kernels/range_coder.cc @@ -276,7 +276,7 @@ void RangeEncoder::Finalize(string* sink) { } } else if (base_ != 0) { // If base == 0, then pick 0 from [base, base + size) and no zeros are - // explcitly written. + // explicitly written. // // Otherwise, pick (base + (2^16 - base[16:0])), i.e., round up base to the // next multiple of 2^16. As 2^16 < size, this value should be in the diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py index 4fc5ff1bd1..933df6d71d 100644 --- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py +++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py @@ -20,6 +20,7 @@ from __future__ import print_function import time +from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.contrib import rnn as contrib_rnn from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops from tensorflow.contrib.rnn.python.ops import lstm_ops diff --git a/tensorflow/contrib/eager/python/evaluator.py b/tensorflow/contrib/eager/python/evaluator.py index 3faaeef590..68e7b5421f 100644 --- a/tensorflow/contrib/eager/python/evaluator.py +++ b/tensorflow/contrib/eager/python/evaluator.py @@ -178,7 +178,7 @@ class Evaluator(object): call_op: An op that updates evaluation state on a mini-batch of examples. Must generate an tf.errors.OutOfRangeError when done. results_op: A dictionary of tensors that compute the final evaluation - results from the evaulation state. + results from the evaluation state. sess: The Session to run the evaluation in. Defaults to the default Session. diff --git a/tensorflow/contrib/eager/python/examples/resnet50/README.md b/tensorflow/contrib/eager/python/examples/resnet50/README.md index db023e6c97..79e4600529 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/README.md +++ b/tensorflow/contrib/eager/python/examples/resnet50/README.md @@ -34,7 +34,7 @@ bazel run -c opt --config=cuda :resnet50_graph_test -- --benchmarks=. (Or remove the `--config=cuda` flag for running on CPU instead of GPU). -On October 31, 2017, the benchmarks demostrated comparable performance +On October 31, 2017, the benchmarks demonstrated comparable performance for eager and graph execution of this particular model when using a single NVIDIA Titan X (Pascal) GPU on a host with an Intel Xeon E5-1650 CPU @ 3.50GHz and a batch size of 32. diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py index b302a87e0e..9982fdb07e 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py @@ -97,7 +97,7 @@ class _ConvBlock(tfe.Network): Args: kernel_size: the kernel size of middle conv layer at main path - filters: list of integers, the filterss of 3 conv layer at main path + filters: list of integers, the filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names data_format: data_format for the input ('channels_first' or diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py index 76e06269b6..0ff8746884 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py @@ -22,6 +22,7 @@ import gc import tempfile import time +from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf import tensorflow.contrib.eager as tfe diff --git a/tensorflow/contrib/eager/python/examples/rnn_ptb/README.md b/tensorflow/contrib/eager/python/examples/rnn_ptb/README.md index 743ebb68ee..966177e91c 100644 --- a/tensorflow/contrib/eager/python/examples/rnn_ptb/README.md +++ b/tensorflow/contrib/eager/python/examples/rnn_ptb/README.md @@ -40,7 +40,7 @@ bazel run -c opt --config=cuda :rnn_ptb_graph_test -- --benchmarks=. (Or remove the `--config=cuda` flag for running on CPU instead of GPU). -On October 31, 2017, the benchmarks demostrated slightly better performance +On October 31, 2017, the benchmarks demonstrated slightly better performance (3-6%) for graph execution over eager execution for this particular model when using a single NVIDIA Titan X (Pascal) GPU on a host with an Intel Xeon E5-1650 CPU @ 3.50GHz and a batch size of 32. diff --git a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py index 7b9637a9d5..5c5c59c877 100644 --- a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py +++ b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py @@ -88,7 +88,7 @@ class Embedding(tf.layers.Layer): class PTBModel(tfe.Network): - """LSTM for word language modelling. + """LSTM for word language modeling. Model described in: (Zaremba, et. al.) Recurrent Neural Network Regularization @@ -339,8 +339,7 @@ if __name__ == "__main__": "http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz") parser.add_argument( "--logdir", type=str, default="", help="Directory for checkpoint.") - parser.add_argument( - "--epoch", type=int, default=20, help="Number of epoches.") + parser.add_argument("--epoch", type=int, default=20, help="Number of epochs.") parser.add_argument("--batch-size", type=int, default=20, help="Batch size.") parser.add_argument( "--seq-len", type=int, default=35, help="Sequence length.") diff --git a/tensorflow/contrib/eager/python/examples/spinn/data.py b/tensorflow/contrib/eager/python/examples/spinn/data.py index a6e046320f..fcaae0a4f8 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/data.py +++ b/tensorflow/contrib/eager/python/examples/spinn/data.py @@ -51,11 +51,11 @@ def get_non_parenthesis_words(items): """Get the non-parenthesis items from a SNLI parsed sentence. Args: - items: Data items from a parsed SNLI setence, with parentheses. E.g., + items: Data items from a parsed SNLI sentence, with parentheses. E.g., ["(", "Man", "(", "(", "(", "(", "(", "wearing", "pass", ")", ... Returns: - A list of non-parenthis word items, all converted to lower case. E.g., + A list of non-parentheses word items, all converted to lower case. E.g., ["man", "wearing", "pass", ... """ return [x.lower() for x in items if x not in PARENTHESES and x] @@ -201,7 +201,7 @@ def load_word_vectors(data_root, vocab): def calculate_bins(length2count, min_bin_size): - """Cacluate bin boundaries given a histogram of lengths and mininum bin size. + """Calculate bin boundaries given a histogram of lengths and minimum bin size. Args: length2count: A `dict` mapping length to sentence count. @@ -335,9 +335,9 @@ class SnliData(object): # The sorting above and the batching here makes sure that sentences of # similar max lengths are batched together, minimizing the inefficiency # due to uneven max lengths. The sentences are batched differently in - # each call to get_generator() due to the shuffling before sotring + # each call to get_generator() due to the shuffling before sorting # above. The pad_and_reverse_word_ids() and pad_transitions() functions - # take care of any remaning unevenness of the max sentence lengths. + # take care of any remaining unevenness of the max sentence lengths. end = min(begin + batch_size, len(labels)) # Transpose, because the SPINN model requires time-major, instead of # batch-major. diff --git a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py index 84e25cf81a..7b2f09cba1 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py +++ b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py @@ -26,6 +26,7 @@ import tempfile import time import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf # pylint: disable=g-bad-import-order diff --git a/tensorflow/contrib/eager/python/network_test.py b/tensorflow/contrib/eager/python/network_test.py index 8e6b947e5c..3329fc6c51 100644 --- a/tensorflow/contrib/eager/python/network_test.py +++ b/tensorflow/contrib/eager/python/network_test.py @@ -539,7 +539,7 @@ class NetworkTest(test.TestCase): # No issue here since the name is unique within its scope. name_conflict3 = MyNetwork(name="name_conflict") net2 = MyNetwork() # name=outside_scope/my_network_2 to avoid the - # variable_scope my_network_1 below. + # variable_scope my_network_1 below. vs_name_conflict = MyNetwork(name="vs_name_conflict") # conflict below with variable_scope.variable_scope("intervening_scope"): with variable_scope.variable_scope(captured_scope): @@ -688,7 +688,7 @@ class NetworkTest(test.TestCase): net2(one) # Layer names typically are globally unique rather than being unique within # the scope of their first use. However, within a Network they must be named - # locally so that previous Layer consutrciton does not interfere with + # locally so that previous Layer construction does not interfere with # variable naming (e.g. add a Layer construction before the Network, # suddenly your previously saved checkpoint is incompatible). self.assertEqual("dense", net1.l1.name) diff --git a/tensorflow/contrib/eager/python/saver.py b/tensorflow/contrib/eager/python/saver.py index 57b070ec6e..62421849c7 100644 --- a/tensorflow/contrib/eager/python/saver.py +++ b/tensorflow/contrib/eager/python/saver.py @@ -82,7 +82,7 @@ def restore_variables_on_create(save_path, map_func=None): map_func_wrapper = lambda self, x: x else: if not callable(map_func): - raise ValueError("map_func must be callaled.") + raise ValueError("map_func must be callable.") map_func_wrapper = lambda self, x: map_func(x) ckpt_var_cache = dict() diff --git a/tensorflow/contrib/ffmpeg/decode_video_op.cc b/tensorflow/contrib/ffmpeg/decode_video_op.cc index d44032968d..6f8ad486d1 100644 --- a/tensorflow/contrib/ffmpeg/decode_video_op.cc +++ b/tensorflow/contrib/ffmpeg/decode_video_op.cc @@ -102,16 +102,12 @@ REGISTER_OP("DecodeVideo") return Status::OK(); }) .Doc(R"doc( -Processes the contents of an audio file into a tensor using FFmpeg to decode +Processes the contents of an video file into a tensor using FFmpeg to decode the file. -One row of the tensor is created for each channel in the audio file. Each -channel contains audio samples starting at the beginning of the audio and -having `1/samples_per_second` time between them. If the `channel_count` is -different from the contents of the file, channels will be merged or created. - -contents: The binary audio file contents, as a string or rank-0 string - tensor. +contents: The binary contents of the video file to decode. This is a + scalar. +output: A rank-4 `Tensor` that has `[frames, height, width, 3]` RGB as output. )doc"); } // namespace ffmpeg diff --git a/tensorflow/contrib/framework/python/ops/variables.py b/tensorflow/contrib/framework/python/ops/variables.py index a9d47ac9b9..0754c3e0e3 100644 --- a/tensorflow/contrib/framework/python/ops/variables.py +++ b/tensorflow/contrib/framework/python/ops/variables.py @@ -25,6 +25,7 @@ import re from tensorflow.contrib.framework.python.ops import add_arg_scope as contrib_add_arg_scope from tensorflow.contrib.framework.python.ops import gen_variable_ops from tensorflow.contrib.util import loader +from tensorflow.core.protobuf import saver_pb2 from tensorflow.python import pywrap_tensorflow from tensorflow.python.framework import device as tf_device from tensorflow.python.framework import dtypes @@ -684,7 +685,8 @@ def assign_from_checkpoint_fn(model_path, var_list, ignore_missing_vars=False, 'Variable %s missing in checkpoint %s', var, model_path) var_list = available_vars if var_list: - saver = tf_saver.Saver(var_list, reshape=reshape_variables) + saver = tf_saver.Saver(var_list, reshape=reshape_variables, + write_version=saver_pb2.SaverDef.V1) def callback(session): saver.restore(session, model_path) return callback diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py index 986a5ff6dc..fdfabd07c1 100644 --- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py +++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py @@ -28,6 +28,7 @@ from __future__ import division from __future__ import print_function import functools +import os import sys import tarfile @@ -189,20 +190,34 @@ def get_graph_def_from_resource(filename): return graph_pb2.GraphDef.FromString(resource_loader.load_resource(filename)) -def get_graph_def_from_url_tarball(url, filename): - """Get a GraphDef proto from a tarball on the web.""" - def _progress(count, block_size, total_size): - sys.stdout.write('\r>> Downloading %s %.1f%%' % ( - url, float(count * block_size) / float(total_size) * 100.0)) - sys.stdout.flush() - tar_filename, _ = urllib.request.urlretrieve(url, reporthook=_progress) +def get_graph_def_from_url_tarball(url, filename, tar_filename=None): + """Get a GraphDef proto from a tarball on the web. + + Args: + url: Web address of tarball + filename: Filename of graph definition within tarball + tar_filename: Temporary download filename (None = always download) + + Returns: + A GraphDef loaded from a file in the downloaded tarball. + """ + if not (tar_filename and os.path.exists(tar_filename)): + + def _progress(count, block_size, total_size): + sys.stdout.write('\r>> Downloading %s %.1f%%' % + (url, + float(count * block_size) / float(total_size) * 100.0)) + sys.stdout.flush() + + tar_filename, _ = urllib.request.urlretrieve(url, tar_filename, _progress) with tarfile.open(tar_filename, 'r:gz') as tar: proto_str = tar.extractfile(filename).read() return graph_pb2.GraphDef.FromString(proto_str) def _default_graph_def_fn(): - return get_graph_def_from_url_tarball(INCEPTION_URL, INCEPTION_FROZEN_GRAPH) + return get_graph_def_from_url_tarball(INCEPTION_URL, INCEPTION_FROZEN_GRAPH, + os.path.basename(INCEPTION_URL)) def run_inception(images, diff --git a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py index 7d2a7a254f..56ac45554d 100644 --- a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py +++ b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py @@ -620,7 +620,7 @@ class CombineAdversarialLossTest(test.TestCase): with self.test_session(use_gpu=True) as sess: for _ in range(10): # spot check closeness on more than one sample. gnorm_np, precond_gnorm_np = sess.run([gnorm, precond_gnorm]) - self.assertNear(gnorm_np, precond_gnorm_np, 1e-5) + self.assertNear(gnorm_np, precond_gnorm_np, 1e-4) class CycleConsistencyLossTest(test.TestCase): diff --git a/tensorflow/contrib/hvx/README.md b/tensorflow/contrib/hvx/README.md index 5a6f2f3086..163993a3f6 100644 --- a/tensorflow/contrib/hvx/README.md +++ b/tensorflow/contrib/hvx/README.md @@ -1,60 +1,67 @@ # TensorFlow Runtime with HVX Acceleration -## Description +This README explain how to build and use the TensorFlow runtime with HVX Acceleration. HVX is an extension of Hexagon, a DSP provided by Qualcomm, which can compute vector calculations faster using less energy than ARM processors. -This README explain how to build and use the TensorFlow Runtime with HVX Acceleration. HVX is an extension of Hexagon which is a DSP provided by qualcomm which can compute vector calculations faster using lower energy than ARM processors. +## Dependencies + +* [Android SDK](https://developer.android.com/studio/index.html). +* [Android NDK](https://developer.android.com/ndk/index.html). Save the path in `${NDK_ROOT}`. +* A rooted Qualcomm-based Android device connected to the computer (preferably, a [Snapdragon Development Board](https://developer.qualcomm.com/hardware/additional-snapdragon), but it could be a rooted phone with a Qualcomm SoC, albeit this guide may not work with it). The device needs to be rooted for development and testing purposes, and shouldn't be needed in production. See [Behold, The Snapdragon MDP](https://developer.qualcomm.com/blog/behold-snapdragon-mdp) for more information. +* [Hexagon SDK v3.0](https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools). Save the path in `${QUALCOMM_SDK}`. +* The current directory should be TensorFlow source code (`git clone https://github.com/tensorflow/tensorflow.git && cd tensorflow`), and saved into `${TF_ROOT_DIR}`. + +You may also need to add a test signature in the device to run HVX-based binaries. Follow the instructions in `${QUALCOMM_SDK}/docs/Tools_Signing.html`, using Python 2. + +Note that if the device is not rooted, you may not be able to get the serial number, push the test signature and/or run binary files that call HVX libraries. ## Quick Start Guide -We provides several tools to build and run inference with this runtime quickly. +We provide several tools to build and run inference with this runtime quickly. -#### All-in-one script to run inception model with prebuild hexagon library -If you don’t need to build your own implementation of hexagon HVX, we provide a shortcut to execute graphs by using pre-compiled binaries. +### Run inception model with a prebuilt Hexagon library +If you don’t need to build your own implementation of Hexagon HVX, we provide a shortcut to execute graphs by using pre-compiled binaries. + +```shell +./tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh -p ``` -git clone https://github.com/tensorflow/tensorflow.git -cd tensorflow -NDK_ROOT="/path/to/ndk" ./tensorflow/contrib/makefile/build_all_android.sh -X -``` -(-X downloads dependencies to hexagon HVX and graphs, and copy all dependencies to android and execute a test) -#### All-in-one script to run inception model by building entire libraries from source code - If you want to build your own implementation of hexagon HVX, we provide a sample all-in-one script to execute graphs which downloads source and build everything for hexagon. +The `-p` option makes the script download dependencies (i.e., Hexagon HVX binaries and graphs models), copy them to the Android device and execute a test. -``` -git clone https://github.com/tensorflow/tensorflow.git -cd tensorflow -QUALCOMM_SDK="/path/to/qualcomm/sdk" NDK_ROOT="/path/to/ndk" ./tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh +### Run inception model by building all from the source code + +If you want to build your own implementation of Hexagon HVX, we provide a sample all-in-one script to execute graphs which downloads the source and builds everything that's necessary. + +```shell +./tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh ``` ## Building libraries If you've finished walking through the quick start guide, you may want to try building each binary manually. -#### Build libhexagon_nn_skel.so -Download hexagon nn library from codeaurora.org and build it. +### Build libhexagon\_nn\_skel.so -``` +Download Hexagon NN library from codeaurora.org and build it. + +```shell git clone https://source.codeaurora.org/quic/hexagon_nn/nnlib cd nnlib ``` -(Just follow instructions in README.HOW_TO_BUILD. You can find libhexagon_nn_skel.so in hexagon_Release_dynamic_toolv72_v60/ship) -Then copy the generated binary to GEN_LIBS_DIR +Just follow the instructions in `README.HOW_TO_BUILD`. You can find the file `libhexagon_nn_skel.so` in `hexagon_Release_dynamic_toolv72_v60/ship`. +Then copy the generated binary to `${GEN_LIBS_DIR}`. -``` +```shell GEN_LIBS_DIR="/path/to/a/dir/to/store/hexagon/libraries" cp -v "hexagon_Release_dynamic_toolv72_v60/ship/libhexagon_nn_skel.so" "${GEN_LIBS_DIR}" ``` -#### Build libhexagon_controller.so +### Build libhexagon\_controller.so + Download tensorflow and build hexagon controller. -``` -git clone https://github.com/tensorflow/tensorflow.git -cd tensorflow -TF_ROOT_DIR="$(pwd)" -QUALCOMM_SDK="/path/to/qualcomm/sdk" +```shell GENERATED_NNLIB_DIRECTORY="/path/to/nnlib" GENERATED_HEXAGON_CONTROLLER_DIRECTORY="${QUALCOMM_SDK}/examples/common/generated_hexagon_controller" rm -rf "${GENERATED_HEXAGON_CONTROLLER_DIRECTORY}" @@ -70,12 +77,12 @@ make tree VERBOSE=1 V=android_Release cp -v "${GENERATED_HEXAGON_CONTROLLER_DIRECTORY}/android_Release/ship/libhexagon_controller.so" "${GEN_LIBS_DIR}" ``` -#### Build tensorflow linking hexagon library -Build tensorflow with the build_all_android.sh with specifying -x option. +### Build TensorFlow linking Hexagon library -``` +Build TensorFlow with `build_all_android.sh` specifying the `-x` option. + +```shell BUILD_ALL_ANDROID_PATH="${TF_ROOT_DIR}/tensorflow/contrib/makefile/build_all_android.sh" -NDK_ROOT="/path/to/ndk/root" CC_PREFIX=${CC_PREFIX} NDK_ROOT=${NDK_ROOT} "${BUILD_ALL_ANDROID_PATH}" \ -x "${GEN_LIBS_DIR}" \ @@ -83,11 +90,11 @@ CC_PREFIX=${CC_PREFIX} NDK_ROOT=${NDK_ROOT} "${BUILD_ALL_ANDROID_PATH}" \ -t hexagon_graph_execution ``` -#### Push binaries to your Android device +### Push binaries to your Android device Before running tests on your Android device, you need to push several binaries to it. -``` +```shell adb push "${GEN_LIBS_DIR}/libhexagon_controller.so" "/data/local/tmp" adb push "${GEN_LIBS_DIR}/libhexagon_nn_skel.so" "/vendor/lib/rfsa/adsp" adb push -p \ @@ -100,40 +107,54 @@ adb shell chmod "${ANDROID_EXEC_FILE_MODE}" \ adb wait-for-device ``` -#### Run tests on the device +### Run tests on the device Finally, you can run the inference tests on your device. -``` +```shell adb shell 'LD_LIBRARY_PATH=/data/local/tmp:$LD_LIBRARY_PATH' \ "/data/local/tmp/hexagon_graph_execution" ``` -#### Troubleshooting -If you're using the Open-Q 820 Snapdragon development kit, you may run into an issue with running the executable due to a missing testsig library. From the Hexagon SDK documentation: *Dynamic shared objects are required to be digitally signed and then authenticated at runtime before they are allowed to be loaded and executed.* Generating a testsig library is necessary to run the unsigned sample library built from this project. +### Troubleshooting + +#### Testsig issue + +If you're using the Open-Q 820 Snapdragon Development Kit, you may run into an issue with running the executable due to a missing `testsig` library. From the Hexagon SDK documentation: *Dynamic shared objects are required to be digitally signed and then authenticated at runtime before they are allowed to be loaded and executed.* Generating a testsig library is necessary to run the unsigned sample library built from this project. -If the lack of a testsig library is your problem, you will see errors of the type: +If the lack of a `testsig` library is your problem, you will see errors of the type: `vendor/qcom/proprietary/adsprpc/src/fastrpc_apps_user.c:169::error: -1: 0 == (nErr = remotectl_open(name, (int*)ph, dlerrstr, sizeof(dlerrstr), &dlerr))` -appearing in adb logcat. - -There are several ways to create the testsig library, the only prerequisite is Python and the correct version of the Hexagon-SDK. The following steps is one way to create this library: -1. Run adb as root: `adb root` -2. Run the command `adb shell cat /sys/devices/soc0/serial_number` -3. Convert the decimal number you get as output to hex -4. Run the python script: `python ${QUALCOMM_SDK}/tools/elfsigner/elfsigner.py -t $(SERIAL_NUMBER_HEX_VALUE)` -5. The output of the python script is a shared library stored in ${QUALCOMM_SDK}/tools/elfsigner/output/testsig-$(SERIAL_NUMBER_HEX_VALUE).so -6. Push the shared library to your device: +appearing in `adb logcat` or ["Expected: (version) >= (1), actual: 0 vs 1" while running a binary from adb](https://github.com/tensorflow/tensorflow/issues/11210). + +You need to add a test signature, as described at the beginning of this README. After rebooting your device, you should be able to run the sample application. + +#### Qualcomm SDK Linux installation fails with "Malformed \uxxxx encoding" + +The installation file is based on LaunchAnywhere, which fails in Linux if the `PS1` env variable contains non-common Unicode chars: + ``` -adb root -adb wait-for-device -adb remount -adb wait-for-device -adb shell mkdir /system/lib/rfsa -adb shell mkdir /system/lib/rfsa/adsp -adb push ${QUALCOMM_SDK}/tools/elfsigner/output/testsig-$(SERIAL_NUMBER_HEX_VALUE).so /system/lib/rfsa/adsp/ +Preparing to install... +Extracting the JRE from the installer archive... +Unpacking the JRE... +Extracting the installation resources from the installer archive... +Configuring the installer for this system's environment... + +Launching installer... + +An internal LaunchAnywhere application error has occurred and this application cannot proceed. (LAX) + +Stack Trace: +java.lang.IllegalArgumentException: Malformed \uxxxx encoding. + at java.util.Properties.loadConvert(Properties.java:574) + at java.util.Properties.load0(Properties.java:391) + at java.util.Properties.load(Properties.java:317) + at com.zerog.common.java.util.PropertiesUtil.loadProperties(Unknown Source) + at com.zerog.lax.LAX.<init>(Unknown Source) + at com.zerog.lax.LAX.main(Unknown Source) ``` -After rebooting your device, you should be able to run the sample application. +It can be solved by temporarily assigning the `PS1` environment variable to something simple, such as '$'. + +## Maintainers -Maintainers: -- Satoshi Kataoka (satok@google.com, github.com/satok16) +* Satoshi Kataoka (satok@google.com, github.com/satok16) diff --git a/tensorflow/contrib/kafka/BUILD b/tensorflow/contrib/kafka/BUILD new file mode 100644 index 0000000000..efb403462a --- /dev/null +++ b/tensorflow/contrib/kafka/BUILD @@ -0,0 +1,105 @@ +package( + default_visibility = ["//visibility:private"], +) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs") +load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") +load("//tensorflow:tensorflow.bzl", "tf_kernel_library") +load("//tensorflow:tensorflow.bzl", "tf_py_test") + +tf_kernel_library( + name = "kafka_kernels", + srcs = ["kernels/kafka_dataset_ops.cc"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/kernels:bounds_check_lib", + "//tensorflow/core/kernels:dataset", + "//third_party/eigen3", + "@kafka", + ], +) + +tf_gen_op_libs( + op_lib_names = ["kafka_ops"], + deps = [ + "//tensorflow/core:lib", + ], +) + +tf_gen_op_wrapper_py( + name = "gen_kafka_ops", + out = "python/ops/gen_kafka_ops.py", + require_shape_functions = True, + deps = [":kafka_ops_op_lib"], +) + +py_library( + name = "kafka", + srcs = [ + "__init__.py", + "python/ops/kafka_dataset_ops.py", + ], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], + deps = [ + ":gen_kafka_ops", + "//tensorflow/contrib/util:util_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:platform", + "//tensorflow/python:state_ops", + "//tensorflow/python:training", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/ops:iterator_ops", + "//tensorflow/python/data/ops:readers", + ], +) + +# The Kafka server has to be setup before running the test. +# The Kafka server is setup through Docker so the Docker engine +# has to be installed. +# +# Once the Docker engine is ready: +# To setup the Kafka server: +# $ bash tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh start kafka +# +# After the test is complete: +# To team down the Kafka server: +# $ bash tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh stop kafka +tf_py_test( + name = "kafka_test", + srcs = ["python/kernel_tests/kafka_test.py"], + additional_deps = [ + ":kafka", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + ], + tags = [ + "manual", + "notap", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/kafka/__init__.py b/tensorflow/contrib/kafka/__init__.py new file mode 100644 index 0000000000..4d755c4056 --- /dev/null +++ b/tensorflow/contrib/kafka/__init__.py @@ -0,0 +1,32 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Kafka Dataset. + +@@KafkaDataset +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.kafka.python.ops.kafka_dataset_ops import KafkaDataset + +from tensorflow.python.util.all_util import remove_undocumented + +_allowed_symbols = [ + "KafkaDataset", +] + +remove_undocumented(__name__) diff --git a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc new file mode 100644 index 0000000000..88ef5f3571 --- /dev/null +++ b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc @@ -0,0 +1,321 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/dataset.h" + +#include "tensorflow/core/framework/tensor.h" + +#include "src-cpp/rdkafkacpp.h" + +namespace tensorflow { + +class KafkaDatasetOp : public DatasetOpKernel { + public: + using DatasetOpKernel::DatasetOpKernel; + + void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override { + const Tensor* topics_tensor; + OP_REQUIRES_OK(ctx, ctx->input("topics", &topics_tensor)); + OP_REQUIRES( + ctx, topics_tensor->dims() <= 1, + errors::InvalidArgument("`topics` must be a scalar or a vector.")); + + std::vector<string> topics; + topics.reserve(topics_tensor->NumElements()); + for (int i = 0; i < topics_tensor->NumElements(); ++i) { + topics.push_back(topics_tensor->flat<string>()(i)); + } + + std::string servers = ""; + OP_REQUIRES_OK(ctx, + ParseScalarArgument<std::string>(ctx, "servers", &servers)); + std::string group = ""; + OP_REQUIRES_OK(ctx, ParseScalarArgument<std::string>(ctx, "group", &group)); + bool eof = false; + OP_REQUIRES_OK(ctx, ParseScalarArgument<bool>(ctx, "eof", &eof)); + int64 timeout = -1; + OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "timeout", &timeout)); + OP_REQUIRES(ctx, (timeout > 0), + errors::InvalidArgument( + "Timeout value should be large than 0, got ", timeout)); + *output = new Dataset(ctx, std::move(topics), servers, group, eof, timeout); + } + + private: + class Dataset : public GraphDatasetBase { + public: + Dataset(OpKernelContext* ctx, std::vector<string> topics, + const string& servers, const string& group, const bool eof, + const int64 timeout) + : GraphDatasetBase(ctx), + topics_(std::move(topics)), + servers_(servers), + group_(group), + eof_(eof), + timeout_(timeout) {} + + std::unique_ptr<IteratorBase> MakeIterator( + const string& prefix) const override { + return std::unique_ptr<IteratorBase>( + new Iterator({this, strings::StrCat(prefix, "::Kafka")})); + } + + const DataTypeVector& output_dtypes() const override { + static DataTypeVector* dtypes = new DataTypeVector({DT_STRING}); + return *dtypes; + } + + const std::vector<PartialTensorShape>& output_shapes() const override { + static std::vector<PartialTensorShape>* shapes = + new std::vector<PartialTensorShape>({{}}); + return *shapes; + } + + string DebugString() override { return "KafkaDatasetOp::Dataset"; } + + protected: + Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Node** output) const override { + Node* topics = nullptr; + TF_RETURN_IF_ERROR(b->AddVector(topics_, &topics)); + Node* servers = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(servers_, &servers)); + Node* group = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(group_, &group)); + Node* eof = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(eof_, &eof)); + Node* timeout = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(timeout_, &timeout)); + TF_RETURN_IF_ERROR( + b->AddDataset(this, {topics, servers, group, eof, timeout}, output)); + return Status::OK(); + } + + private: + class Iterator : public DatasetIterator<Dataset> { + public: + explicit Iterator(const Params& params) + : DatasetIterator<Dataset>(params) {} + + Status GetNextInternal(IteratorContext* ctx, + std::vector<Tensor>* out_tensors, + bool* end_of_sequence) override { + mutex_lock l(mu_); + do { + // We are currently processing a topic, so try to read the next line. + if (consumer_.get()) { + while (true) { + if (limit_ >= 0 && + (topic_partition_->offset() >= limit_ || offset_ >= limit_)) { + // EOF current topic + break; + } + std::unique_ptr<RdKafka::Message> message( + consumer_->consume(dataset()->timeout_)); + if (message->err() == RdKafka::ERR_NO_ERROR) { + // Produce the line as output. + Tensor line_tensor(cpu_allocator(), DT_STRING, {}); + line_tensor.scalar<string>()() = + std::string(static_cast<const char*>(message->payload()), + message->len()); + out_tensors->emplace_back(std::move(line_tensor)); + *end_of_sequence = false; + // Sync offset + offset_ = message->offset(); + return Status::OK(); + } + + if (message->err() == RdKafka::ERR__PARTITION_EOF && + dataset()->eof_) { + // EOF current topic + break; + } + if (message->err() != RdKafka::ERR__TIMED_OUT) { + return errors::Internal("Failed to consume:", + message->errstr()); + } + message.reset(nullptr); + consumer_->poll(0); + } + + // We have reached the end of the current topic, so maybe + // move on to next topic. + ResetStreamsLocked(); + ++current_topic_index_; + } + + // Iteration ends when there are no more topic to process. + if (current_topic_index_ == dataset()->topics_.size()) { + *end_of_sequence = true; + return Status::OK(); + } + + TF_RETURN_IF_ERROR(SetupStreamsLocked(ctx->env())); + } while (true); + } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("current_topic_index"), + current_topic_index_)); + + // `consumer_` is empty if + // 1. GetNext has not been called even once. + // 2. All topics have been read and iterator has been exhausted. + if (consumer_.get()) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("current_pos"), offset_)); + } + return Status::OK(); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + ResetStreamsLocked(); + int64 current_topic_index; + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("current_topic_index"), + ¤t_topic_index)); + current_topic_index_ = size_t(current_topic_index); + // The key "current_pos" is written only if the iterator was saved + // with an open topic. + if (reader->Contains(full_name("current_pos"))) { + int64 current_pos; + TF_RETURN_IF_ERROR( + reader->ReadScalar(full_name("current_pos"), ¤t_pos)); + + TF_RETURN_IF_ERROR(SetupStreamsLocked(ctx->env())); + topic_partition_->set_offset(current_pos); + if (topic_partition_->offset() != current_pos) { + return errors::Internal("Failed to restore to offset ", + current_pos); + } + offset_ = current_pos; + } + return Status::OK(); + } + + private: + // Sets up Kafka streams to read from the topic at + // `current_topic_index_`. + Status SetupStreamsLocked(Env* env) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + if (current_topic_index_ >= dataset()->topics_.size()) { + return errors::InvalidArgument( + "current_topic_index_:", current_topic_index_, + " >= topics_.size():", dataset()->topics_.size()); + } + + // Actually move on to next topic. + string entry = dataset()->topics_[current_topic_index_]; + + std::vector<string> parts = str_util::Split(entry, ":"); + if (parts.size() < 1) { + return errors::InvalidArgument("Invalid parameters: ", entry); + } + string topic = parts[0]; + int32 partition = 0; + if (parts.size() > 1) { + if (!strings::safe_strto32(parts[1], &partition)) { + return errors::InvalidArgument("Invalid parameters: ", entry); + } + } + int64 offset = 0; + if (parts.size() > 2) { + if (!strings::safe_strto64(parts[2], &offset)) { + return errors::InvalidArgument("Invalid parameters: ", entry); + } + } + + topic_partition_.reset( + RdKafka::TopicPartition::create(topic, partition, offset)); + + offset_ = topic_partition_->offset(); + limit_ = -1; + if (parts.size() > 3) { + if (!strings::safe_strto64(parts[3], &limit_)) { + return errors::InvalidArgument("Invalid parameters: ", entry); + } + } + + std::unique_ptr<RdKafka::Conf> conf( + RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)); + std::unique_ptr<RdKafka::Conf> topic_conf( + RdKafka::Conf::create(RdKafka::Conf::CONF_TOPIC)); + + std::string errstr; + + RdKafka::Conf::ConfResult result = + conf->set("default_topic_conf", topic_conf.get(), errstr); + if (result != RdKafka::Conf::CONF_OK) { + return errors::Internal("Failed to set default_topic_conf:", errstr); + } + + result = conf->set("bootstrap.servers", dataset()->servers_, errstr); + if (result != RdKafka::Conf::CONF_OK) { + return errors::Internal("Failed to set bootstrap.servers ", + dataset()->servers_, ":", errstr); + } + result = conf->set("group.id", dataset()->group_, errstr); + if (result != RdKafka::Conf::CONF_OK) { + return errors::Internal("Failed to set group.id ", dataset()->group_, + ":", errstr); + } + + consumer_.reset(RdKafka::KafkaConsumer::create(conf.get(), errstr)); + if (!consumer_.get()) { + return errors::Internal("Failed to create consumer:", errstr); + } + + std::vector<RdKafka::TopicPartition*> partitions; + partitions.emplace_back(topic_partition_.get()); + RdKafka::ErrorCode err = consumer_->assign(partitions); + if (err != RdKafka::ERR_NO_ERROR) { + return errors::Internal( + "Failed to assign partition [", topic_partition_->topic(), ", ", + topic_partition_->partition(), ", ", topic_partition_->offset(), + "]:", RdKafka::err2str(err)); + } + + return Status::OK(); + } + + // Resets all Kafka streams. + void ResetStreamsLocked() EXCLUSIVE_LOCKS_REQUIRED(mu_) { + consumer_->unassign(); + consumer_->close(); + consumer_.reset(nullptr); + } + + mutex mu_; + size_t current_topic_index_ GUARDED_BY(mu_) = 0; + int64 offset_ GUARDED_BY(mu_) = 0; + int64 limit_ GUARDED_BY(mu_) = -1; + std::unique_ptr<RdKafka::TopicPartition> topic_partition_ GUARDED_BY(mu_); + std::unique_ptr<RdKafka::KafkaConsumer> consumer_ GUARDED_BY(mu_); + }; + + const std::vector<string> topics_; + const std::string servers_; + const std::string group_; + const bool eof_; + const int64 timeout_; + }; +}; + +REGISTER_KERNEL_BUILDER(Name("KafkaDataset").Device(DEVICE_CPU), + KafkaDatasetOp); + +} // namespace tensorflow diff --git a/tensorflow/contrib/kafka/ops/kafka_ops.cc b/tensorflow/contrib/kafka/ops/kafka_ops.cc new file mode 100644 index 0000000000..8cdf16103b --- /dev/null +++ b/tensorflow/contrib/kafka/ops/kafka_ops.cc @@ -0,0 +1,44 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +REGISTER_OP("KafkaDataset") + .Input("topics: string") + .Input("servers: string") + .Input("group: string") + .Input("eof: bool") + .Input("timeout: int64") + .Output("handle: variant") + .SetIsStateful() + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that emits the messages of one or more Kafka topics. + +topics: A `tf.string` tensor containing one or more subscriptions, + in the format of [topic:partition:offset:length], + by default length is -1 for unlimited. +servers: A list of bootstrap servers. +group: The consumer group id. +eof: If True, the kafka reader will stop on EOF. +timeout: The timeout value for the Kafka Consumer to wait + (in millisecond). +)doc"); + +} // namespace tensorflow diff --git a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.py b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.py new file mode 100644 index 0000000000..621911876f --- /dev/null +++ b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.py @@ -0,0 +1,115 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# ============================================================================== +"""Tests for KafkaDataset.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.kafka.python.ops import kafka_dataset_ops +from tensorflow.python.data.ops import iterator_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class KafkaDatasetTest(test.TestCase): + + def setUp(self): + # The Kafka server has to be setup before the test + # and tear down after the test manually. + # The docker engine has to be installed. + # + # To setup the Kafka server: + # $ bash kafka_test.sh start kafka + # + # To team down the Kafka server: + # $ bash kafka_test.sh stop kafka + pass + + def testKafkaDataset(self): + topics = array_ops.placeholder(dtypes.string, shape=[None]) + num_epochs = array_ops.placeholder(dtypes.int64, shape=[]) + batch_size = array_ops.placeholder(dtypes.int64, shape=[]) + + repeat_dataset = kafka_dataset_ops.KafkaDataset( + topics, group="test", eof=True).repeat(num_epochs) + batch_dataset = repeat_dataset.batch(batch_size) + + iterator = iterator_ops.Iterator.from_structure(batch_dataset.output_types) + init_op = iterator.make_initializer(repeat_dataset) + init_batch_op = iterator.make_initializer(batch_dataset) + get_next = iterator.get_next() + + with self.test_session() as sess: + # Basic test: read from topic 0. + sess.run(init_op, feed_dict={topics: ["test:0:0:4"], num_epochs: 1}) + for i in range(5): + self.assertEqual("D" + str(i), sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + # Basic test: read from topic 1. + sess.run(init_op, feed_dict={topics: ["test:0:5:-1"], num_epochs: 1}) + for i in range(5): + self.assertEqual("D" + str(i + 5), sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + # Basic test: read from both topics. + sess.run( + init_op, + feed_dict={ + topics: ["test:0:0:4", "test:0:5:-1"], + num_epochs: 1 + }) + for j in range(2): + for i in range(5): + self.assertEqual("D" + str(i + j * 5), sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + # Test repeated iteration through both files. + sess.run( + init_op, + feed_dict={ + topics: ["test:0:0:4", "test:0:5:-1"], + num_epochs: 10 + }) + for _ in range(10): + for j in range(2): + for i in range(5): + self.assertEqual("D" + str(i + j * 5), sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + # Test batched and repeated iteration through both files. + sess.run( + init_batch_op, + feed_dict={ + topics: ["test:0:0:4", "test:0:5:-1"], + num_epochs: 10, + batch_size: 5 + }) + for _ in range(10): + self.assertAllEqual(["D" + str(i) for i in range(5)], + sess.run(get_next)) + self.assertAllEqual(["D" + str(i + 5) for i in range(5)], + sess.run(get_next)) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh new file mode 100644 index 0000000000..adf027b8e7 --- /dev/null +++ b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e +set -o pipefail + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 start|stop <kafka container name>" >&2 + exit 1 +fi + +container=$2 +if [ "$1" == "start" ]; then + docker run -d --rm --net=host --name=$container spotify/kafka + echo Wait 5 secs until kafka is up and running + sleep 5 + echo Create test topic + docker exec $container bash -c '/opt/kafka_2.11-0.10.1.0/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test' + echo Create test message + docker exec $container bash -c 'echo -e "D0\nD1\nD2\nD3\nD4\nD5\nD6\nD7\nD8\nD9" > /test' + echo Produce test message + docker exec $container bash -c '/opt/kafka_2.11-0.10.1.0/bin/kafka-console-producer.sh --topic test --broker-list 127.0.0.1:9092 < /test' + + echo Container $container started successfully +elif [ "$1" == "stop" ]; then + docker rm -f $container + + echo Container $container stopped successfully +else + echo "Usage: $0 start|stop <kafka container name>" >&2 + exit 1 +fi + + + diff --git a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py new file mode 100644 index 0000000000..8e51d27a34 --- /dev/null +++ b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py @@ -0,0 +1,74 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Kafka Dataset.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.kafka.python.ops import gen_kafka_ops +from tensorflow.python.data.ops.readers import Dataset +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape + + +class KafkaDataset(Dataset): + """A Kafka Dataset that consumes the message. + """ + + def __init__(self, + topics, + servers="localhost", + group="", + eof=False, + timeout=1000): + """Create a KafkaReader. + + Args: + topics: A `tf.string` tensor containing one or more subscriptions, + in the format of [topic:partition:offset:length], + by default length is -1 for unlimited. + servers: A list of bootstrap servers. + group: The consumer group id. + eof: If True, the kafka reader will stop on EOF. + timeout: The timeout value for the Kafka Consumer to wait + (in millisecond). + """ + super(KafkaDataset, self).__init__() + self._topics = ops.convert_to_tensor( + topics, dtype=dtypes.string, name="topics") + self._servers = ops.convert_to_tensor( + servers, dtype=dtypes.string, name="servers") + self._group = ops.convert_to_tensor( + group, dtype=dtypes.string, name="group") + self._eof = ops.convert_to_tensor(eof, dtype=dtypes.bool, name="eof") + self._timeout = ops.convert_to_tensor( + timeout, dtype=dtypes.int64, name="timeout") + + def _as_variant_tensor(self): + return gen_kafka_ops.kafka_dataset(self._topics, self._servers, self._group, + self._eof, self._timeout) + + @property + def output_classes(self): + return ops.Tensor + + @property + def output_shapes(self): + return tensor_shape.scalar() + + @property + def output_types(self): + return dtypes.string diff --git a/tensorflow/contrib/layers/__init__.py b/tensorflow/contrib/layers/__init__.py index 6c624929f2..ef419862b4 100644 --- a/tensorflow/contrib/layers/__init__.py +++ b/tensorflow/contrib/layers/__init__.py @@ -27,6 +27,7 @@ See the @{$python/contrib.layers} guide. @@convolution2d_transpose @@conv3d_transpose @@convolution3d_transpose +@@dense_to_sparse @@dropout @@elu @@embedding_lookup_unique diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 7c52da7b49..1c3af19a6c 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -29,6 +29,7 @@ from tensorflow.contrib.framework.python.ops import variables from tensorflow.contrib.layers.python.layers import initializers from tensorflow.contrib.layers.python.layers import utils from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops @@ -58,12 +59,12 @@ __all__ = [ 'avg_pool2d', 'avg_pool3d', 'batch_norm', 'bias_add', 'conv2d', 'conv3d', 'conv2d_in_plane', 'conv2d_transpose', 'conv3d_transpose', 'convolution', 'convolution2d', 'convolution2d_in_plane', 'convolution2d_transpose', - 'convolution3d', 'convolution3d_transpose', 'dropout', 'elu', 'flatten', - 'fully_connected', 'GDN', 'gdn', 'layer_norm', 'linear', 'pool', - 'max_pool2d', 'max_pool3d', 'one_hot_encoding', 'relu', 'relu6', 'repeat', - 'scale_gradient', 'separable_conv2d', 'separable_convolution2d', 'softmax', - 'spatial_softmax', 'stack', 'unit_norm', 'legacy_fully_connected', - 'legacy_linear', 'legacy_relu', 'maxout' + 'convolution3d', 'convolution3d_transpose', 'dense_to_sparse', 'dropout', + 'elu', 'flatten', 'fully_connected', 'GDN', 'gdn', 'layer_norm', 'linear', + 'pool', 'max_pool2d', 'max_pool3d', 'one_hot_encoding', 'relu', 'relu6', + 'repeat', 'scale_gradient', 'separable_conv2d', 'separable_convolution2d', + 'softmax', 'spatial_softmax', 'stack', 'unit_norm', + 'legacy_fully_connected', 'legacy_linear', 'legacy_relu', 'maxout' ] DATA_FORMAT_NCHW = 'NCHW' @@ -1401,6 +1402,30 @@ def convolution3d_transpose( @add_arg_scope +def dense_to_sparse(tensor, eos_token=0, outputs_collections=None, scope=None): + """Converts a dense tensor into a sparse tensor. + An example use would be to convert dense labels to sparse ones + so that they can be fed to the ctc_loss. + + Args: + tensor: An `int` `Tensor` to be converted to a `Sparse`. + eos_token: An integer. + It is part of the target label that signfies the end of a sentence. + outputs_collections: Collection to add the outputs. + scope: Optional scope for name_scope. + """ + with variable_scope.variable_scope(scope, 'dense_to_sparse', [tensor]) as sc: + tensor = ops.convert_to_tensor(tensor) + indices = array_ops.where( + math_ops.not_equal(tensor, constant_op.constant(eos_token, + tensor.dtype))) + values = array_ops.gather_nd(tensor, indices) + shape = array_ops.shape(tensor, out_type=dtypes.int64) + outputs = sparse_tensor.SparseTensor(indices, values, shape) + return utils.collect_named_outputs(outputs_collections, sc.name, outputs) + + +@add_arg_scope def dropout(inputs, keep_prob=0.5, noise_shape=None, diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 49b23ce8fa..972ff10bf9 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -44,6 +44,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import random_ops +from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import template from tensorflow.python.ops import variable_scope @@ -1301,6 +1302,19 @@ class ConvolutionInPlaneTest(test.TestCase): self.assertAllClose(result, expected, rtol=1e-5, atol=1e-5) +class DenseToSparseTest(test.TestCase): + + def testDenseFromConstantToSparse(self): + expected_constant = np.reshape(np.arange(24, dtype=np.int64), (3, 4, 2)) + tensor = constant_op.constant(expected_constant) + sparse = _layers.dense_to_sparse(tensor) + dense = sparse_ops.sparse_to_dense(sparse.indices, sparse.dense_shape, + sparse.values) + with self.test_session() as sess: + constant = sess.run(dense) + self.assertAllEqual(expected_constant, constant) + + class DropoutTest(test.TestCase): def testCreateDropout(self): diff --git a/tensorflow/contrib/learn/python/learn/datasets/synthetic.py b/tensorflow/contrib/learn/python/learn/datasets/synthetic.py index 649996c49c..9a843168c2 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/synthetic.py +++ b/tensorflow/contrib/learn/python/learn/datasets/synthetic.py @@ -151,7 +151,7 @@ def spirals(n_samples=100, # Add more points if n_samples is not divisible by n_classes (unbalanced!) extras = n_samples % n_classes if extras > 0: - x_exrta, y_extra = _modes[mode](np.random.rand(extras) * 2 * np.pi, *args, + x_extra, y_extra = _modes[mode](np.random.rand(extras) * 2 * np.pi, *args, **kwargs) spir_x = np.append(spir_x, x_extra) spir_y = np.append(spir_y, y_extra) diff --git a/tensorflow/contrib/learn/python/learn/datasets/synthetic_test.py b/tensorflow/contrib/learn/python/learn/datasets/synthetic_test.py index 613d8d39a3..5809995c8c 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/synthetic_test.py +++ b/tensorflow/contrib/learn/python/learn/datasets/synthetic_test.py @@ -136,6 +136,9 @@ class SyntheticTest(test.TestCase): self.assertRaises(AssertionError, np.testing.assert_array_equal, spir0.data, spir1.data) + def test_spirals_synthetic(self): + synthetic.spirals(3) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py index 12f9bba531..2bd57597c2 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py @@ -1224,7 +1224,7 @@ class DNNRegressorTest(test.TestCase): self, predictions, expected_shape): predictions_nparray = np.array(predictions) self.assertAllEqual(expected_shape, predictions_nparray.shape) - self.assertTrue(np.issubdtype(predictions_nparray.dtype, np.float)) + self.assertTrue(np.issubdtype(predictions_nparray.dtype, np.floating)) def testPredict_AsIterableFalse(self): """Tests predict method with as_iterable=False.""" diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index 0a097d5a69..19829e4991 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -5,25 +5,25 @@ def tflite_copts(): copts = [ "-DFARMHASH_NO_CXX_STRING", ] + select({ - "//tensorflow:android_arm64": [ + str(Label("//tensorflow:android_arm64")): [ "-std=c++11", "-O3", ], - "//tensorflow:android_arm": [ + str(Label("//tensorflow:android_arm")): [ "-mfpu=neon", "-mfloat-abi=softfp", "-std=c++11", "-O3", ], - "//tensorflow:android_x86": [ + str(Label("//tensorflow:android_x86")): [ "-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK", ], - "//tensorflow:ios_x86_64": [ + str(Label("//tensorflow:ios_x86_64")): [ "-msse4.1", ], "//conditions:default": [], }) + select({ - "//tensorflow:with_default_optimizations": [], + str(Label("//tensorflow:with_default_optimizations")): [], "//conditions:default": ["-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK"], }) diff --git a/tensorflow/contrib/lite/examples/label_image/BUILD b/tensorflow/contrib/lite/examples/label_image/BUILD index 476d85c031..959347b549 100644 --- a/tensorflow/contrib/lite/examples/label_image/BUILD +++ b/tensorflow/contrib/lite/examples/label_image/BUILD @@ -42,7 +42,15 @@ cc_library( "bitmap_helpers_impl.h", "label_image.h", ], - deps = ["//tensorflow/contrib/lite:string"], + deps = [ + "//tensorflow/contrib/lite:builtin_op_data", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:schema_fbs_version", + "//tensorflow/contrib/lite:string", + "//tensorflow/contrib/lite:string_util", + "//tensorflow/contrib/lite/kernels:builtin_ops", + "//tensorflow/contrib/lite/schema:schema_fbs", + ], ) # TODO(ahentz): Test disabled as it has a memory leek from read_bmp diff --git a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h index 860e27e5ba..97343dde6b 100644 --- a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h +++ b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_H -#define TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_H +#ifndef TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_H_ +#define TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_H_ #include "tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h" #include "tensorflow/contrib/lite/examples/label_image/label_image.h" @@ -26,15 +26,15 @@ uint8_t* read_bmp(const std::string& input_bmp_name, int* width, int* height, int* channels, Settings* s); template <class T> -void downsize(T* out, uint8_t* in, int image_height, int image_width, - int image_channels, int wanted_height, int wanted_width, - int wanted_channels, Settings* s); +void resize(T* out, uint8_t* in, int image_height, int image_width, + int image_channels, int wanted_height, int wanted_width, + int wanted_channels, Settings* s); // explicit instantiation -template void downsize<uint8_t>(uint8_t*, unsigned char*, int, int, int, int, - int, int, Settings*); -template void downsize<float>(float*, unsigned char*, int, int, int, int, int, +template void resize<uint8_t>(uint8_t*, unsigned char*, int, int, int, int, int, int, Settings*); +template void resize<float>(float*, unsigned char*, int, int, int, int, int, + int, Settings*); } // namespace label_image } // namespace tflite diff --git a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h index 64a931082b..d57f597875 100644 --- a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h +++ b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h @@ -13,8 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_IMPL_H -#define TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_IMPL_H +#ifndef TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_IMPL_H_ +#define TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_IMPL_H_ + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/string_util.h" +#include "tensorflow/contrib/lite/version.h" #include "tensorflow/contrib/lite/examples/label_image/label_image.h" @@ -22,28 +28,67 @@ namespace tflite { namespace label_image { template <class T> -void downsize(T* out, uint8_t* in, int image_height, int image_width, - int image_channels, int wanted_height, int wanted_width, - int wanted_channels, Settings* s) { - for (int y = 0; y < wanted_height; ++y) { - const int in_y = (y * image_height) / wanted_height; - uint8_t* in_row = in + (in_y * image_width * image_channels); - T* out_row = out + (y * wanted_width * wanted_channels); - for (int x = 0; x < wanted_width; ++x) { - const int in_x = (x * image_width) / wanted_width; - uint8_t* in_pixel = in_row + (in_x * image_channels); - T* out_pixel = out_row + (x * wanted_channels); - for (int c = 0; c < wanted_channels; ++c) { - if (s->input_floating) - out_pixel[c] = (in_pixel[c] - s->input_mean) / s->input_std; - else - out_pixel[c] = in_pixel[c]; - } - } +void resize(T* out, uint8_t* in, int image_height, int image_width, + int image_channels, int wanted_height, int wanted_width, + int wanted_channels, Settings* s) { + int number_of_pixels = image_height * image_width * image_channels; + std::unique_ptr<Interpreter> interpreter(new Interpreter); + + int base_index = 0; + + // two inputs: input and new_sizes + interpreter->AddTensors(2, &base_index); + // one output + interpreter->AddTensors(1, &base_index); + // set input and output tensors + interpreter->SetInputs({0, 1}); + interpreter->SetOutputs({2}); + + // set parameters of tensors + TfLiteQuantizationParams quant; + interpreter->SetTensorParametersReadWrite( + 0, kTfLiteFloat32, "input", + {1, image_height, image_width, image_channels}, quant); + interpreter->SetTensorParametersReadWrite(1, kTfLiteInt32, "new_size", {2}, + quant); + interpreter->SetTensorParametersReadWrite( + 2, kTfLiteFloat32, "output", + {1, wanted_height, wanted_width, wanted_channels}, quant); + + ops::builtin::BuiltinOpResolver resolver; + TfLiteRegistration* resize_op = + resolver.FindOp(BuiltinOperator_RESIZE_BILINEAR); + interpreter->AddNodeWithParameters({0, 1}, {2}, nullptr, 0, nullptr, + resize_op, nullptr); + + interpreter->AllocateTensors(); + + // fill input image + // in[] are integers, cannot do memcpy() directly + auto input = interpreter->typed_tensor<float>(0); + for (int i = 0; i < number_of_pixels; i++) { + input[i] = in[i]; + } + + // fill new_sizes + interpreter->typed_tensor<int>(1)[0] = wanted_height; + interpreter->typed_tensor<int>(1)[1] = wanted_width; + + interpreter->Invoke(); + + auto output = interpreter->typed_tensor<float>(2); + auto output_number_of_pixels = + wanted_height * wanted_height * wanted_channels; + + for (int i = 0; i < output_number_of_pixels; i++) { + if (s->input_floating) + out[i] = (output[i] - s->input_mean) / s->input_std; + else + out[i] = (uint8_t)output[i]; } } } // namespace label_image } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_IMPL_H +#endif // TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_IMPL_H_ diff --git a/tensorflow/contrib/lite/examples/label_image/label_image.cc b/tensorflow/contrib/lite/examples/label_image/label_image.cc index 4d2e1ce0bc..a91467d345 100644 --- a/tensorflow/contrib/lite/examples/label_image/label_image.cc +++ b/tensorflow/contrib/lite/examples/label_image/label_image.cc @@ -148,14 +148,22 @@ void RunInference(Settings* s) { int wanted_width = dims->data[2]; int wanted_channels = dims->data[3]; - if (s->input_floating) { - downsize<float>(interpreter->typed_tensor<float>(input), in, image_height, + switch (interpreter->tensor(input)->type) { + case kTfLiteFloat32: + s->input_floating = true; + resize<float>(interpreter->typed_tensor<float>(input), in, image_height, image_width, image_channels, wanted_height, wanted_width, wanted_channels, s); - } else { - downsize<uint8_t>(interpreter->typed_tensor<uint8_t>(input), in, + break; + case kTfLiteUInt8: + resize<uint8_t>(interpreter->typed_tensor<uint8_t>(input), in, image_height, image_width, image_channels, wanted_height, wanted_width, wanted_channels, s); + break; + default: + LOG(FATAL) << "cannot handle input type " + << interpreter->tensor(input)->type << " yet"; + exit(-1); } struct timeval start_time, stop_time; @@ -177,13 +185,21 @@ void RunInference(Settings* s) { std::vector<std::pair<float, int>> top_results; - if (s->input_floating) { - get_top_n<float>(interpreter->typed_output_tensor<float>(0), output_size, - num_results, threshold, &top_results, s->input_floating); - } else { - get_top_n<uint8_t>(interpreter->typed_output_tensor<uint8_t>(0), - output_size, num_results, threshold, &top_results, - s->input_floating); + int output = interpreter->outputs()[0]; + switch (interpreter->tensor(output)->type) { + case kTfLiteFloat32: + get_top_n<float>(interpreter->typed_output_tensor<float>(0), output_size, + num_results, threshold, &top_results, true); + break; + case kTfLiteUInt8: + get_top_n<uint8_t>(interpreter->typed_output_tensor<uint8_t>(0), + output_size, num_results, threshold, &top_results, + false); + break; + default: + LOG(FATAL) << "cannot handle output type " + << interpreter->tensor(input)->type << " yet"; + exit(-1); } std::vector<string> labels; @@ -203,13 +219,11 @@ void display_usage() { LOG(INFO) << "label_image\n" << "--accelerated, -a: [0|1], use Android NNAPI or note\n" << "--count, -c: loop interpreter->Invoke() for certain times\n" - << "--input_floating, -f: [0|1] type of input layer is floating " - "point numbers\n" << "--input_mean, -b: input mean\n" << "--input_std, -s: input standard deviation\n" << "--image, -i: image_name.bmp\n" << "--labels, -l: labels for the model\n" - << "--tflite_mode, -m: model_name.tflite\n" + << "--tflite_model, -m: model_name.tflite\n" << "--threads, -t: number of threads\n" << "--verbose, -v: [0|1] print more information\n" << "\n"; @@ -223,7 +237,6 @@ int Main(int argc, char** argv) { static struct option long_options[] = { {"accelerated", required_argument, 0, 'a'}, {"count", required_argument, 0, 'c'}, - {"input_floating", required_argument, 0, 'f'}, {"verbose", required_argument, 0, 'v'}, {"image", required_argument, 0, 'i'}, {"labels", required_argument, 0, 'l'}, @@ -254,11 +267,6 @@ int Main(int argc, char** argv) { s.loop_count = strtol( // NOLINT(runtime/deprecated_fn) optarg, (char**)NULL, 10); break; - case 'f': - s.input_floating = strtol( // NOLINT(runtime/deprecated_fn) - optarg, (char**)NULL, 10); - s.input_layer_type = "float"; - break; case 'i': s.input_bmp_name = optarg; break; diff --git a/tensorflow/contrib/lite/examples/label_image/label_image.h b/tensorflow/contrib/lite/examples/label_image/label_image.h index ce98e06fc1..4de32e33fb 100644 --- a/tensorflow/contrib/lite/examples/label_image/label_image.h +++ b/tensorflow/contrib/lite/examples/label_image/label_image.h @@ -16,9 +16,11 @@ limitations under the License. #ifndef TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_LABEL_IMAGE_H #define TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_LABEL_IMAGE_H -#include <string> #include "tensorflow/contrib/lite/string.h" +namespace tflite { +namespace label_image { + struct Settings { bool verbose = false; bool accel = false; @@ -33,4 +35,7 @@ struct Settings { int number_of_threads = 4; }; +} // namespace label_image +} // namespace tflite + #endif // TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_LABEL_IMAGE_H diff --git a/tensorflow/contrib/lite/examples/label_image/label_image.md b/tensorflow/contrib/lite/examples/label_image/label_image.md index d6019d673f..9ce32cf101 100644 --- a/tensorflow/contrib/lite/examples/label_image/label_image.md +++ b/tensorflow/contrib/lite/examples/label_image/label_image.md @@ -1,8 +1,12 @@ label_image for TensorFlow Lite inspired by TensorFlow's label_image. + +To build label_image for Android, run $TENSORFLOW_ROOT/configure +and set Android NDK or configure NDK setting in +$TENSORFLOW_ROOT/WORKSPACE first. To build it for android ARMv8: ``` -> bazel build --cxxopt=-std=c++11 \ +> bazel build --config monolithic --cxxopt=-std=c++11 \ --crosstool_top=//external:android/crosstool \ --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ --cpu=arm64-v8a \ @@ -10,13 +14,13 @@ To build it for android ARMv8: ``` or ``` -> bazel build --config android_arm64 --cxxopt=-std=c++11 \ +> bazel build --config android_arm64 --config monolithic --cxxopt=-std=c++11 \ //tensorflow/contrib/lite/examples/label_image:label_image ``` To build it for android arm-v7a: ``` -> bazel build --cxxopt=-std=c++11 \ +> bazel build --config monolithic --cxxopt=-std=c++11 \ --crosstool_top=//external:android/crosstool \ --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ --cpu=armeabi-v7a \ @@ -24,7 +28,7 @@ To build it for android arm-v7a: ``` or ``` -> bazel build --config android_arm --cxxopt=-std=c++11 \ +> bazel build --config android_arm --config monolithic --cxxopt=-std=c++11 \ //tensorflow/contrib/lite/examples/label_image:label_image ``` diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index 4691a543e9..a6ccc99a51 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -278,6 +278,8 @@ cc_library( "optimized/neon_tensor_utils.cc", ], hdrs = [ + "common.h", + "optimized/cpu_check.h", "optimized/neon_tensor_utils.h", "optimized/tensor_utils_impl.h", ], @@ -285,8 +287,11 @@ cc_library( deps = [ ":cpu_check", ":portable_tensor_utils", + ":types", "//tensorflow/contrib/lite:builtin_op_data", "//tensorflow/contrib/lite/kernels:activation_functor", + "@arm_neon_2_x86_sse", + "@gemmlowp", ], ) @@ -306,14 +311,21 @@ cc_library( "tensor_utils.cc", ], hdrs = [ + "common.h", + "compatibility.h", + "optimized/cpu_check.h", + "optimized/neon_tensor_utils.h", "optimized/tensor_utils_impl.h", "reference/portable_tensor_utils.h", "tensor_utils.h", + "types.h", ], copts = NEON_FLAGS_IF_APPLICABLE, deps = [ "//tensorflow/contrib/lite/kernels:activation_functor", "//tensorflow/contrib/lite:builtin_op_data", + "@arm_neon_2_x86_sse", + "@gemmlowp", ] + select({ ":arm": [ ":neon_tensor_utils", @@ -333,6 +345,18 @@ cc_library( ":ios_arm64": [ ":neon_tensor_utils", ], + ":x86_64": [ + ":neon_tensor_utils", + ], + ":x86": [ + ":neon_tensor_utils", + ], + ":k8": [ + ":neon_tensor_utils", + ], + ":darwin": [ + ":neon_tensor_utils", + ], "//conditions:default": [ ":portable_tensor_utils", ], diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/cpu_check.h b/tensorflow/contrib/lite/kernels/internal/optimized/cpu_check.h index 6cb556bf45..3a53d3ab07 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/cpu_check.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/cpu_check.h @@ -34,7 +34,7 @@ inline bool TestCPUFeatureNeon() { #endif // __aarch64__ } -#elif __ARM_NEON +#elif defined USE_NEON || defined __ARM_NEON inline bool TestCPUFeatureNeon() { return true; } diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc index bf0bdfb1fb..883c7f270d 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc @@ -16,11 +16,11 @@ limitations under the License. #include "tensorflow/contrib/lite/builtin_op_data.h" #include "tensorflow/contrib/lite/kernels/activation_functor.h" +#include "tensorflow/contrib/lite/kernels/internal/common.h" #include "tensorflow/contrib/lite/kernels/internal/optimized/tensor_utils_impl.h" #ifdef USE_NEON -#include <arm_neon.h> #define kFloatWeightsPerNeonLane 4 namespace tflite { diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/tensor_utils.cc index 904a97803a..f4181b18a8 100644 --- a/tensorflow/contrib/lite/kernels/internal/tensor_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/tensor_utils.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h" +#include "tensorflow/contrib/lite/kernels/internal/common.h" #ifndef USE_NEON #if defined(__ARM_NEON__) || defined(__ARM_NEON) diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h index 7019c29959..76032771af 100644 --- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h +++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h @@ -1571,7 +1571,7 @@ inline int ANeuralNetworksModel_addOperation(ANeuralNetworksModel* model, } /** - * Specfifies which operands will be the model's inputs and outputs. + * Specifies which operands will be the model's inputs and outputs. * * An operand cannot be used for both input and output. Doing so will * return an error. diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc index 2340f0e850..6961e23690 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc @@ -132,6 +132,7 @@ bool GraphTransformationsPass(int increment, Model* model, CHECK(increment == 1 || increment == -1); bool changed = false; if (model->operators.empty()) { + LOG(INFO) << "Model is empty!!!"; return false; } int op_index = increment == 1 ? 0 : model->operators.size() - 1; diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc index 833c97c758..e79e2a32fc 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc @@ -189,7 +189,10 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) { // Remove all the resolved arrays. for (const string& input_name : concat_op->inputs) { - model->EraseArray(input_name); + // Check to prevent removal of shared tensors + if (CountOpsWithInput(*model, input_name) == 1) { + model->EraseArray(input_name); + } } // Remove concatenate operator diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 8f12bc59fb..0bee694387 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -15,6 +15,7 @@ limitations under the License. #ifndef TENSORFLOW_CONTRIB_LITE_TOCO_MODEL_H_ #define TENSORFLOW_CONTRIB_LITE_TOCO_MODEL_H_ +#include <functional> #include <initializer_list> #include <memory> #include <string> diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 1add90fb82..ce0fde57f4 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -698,10 +698,11 @@ void CheckNonExistentIOArrays(const Model& model) { void CheckNoMissingArray(const Model& model) { for (const auto& op : model.operators) { for (const auto& input : op->inputs) { - CHECK(model.HasArray(input) || model.optional_arrays.count(input)); + CHECK(model.HasArray(input) || model.optional_arrays.count(input)) + << "Input: " << input << " missing for op: " << op->outputs[0] << "."; } for (const auto& output : op->outputs) { - CHECK(model.HasArray(output)); + CHECK(model.HasArray(output)) << "Output: " << output << " missing."; } } CheckNonExistentIOArrays(model); diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index dd5770dc99..81327407d4 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -377,10 +377,10 @@ $(MARCH_OPTION) \ ifeq ($(BUILD_FOR_TEGRA),1) NVCC := $(JETPACK)/cuda/bin/nvcc - NVCCFLAGS := -x=cu -D__CUDACC__ -DNVCC -DNVIDIA_TEGRA -ccbin $(NDK_ROOT)/toolchains/$(TOOLCHAIN)/prebuilt/$(ANDROID_HOST_OS_ARCH)/bin/$(BIN_PREFIX)-g++ --std c++11 --expt-relaxed-constexpr -m64 -gencode arch=compute_53,\"code=sm_53\" -gencode arch=compute_62,\"code=sm_62\" -DEIGEN_AVOID_STL_ARRAY -DTENSORFLOW_USE_EIGEN_THREADPOOL -DLANG_CXX11 -DEIGEN_HAS_C99_MATH -DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=5.3 + NVCCFLAGS := -x=cu -D__CUDACC__ -DNVCC -DANDROID_TEGRA -ccbin $(NDK_ROOT)/toolchains/$(TOOLCHAIN)/prebuilt/$(ANDROID_HOST_OS_ARCH)/bin/$(BIN_PREFIX)-g++ --std c++11 --expt-relaxed-constexpr -m64 -gencode arch=compute_53,\"code=sm_53\" -gencode arch=compute_62,\"code=sm_62\" -DEIGEN_AVOID_STL_ARRAY -DTENSORFLOW_USE_EIGEN_THREADPOOL -DLANG_CXX11 -DEIGEN_HAS_C99_MATH -DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=5.3 CXXFLAGS4NVCC =\ -DIS_SLIM_BUILD \ --DNVIDIA_TEGRA \ +-DANDROID_TEGRA \ -fno-exceptions \ -DNDEBUG $(OPTFLAGS) \ -march=armv8-a \ @@ -391,7 +391,7 @@ $(MARCH_OPTION) \ CXXFLAGS +=\ -DGOOGLE_CUDA=1 \ -D__ANDROID_TYPES_FULL__ \ --DNVIDIA_TEGRA \ +-DANDROID_TEGRA \ -DEIGEN_AVOID_STL_ARRAY \ -DEIGEN_HAS_C99_MATH \ -DLANG_CXX11 -DTENSORFLOW_USE_EIGEN_THREADPOOL -DTF_EXTRA_CUDA_CAPABILITIES=5.3 @@ -407,7 +407,7 @@ $(MARCH_OPTION) \ -I$(JETPACK)/cuda/extras/CUPTI/include - LIBS += \ + CUDA_LIBS := \ -ltfcuda \ -lcudart_static \ -lcudnn \ @@ -420,10 +420,10 @@ $(MARCH_OPTION) \ -lculibos \ -lcurand_static - OBJDIR := $(OBJDIR)Tegra/ - LIBDIR := $(LIBDIR)Tegra/ - BINDIR := $(BINDIR)Tegra/ - DEPDIR := $(DEPDIR)Tegra/ + OBJDIR := $(OBJDIR)android_arm64-v8a/ + LIBDIR := $(LIBDIR)android_arm64-v8a/ + BINDIR := $(BINDIR)android_arm64-v8a/ + DEPDIR := $(DEPDIR)android_arm64-v8a/ TEGRA_LIBS := \ -L$(JETPACK)/cuda/targets/aarch64-linux-androideabi/lib \ @@ -606,7 +606,8 @@ $(wildcard tensorflow/core/util/*/*.cc) \ tensorflow/core/util/version_info.cc # Remove duplicates (for version_info.cc) CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS)) -CORE_CC_EXCLUDE_SRCS := \ + +CORE_CC_EXCLUDE_SRCS_NON_GPU := \ $(wildcard tensorflow/core/*/*test.cc) \ $(wildcard tensorflow/core/*/*testutil*) \ $(wildcard tensorflow/core/*/*testlib*) \ @@ -626,49 +627,31 @@ $(wildcard tensorflow/core/lib/jpeg/*) \ $(wildcard tensorflow/core/lib/png/*) \ $(wildcard tensorflow/core/util/events_writer.*) \ $(wildcard tensorflow/core/util/reporter.*) \ -$(wildcard tensorflow/core/platform/default/cuda_libdevice_path.*) \ -$(wildcard tensorflow/core/platform/default/stream_executor.*) \ $(wildcard tensorflow/core/platform/default/test_benchmark.*) \ -$(wildcard tensorflow/core/platform/cuda.h) \ -$(wildcard tensorflow/core/platform/cuda_libdevice_path.*) \ $(wildcard tensorflow/core/platform/cloud/*) \ $(wildcard tensorflow/core/platform/google/*) \ $(wildcard tensorflow/core/platform/google/*/*) \ $(wildcard tensorflow/core/platform/jpeg.*) \ $(wildcard tensorflow/core/platform/png.*) \ $(wildcard tensorflow/core/platform/s3/*) \ -$(wildcard tensorflow/core/platform/stream_executor.*) \ $(wildcard tensorflow/core/platform/windows/*) \ -$(wildcard tensorflow/core/user_ops/*.cu.cc) \ -$(wildcard tensorflow/core/common_runtime/gpu/*) \ -$(wildcard tensorflow/core/common_runtime/gpu_device_factory.*) \ $(wildcard tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.*) \ $(wildcard tensorflow/core/grappler/inputs/file_input_yielder.*) \ -$(wildcard tensorflow/core/grappler/clusters/single_machine.*) +$(wildcard tensorflow/core/grappler/clusters/single_machine.*) \ +tensorflow/core/util/cuda_kernel_helper_test.cu.cc + +CORE_CC_EXCLUDE_SRCS := \ +$(CORE_CC_EXCLUDE_SRCS_NON_GPU) \ +$(wildcard tensorflow/core/platform/stream_executor.*) \ +$(wildcard tensorflow/core/platform/default/cuda_libdevice_path.*) \ +$(wildcard tensorflow/core/platform/cuda.h) \ +$(wildcard tensorflow/core/platform/cuda_libdevice_path.*) \ +$(wildcard tensorflow/core/user_ops/*.cu.cc) \ +$(wildcard tensorflow/core/common_runtime/gpu/*) \ +$(wildcard tensorflow/core/common_runtime/gpu_device_factory.*) ifeq ($(BUILD_FOR_TEGRA),1) -CORE_CC_ALL_SRCS := \ -$(wildcard tensorflow/core/*.cc) \ -$(wildcard tensorflow/core/common_runtime/*.cc) \ -$(wildcard tensorflow/core/common_runtime/gpu/*.cc) \ -$(wildcard tensorflow/core/framework/*.cc) \ -$(wildcard tensorflow/core/graph/*.cc) \ -$(wildcard tensorflow/core/platform/*.cc) \ -$(wildcard tensorflow/core/platform/*/*.cc) \ -$(wildcard tensorflow/core/platform/*/*/*.cc) \ -$(wildcard tensorflow/core/util/*.cc) \ -$(wildcard tensorflow/core/util/*/*.cc) \ -$(wildcard tensorflow/cc/training/*.cc) \ -$(wildcard tensorflow/stream_executor/*.cc) \ -$(wildcard tensorflow/stream_executor/*/*.cc) \ -$(wildcard tensorflow/core/grappler/optimizers/*.cc) \ -$(wildcard tensorflow/core/grappler/*.cc) \ -$(wildcard tensorflow/core/grappler/costs/*.cc) \ -$(wildcard tensorflow/core/grappler/clusters/*.cc) \ -$(wildcard tensorflow/core/grappler/utils/*.cc) \ -$(wildcard tensorflow/core/lib/core/*.cc) \ -$(wildcard tensorflow/core/lib/*/*.cc) \ -tensorflow/core/grappler/inputs/utils.cc \ +CORE_CC_ALL_SRCS := $(CORE_CC_ALL_SRCS) \ tensorflow/core/kernels/concat_lib_gpu.cc \ tensorflow/core/kernels/cuda_solvers.cc \ tensorflow/core/kernels/cudnn_pooling_gpu.cc \ @@ -677,28 +660,14 @@ tensorflow/core/kernels/fractional_avg_pool_op.cc \ tensorflow/core/kernels/fractional_max_pool_op.cc \ tensorflow/core/kernels/fractional_pool_common.cc \ tensorflow/core/kernels/pooling_ops_3d.cc \ -tensorflow/core/kernels/sparse_fill_empty_rows_op.cc +tensorflow/core/kernels/sparse_fill_empty_rows_op.cc \ +tensorflow/core/kernels/list_kernels.cc \ +$(wildcard tensorflow/core/common_runtime/gpu/*.cc) \ +$(wildcard tensorflow/stream_executor/*.cc) \ +$(wildcard tensorflow/stream_executor/*/*.cc) CORE_CC_EXCLUDE_SRCS := \ -$(wildcard tensorflow/core/*/*test.cc) \ -$(wildcard tensorflow/core/*/*testutil*) \ -$(wildcard tensorflow/core/*/*testlib*) \ -$(wildcard tensorflow/core/*/*/*test.cc) \ -$(wildcard tensorflow/core/*/*/*testutil*) \ -$(wildcard tensorflow/core/framework/op_gen_lib.cc) \ -$(wildcard tensorflow/core/lib/gif/*) \ -$(wildcard tensorflow/core/lib/jpeg/*) \ -$(wildcard tensorflow/core/lib/png/*) \ -$(wildcard tensorflow/core/lib/db/*) \ -$(wildcard tensorflow/core/platform/jpeg.*) \ -$(wildcard tensorflow/core/platform/png.*) \ -$(wildcard tensorflow/core/platform/cloud/*) \ -$(wildcard tensorflow/core/platform/s3/*) \ -$(wildcard tensorflow/core/platform/windows/*) \ -$(wildcard tensorflow/core/*/*/*testlib*) \ -$(wildcard tensorflow/cc/training/*test.cc) \ -tensorflow/core/lib/io/record_reader.cc \ -tensorflow/core/util/cuda_kernel_helper_test.cu.cc +$(CORE_CC_EXCLUDE_SRCS_NON_GPU) CUDA_CC_SRCS := $(wildcard tensorflow/core/kernels/*.cu.cc) CUDA_CC_OBJS := $(addprefix $(OBJDIR), $(CUDA_CC_SRCS:.cc=.o)) @@ -760,7 +729,7 @@ $(BENCHMARK_NAME): $(BENCHMARK_OBJS) $(LIB_PATH) $(CUDA_LIB_DEPS) @mkdir -p $(dir $@) $(CXX) $(CXXFLAGS) $(INCLUDES) \ -o $(BENCHMARK_NAME) $(BENCHMARK_OBJS) \ - $(LIBFLAGS) $(TEGRA_LIBS) $(LIB_PATH) $(LDFLAGS) $(LIBS) + $(LIBFLAGS) $(TEGRA_LIBS) $(LIB_PATH) $(LDFLAGS) $(LIBS) $(CUDA_LIBS) # NVCC compilation rules for Tegra ifeq ($(BUILD_FOR_TEGRA),1) diff --git a/tensorflow/contrib/makefile/build_all_android.sh b/tensorflow/contrib/makefile/build_all_android.sh index 980a44a595..f67c516186 100755 --- a/tensorflow/contrib/makefile/build_all_android.sh +++ b/tensorflow/contrib/makefile/build_all_android.sh @@ -18,7 +18,7 @@ set -e usage() { - echo "Usage: NDK_ROOT=<path to ndk root> $(basename "$0") [-Es:t:Tx:a:X]" + echo "Usage: NDK_ROOT=<path to ndk root> $(basename "$0") [-Es:t:Tx:a]" echo "-E enable experimental hexnn ops" echo "-s [sub_makefiles] sub makefiles separated by white space" echo "-t [build_target] build target for Android makefile [default=all]" diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh index a18df256f9..2d99791839 100755 --- a/tensorflow/contrib/makefile/build_all_ios.sh +++ b/tensorflow/contrib/makefile/build_all_ios.sh @@ -96,7 +96,7 @@ if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then if [[ -z "${BUILD_ARCH}" ]]; then # Compile protobuf for the target iOS device architectures. - tensorflow/contrib/makefile/compile_ios_protobuf.sh -a ${DEFAULT_ARCH} + tensorflow/contrib/makefile/compile_ios_protobuf.sh else # Compile protobuf for the target iOS device architectures. tensorflow/contrib/makefile/compile_ios_protobuf.sh -a ${BUILD_ARCH} diff --git a/tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh b/tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh index 861bb885c7..203ff4f890 100755 --- a/tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh +++ b/tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh @@ -76,6 +76,8 @@ GEN_LIBS_DIR="${GEN_DIR}/libs" GEN_DOWNLOAD_DIR="${GEN_DIR}/downloads" URL_BASE="https://storage.googleapis.com/download.tensorflow.org" +ARCH="armeabi-v7a" + source "${SCRIPT_DIR}/../build_helper.subr" rm -rf "${GEN_DIR}" @@ -219,7 +221,7 @@ if [[ "${BUILD_ONLY}" != "true" ]]; then adb push "${GEN_LIBS_DIR}/libhexagon_nn_skel.so" "/vendor/lib/rfsa/adsp" adb push -p \ - "${TF_ROOT_DIR}/tensorflow/contrib/makefile/gen/bin/hexagon_graph_execution" \ + "${TF_ROOT_DIR}/tensorflow/contrib/makefile/gen/bin/android_${ARCH}/hexagon_graph_execution" \ "/data/local/tmp/" adb wait-for-device adb shell chmod "${ANDROID_EXEC_FILE_MODE}" \ diff --git a/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in b/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in index d9277ed60c..3081084ee7 100644 --- a/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in +++ b/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in @@ -54,7 +54,7 @@ $(INFERENCE_SO_PATH): $(LIB_OBJS) $(INFERENCE_OBJS) $(CUDA_LIB_DEPS) -o $@ $(INFERENCE_OBJS) $(LIB_OBJS) $(TEGRA_LIBS) \ $(LIBFLAGS) $(LDFLAGS) \ -shared -Wl,-soname,$(INFERENCE_SO_NAME) \ - $(LIBS) + $(LIBS) $(CUDA_LIBS) $(INFERENCE_SO_NAME): $(INFERENCE_SO_PATH) diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt index 5f27566398..5a812af4e9 100644 --- a/tensorflow/contrib/makefile/tf_op_files.txt +++ b/tensorflow/contrib/makefile/tf_op_files.txt @@ -91,6 +91,7 @@ tensorflow/core/kernels/reduction_ops_max.cc tensorflow/core/kernels/reduction_ops_common.cc tensorflow/core/kernels/reduction_ops_any.cc tensorflow/core/kernels/reduction_ops_all.cc +tensorflow/core/kernels/roll_op.cc tensorflow/core/kernels/queue_ops.cc tensorflow/core/kernels/queue_base.cc tensorflow/core/kernels/pooling_ops_common.cc @@ -270,6 +271,7 @@ tensorflow/core/ops/parsing_ops.cc tensorflow/core/ops/no_op.cc tensorflow/core/ops/nn_ops.cc tensorflow/core/ops/nn_grad.cc +tensorflow/core/ops/manip_ops.cc tensorflow/core/ops/math_ops.cc tensorflow/core/ops/math_grad.cc tensorflow/core/ops/logging_ops.cc @@ -291,3 +293,4 @@ tensorflow/core/kernels/batchtospace_op.cc tensorflow/core/kernels/warn_about_ints.cc tensorflow/core/kernels/segment_reduction_ops.cc tensorflow/core/kernels/batch_util.cc +tensorflow/core/ops/audio_ops.cc diff --git a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc index c2c42b8ed7..6a7f5efecd 100644 --- a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc +++ b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc @@ -151,7 +151,7 @@ MPIRemoteRendezvous::~MPIRemoteRendezvous() {} void MPIRendezvousMgr::AddRequest(RecvTensorRequest request, const int mpi_dst) { TF_CHECK_OK(recv_tensor_recent_request_ids_.TrackUnique( - req.request_id(), "RecvTensor (MPIRendezvousMgr)", req)); + request.request_id(), "RecvTensor (MPIRendezvousMgr)", request)); const int64 step_id = request.step_id(); const std::string& key = request.rendezvous_key(); Rendezvous::ParsedKey parsed; diff --git a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h index e665922135..5596601ddb 100644 --- a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h +++ b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/contrib/mpi/mpi_msg.pb.h" #include "tensorflow/contrib/mpi/mpi_utils.h" #include "tensorflow/core/distributed_runtime/base_rendezvous_mgr.h" +#include "tensorflow/core/distributed_runtime/recent_request_ids.h" #include "tensorflow/core/distributed_runtime/request_id.h" #include "tensorflow/core/distributed_runtime/worker_env.h" #include "tensorflow/core/protobuf/worker.pb.h" diff --git a/tensorflow/contrib/ndlstm/__init__.py b/tensorflow/contrib/ndlstm/__init__.py index 52e83069cb..a5dd100b26 100644 --- a/tensorflow/contrib/ndlstm/__init__.py +++ b/tensorflow/contrib/ndlstm/__init__.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +"""Library of multidimensional LSTM models and related code.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function + +from tensorflow.contrib.ndlstm.python import lstm1d +from tensorflow.contrib.ndlstm.python import lstm2d diff --git a/tensorflow/contrib/ndlstm/python/lstm1d.py b/tensorflow/contrib/ndlstm/python/lstm1d.py index d3c3531f40..2e2e9086c0 100644 --- a/tensorflow/contrib/ndlstm/python/lstm1d.py +++ b/tensorflow/contrib/ndlstm/python/lstm1d.py @@ -22,7 +22,6 @@ from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.contrib.framework.python.ops import variables from tensorflow.python.framework import constant_op from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import rnn @@ -85,18 +84,11 @@ def ndlstm_base_dynamic(inputs, noutput, scope=None, reverse=False): Output sequence (length, batch_size, noutput) """ with variable_scope.variable_scope(scope, "SeqLstm", [inputs]): - # TODO(tmb) make batch size, sequence_length dynamic - # example: sequence_length = tf.shape(inputs)[0] - _, batch_size, _ = _shape(inputs) - lstm_cell = rnn_cell.BasicLSTMCell(noutput, state_is_tuple=False) - state = array_ops.zeros([batch_size, lstm_cell.state_size]) - sequence_length = int(inputs.get_shape()[0]) - sequence_lengths = math_ops.to_int64( - array_ops.fill([batch_size], sequence_length)) + lstm_cell = rnn_cell.BasicLSTMCell(noutput) if reverse: inputs = array_ops.reverse_v2(inputs, [0]) outputs, _ = rnn.dynamic_rnn( - lstm_cell, inputs, sequence_lengths, state, time_major=True) + lstm_cell, inputs, time_major=True, dtype=inputs.dtype) if reverse: outputs = array_ops.reverse_v2(outputs, [0]) return outputs diff --git a/tensorflow/contrib/opt/python/training/external_optimizer.py b/tensorflow/contrib/opt/python/training/external_optimizer.py index f243317f1d..82ebca7f20 100644 --- a/tensorflow/contrib/opt/python/training/external_optimizer.py +++ b/tensorflow/contrib/opt/python/training/external_optimizer.py @@ -397,10 +397,6 @@ class ScipyOptimizerInterface(ExternalOptimizerInterface): 'automatically and cannot be injected manually'.format(kwarg)) minimize_kwargs.update(optimizer_kwargs) - if method == 'SLSQP': - # SLSQP doesn't support step callbacks. Obviate associated warning - # message. - del minimize_kwargs['callback'] import scipy.optimize # pylint: disable=g-import-not-at-top result = scipy.optimize.minimize(*minimize_args, **minimize_kwargs) diff --git a/tensorflow/contrib/opt/python/training/external_optimizer_test.py b/tensorflow/contrib/opt/python/training/external_optimizer_test.py index 0f597d0a24..953586ee70 100644 --- a/tensorflow/contrib/opt/python/training/external_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/external_optimizer_test.py @@ -299,6 +299,45 @@ class ScipyOptimizerInterfaceTest(TestCase): method = optimizer.optimizer_kwargs.get('method') self.assertEqual('SLSQP', method) + def test_callbacks(self): + vector_val = np.array([7., -2.], dtype=np.float32) + vector = variables.Variable(vector_val, 'vector') + + minimum_location_val = np.arange(2) + minimum_location = constant_op.constant( + minimum_location_val, dtype=dtypes.float32) + + loss = math_ops.reduce_sum(math_ops.square(vector - minimum_location)) / 2. + loss_val_first = ((vector_val - minimum_location_val)**2).sum() / 2. + + optimizer = external_optimizer.ScipyOptimizerInterface(loss, method='SLSQP') + + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + + initial_vector_val = sess.run(vector) + + extra_fetches = [loss] + + step_callback = test.mock.Mock() + loss_callback = test.mock.Mock() + + optimizer.minimize( + sess, + fetches=extra_fetches, + loss_callback=loss_callback, + step_callback=step_callback) + + loss_val_last = sess.run(loss) + + call_first = test.mock.call(loss_val_first) + call_last = test.mock.call(loss_val_last) + loss_calls = [call_first, call_last] + loss_callback.assert_has_calls(loss_calls, any_order=True) + + args, _ = step_callback.call_args + self.assertAllClose(minimum_location_val, args[0]) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/py2tf/impl/api.py b/tensorflow/contrib/py2tf/impl/api.py index 8ff6618912..85d40f3158 100644 --- a/tensorflow/contrib/py2tf/impl/api.py +++ b/tensorflow/contrib/py2tf/impl/api.py @@ -86,8 +86,8 @@ def convert_inline(f, *args, **kwargs): def convert(recursive=False, arg_types=None): """Decorator that compiles a function to graph mode. - The decorator is dynamic - invoking compilation whenever the decorated fuction - is called. This means the parameter values are known at compilation. + The decorator is dynamic - invoking compilation whenever the decorated + function is called. This means the parameter values are known at compilation. Args: recursive: Whether to recusrively convert any functions that the decorator diff --git a/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py b/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py index b2360fec6c..0388079f20 100644 --- a/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py +++ b/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py @@ -61,7 +61,7 @@ def _compute_output_resolution(input_spatial_resolution, kernel_size, stride, stride: Stride (int). total_padding: Total padding to be applied (int). Returns: - output_resolution: Ouput dimension (int) or None. + output_resolution: Output dimension (int) or None. """ if (input_spatial_resolution is None) or (kernel_size is None) or ( stride is None) or (total_padding is None): diff --git a/tensorflow/contrib/reduce_slice_ops/ops/reduce_slice_ops.cc b/tensorflow/contrib/reduce_slice_ops/ops/reduce_slice_ops.cc index b8b56c0e22..92879ab535 100644 --- a/tensorflow/contrib/reduce_slice_ops/ops/reduce_slice_ops.cc +++ b/tensorflow/contrib/reduce_slice_ops/ops/reduce_slice_ops.cc @@ -87,9 +87,9 @@ and 'indices' is [[0,1] [1,1] [0,2]], -the the output will be [[ 1, 2, 3] - [ 0, 0, 0] - [41,52,63]]. +the output will be [[ 1, 2, 3] + [ 0, 0, 0] + [41,52,63]]. ``` The data must be at least rank 1. The indices must be of shape (?,2) where the @@ -132,9 +132,9 @@ and 'indices' is [[0,1] [1,1] [0,2]], -the the output will be [[ 1, 2, 3] - [ 1, 1, 1] - [40,100,180]]. +the output will be [[ 1, 2, 3] + [ 1, 1, 1] + [40,100,180]]. ``` The data must be at least rank 1. The indices can be of shape (?,2) where the @@ -189,9 +189,9 @@ and 'indices' is [[0,1] [1,1] [0,2]], -the the output will be [[ 1, 20, 3] - [ -BIG_VALUE, -BIG_VALUE, -BIG_VALUE] - [ 400, 20, 60]]. +the output will be [[ 1, 20, 3] + [ -BIG_VALUE, -BIG_VALUE, -BIG_VALUE] + [ 400, 20, 60]]. ``` The data must be at least rank 1. The indices can be of shape (?,2) where the @@ -246,9 +246,9 @@ and 'indices' is [[0,1] [1,1] [0,2]], -the the output will be [[ 1, 20, 3] - [ +BIG_VALUE, +BIG_VALUE, +BIG_VALUE] - [ 1, 5, 3]]. +the output will be [[ 1, 20, 3] + [ +BIG_VALUE, +BIG_VALUE, +BIG_VALUE] + [ 1, 5, 3]]. ``` The data must be at least rank 1. The indices can be of shape (?,2) where the diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py index 09527e8473..0e62b315b6 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py @@ -157,6 +157,21 @@ class RNNCellTest(test.TestCase): # Smoke test self.assertAllClose(res[0], [[0.509682, 0.509682]]) + def testSRUCellWithDiffSize(self): + with self.test_session() as sess: + with variable_scope.variable_scope( + "root", initializer=init_ops.constant_initializer(0.5)): + x = array_ops.zeros([1, 3]) + m = array_ops.zeros([1, 2]) + g, _ = contrib_rnn_cell.SRUCell(2)(x, m) + sess.run([variables_lib.global_variables_initializer()]) + res = sess.run([g], { + x.name: np.array([[1., 1., 1.]]), + m.name: np.array([[0.1, 0.1]]) + }) + # Smoke test + self.assertAllClose(res[0], [[0.55255556, 0.55255556]]) + def testBasicLSTMCell(self): for dtype in [dtypes.float16, dtypes.float32]: np_dtype = dtype.as_numpy_dtype diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py index c780e85d72..51933be29d 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py @@ -1635,6 +1635,5 @@ class WeightNormLSTMCellTest(test.TestCase): self.assertAllClose(expected_c, actual_c, 1e-5) self.assertAllClose(expected_h, actual_h, 1e-5) - if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 124e841fc2..fe07493d0f 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -2731,25 +2731,9 @@ class SRUCell(rnn_cell_impl._LayerRNNCell): input_depth = inputs_shape[1].value - # Here the contributor believes that the following constraints - # are implied. The reasoning is explained here with reference to - # the paper https://arxiv.org/pdf/1709.02755.pdf upon which this - # implementation is based. - # In section 2.1 Equation 5, specifically: - # h_t = r_t \odot g(c_t) + (1 - r_t) \odot x_t - # the pointwise operation between r_t and x_t means they have - # the same shape (since we are implementing an RNN cell, braodcasting - # does not happen to input of a single timestep); by the same - # reasons, x_t has the same shape as h_t, essentially mandating that - # input_depth = unit_num. - if input_depth != self._num_units: - raise ValueError("SRU requires input_depth == num_units, got " - "input_depth = %s, num_units = %s" % (input_depth, - self._num_units)) - self._kernel = self.add_variable( rnn_cell_impl._WEIGHTS_VARIABLE_NAME, - shape=[input_depth, 3 * self._num_units]) + shape=[input_depth, 4 * self._num_units]) self._bias = self.add_variable( rnn_cell_impl._BIAS_VARIABLE_NAME, @@ -2762,8 +2746,8 @@ class SRUCell(rnn_cell_impl._LayerRNNCell): """Simple recurrent unit (SRU) with num_units cells.""" U = math_ops.matmul(inputs, self._kernel) - x_bar, f_intermediate, r_intermediate = array_ops.split( - value=U, num_or_size_splits=3, axis=1) + x_bar, f_intermediate, r_intermediate, x_tx = array_ops.split( + value=U, num_or_size_splits=4, axis=1) f_r = math_ops.sigmoid( nn_ops.bias_add( @@ -2771,7 +2755,7 @@ class SRUCell(rnn_cell_impl._LayerRNNCell): f, r = array_ops.split(value=f_r, num_or_size_splits=2, axis=1) c = f * state + (1.0 - f) * x_bar - h = r * self._activation(c) + (1.0 - r) * inputs + h = r * self._activation(c) + (1.0 - r) * x_tx return h, c diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index 95dea312f3..d6b5eceb47 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -924,8 +924,7 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism): _monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode, seed=sigmoid_noise_seed) super(LuongMonotonicAttention, self).__init__( - query_layer=layers_core.Dense( - num_units, name="query_layer", use_bias=False, dtype=dtype), + query_layer=None, memory_layer=layers_core.Dense( num_units, name="memory_layer", use_bias=False, dtype=dtype), memory=memory, diff --git a/tensorflow/contrib/session_bundle/bundle_shim.py b/tensorflow/contrib/session_bundle/bundle_shim.py index 062c9cc680..1db97020a2 100644 --- a/tensorflow/contrib/session_bundle/bundle_shim.py +++ b/tensorflow/contrib/session_bundle/bundle_shim.py @@ -82,7 +82,8 @@ def _convert_default_signature_to_signature_def(signatures): """ default_signature = signatures.default_signature signature_def = meta_graph_pb2.SignatureDef() - if default_signature.WhichOneof("type") == "regression_signature": + if (default_signature.WhichOneof("type") == + legacy_constants.REGRESSION_SIGNATURE): regression_signature = default_signature.regression_signature signature_def.method_name = signature_constants.REGRESS_METHOD_NAME _add_input_to_signature_def(regression_signature.input.tensor_name, @@ -91,7 +92,8 @@ def _convert_default_signature_to_signature_def(signatures): _add_output_to_signature_def(regression_signature.output.tensor_name, signature_constants.REGRESS_OUTPUTS, signature_def) - elif default_signature.WhichOneof("type") == "classification_signature": + elif (default_signature.WhichOneof("type") == + legacy_constants.CLASSIFICATION_SIGNATURE): classification_signature = default_signature.classification_signature signature_def.method_name = signature_constants.CLASSIFY_METHOD_NAME _add_input_to_signature_def(classification_signature.input.tensor_name, @@ -132,8 +134,9 @@ def _convert_named_signatures_to_signature_def(signatures): signature_constants.PREDICT_OUTPUTS] # TODO(pdudnik): what if there are other signatures? Mimic cr/140900781 once # it is submitted. - if (input_signature.WhichOneof("type") != "generic_signature" or - output_signature.WhichOneof("type") != "generic_signature"): + if (input_signature.WhichOneof("type") != legacy_constants.GENERIC_SIGNATURE + or output_signature.WhichOneof("type") != + legacy_constants.GENERIC_SIGNATURE): raise RuntimeError("Named input and output signatures can only be " "up-converted if they are generic signature. " "Input signature type is %s, output signature type is " diff --git a/tensorflow/contrib/session_bundle/constants.py b/tensorflow/contrib/session_bundle/constants.py index 6ced73241a..e833baee79 100644 --- a/tensorflow/contrib/session_bundle/constants.py +++ b/tensorflow/contrib/session_bundle/constants.py @@ -32,3 +32,6 @@ INIT_OP_KEY = "serving_init_op" SIGNATURES_KEY = "serving_signatures" ASSETS_KEY = "serving_assets" GRAPH_KEY = "serving_graph" +REGRESSION_SIGNATURE = "regression_signature" +CLASSIFICATION_SIGNATURE = "classification_signature" +GENERIC_SIGNATURE = "generic_signature" diff --git a/tensorflow/contrib/slim/python/slim/evaluation_test.py b/tensorflow/contrib/slim/python/slim/evaluation_test.py index 870f504d10..8a267ddac7 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation_test.py +++ b/tensorflow/contrib/slim/python/slim/evaluation_test.py @@ -29,7 +29,6 @@ from tensorflow.contrib.framework.python.ops import variables as variables_lib from tensorflow.contrib.metrics.python.ops import metric_ops from tensorflow.contrib.slim.python.slim import evaluation from tensorflow.contrib.training.python.training import evaluation as evaluation_lib -from tensorflow.core.protobuf import saver_pb2 from tensorflow.python.debug.lib import debug_data from tensorflow.python.debug.wrappers import hooks from tensorflow.python.framework import constant_op @@ -236,7 +235,7 @@ class SingleEvaluationTest(test.TestCase): def _prepareCheckpoint(self, checkpoint_path): init_op = control_flow_ops.group(variables.global_variables_initializer(), variables.local_variables_initializer()) - saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V1) + saver = saver_lib.Saver() with self.test_session() as sess: sess.run(init_op) saver.save(sess, checkpoint_path) diff --git a/tensorflow/contrib/solvers/python/kernel_tests/linear_equations_test.py b/tensorflow/contrib/solvers/python/kernel_tests/linear_equations_test.py index 930df2414b..a1282847be 100644 --- a/tensorflow/contrib/solvers/python/kernel_tests/linear_equations_test.py +++ b/tensorflow/contrib/solvers/python/kernel_tests/linear_equations_test.py @@ -45,32 +45,67 @@ def _get_linear_equations_tests(dtype_, use_static_shape_, shape_): low=-1.0, high=1.0, size=np.prod(shape_)).reshape(shape_).astype(dtype_) # Make a selfadjoint, positive definite. a_np = np.dot(a_np.T, a_np) + # jacobi preconditioner + jacobi_np = np.zeros_like(a_np) + jacobi_np[range(a_np.shape[0]), range(a_np.shape[1])] = ( + 1.0 / a_np.diagonal()) rhs_np = np.random.uniform( low=-1.0, high=1.0, size=shape_[0]).astype(dtype_) + x_np = np.zeros_like(rhs_np) tol = 1e-6 if dtype_ == np.float64 else 1e-3 max_iter = 20 with self.test_session() as sess: if use_static_shape_: a = constant_op.constant(a_np) rhs = constant_op.constant(rhs_np) + x = constant_op.constant(x_np) + jacobi = constant_op.constant(jacobi_np) else: a = array_ops.placeholder(dtype_) rhs = array_ops.placeholder(dtype_) + x = array_ops.placeholder(dtype_) + jacobi = array_ops.placeholder(dtype_) operator = util.create_operator(a) - cg_graph = linear_equations.conjugate_gradient( - operator, rhs, tol=tol, max_iter=max_iter) - if use_static_shape_: - cg_val = sess.run(cg_graph) - else: - cg_val = sess.run(cg_graph, feed_dict={a: a_np, rhs: rhs_np}) - norm_r0 = np.linalg.norm(rhs_np) - norm_r = np.sqrt(cg_val.gamma) - self.assertLessEqual(norm_r, tol * norm_r0) - # Validate that we get an equally small residual norm with numpy - # using the computed solution. - r_np = rhs_np - np.dot(a_np, cg_val.x) - norm_r_np = np.linalg.norm(r_np) - self.assertLessEqual(norm_r_np, tol * norm_r0) + preconditioners = [ + None, util.identity_operator(a), + util.create_operator(jacobi) + ] + cg_results = [] + for preconditioner in preconditioners: + cg_graph = linear_equations.conjugate_gradient( + operator, + rhs, + preconditioner=preconditioner, + x=x, + tol=tol, + max_iter=max_iter) + if use_static_shape_: + cg_val = sess.run(cg_graph) + else: + cg_val = sess.run( + cg_graph, + feed_dict={ + a: a_np, + rhs: rhs_np, + x: x_np, + jacobi: jacobi_np + }) + norm_r0 = np.linalg.norm(rhs_np) + norm_r = np.linalg.norm(cg_val.r) + self.assertLessEqual(norm_r, tol * norm_r0) + # Validate that we get an equally small residual norm with numpy + # using the computed solution. + r_np = rhs_np - np.dot(a_np, cg_val.x) + norm_r_np = np.linalg.norm(r_np) + self.assertLessEqual(norm_r_np, tol * norm_r0) + cg_results.append(cg_val) + # Validate that we get same results using identity_preconditioner + # and None + self.assertEqual(cg_results[0].i, cg_results[1].i) + self.assertAlmostEqual(cg_results[0].gamma, cg_results[1].gamma) + self.assertAllClose(cg_results[0].r, cg_results[1].r, rtol=tol) + self.assertAllClose(cg_results[0].x, cg_results[1].x, rtol=tol) + self.assertAllClose(cg_results[0].p, cg_results[1].p, rtol=tol) return [test_conjugate_gradient] diff --git a/tensorflow/contrib/solvers/python/kernel_tests/util_test.py b/tensorflow/contrib/solvers/python/kernel_tests/util_test.py index 1566984b27..5d7534657b 100644 --- a/tensorflow/contrib/solvers/python/kernel_tests/util_test.py +++ b/tensorflow/contrib/solvers/python/kernel_tests/util_test.py @@ -63,6 +63,43 @@ class UtilTest(test.TestCase): def testCreateOperatorUnknownShape(self): self._testCreateOperator(False) + def _testIdentityOperator(self, use_static_shape_): + for dtype in np.float32, np.float64: + a_np = np.array([[1., 2.], [3., 4.], [5., 6.]], dtype=dtype) + x_np = np.array([[2.], [-3.]], dtype=dtype) + y_np = np.array([[2], [-3.], [5.]], dtype=dtype) + with self.test_session() as sess: + if use_static_shape_: + a = constant_op.constant(a_np, dtype=dtype) + x = constant_op.constant(x_np, dtype=dtype) + y = constant_op.constant(y_np, dtype=dtype) + else: + a = array_ops.placeholder(dtype) + x = array_ops.placeholder(dtype) + y = array_ops.placeholder(dtype) + id_op = util.identity_operator(a) + ax = id_op.apply(x) + aty = id_op.apply_adjoint(y) + op_shape = ops.convert_to_tensor(id_op.shape) + if use_static_shape_: + op_shape_val, ax_val, aty_val = sess.run([op_shape, ax, aty]) + else: + op_shape_val, ax_val, aty_val = sess.run( + [op_shape, ax, aty], feed_dict={ + a: a_np, + x: x_np, + y: y_np + }) + self.assertAllEqual(op_shape_val, [3, 2]) + self.assertAllClose(ax_val, x_np) + self.assertAllClose(aty_val, y_np) + + def testIdentityOperator(self): + self._testIdentityOperator(True) + + def testIdentityOperatorUnknownShape(self): + self._testIdentityOperator(False) + def testL2Norm(self): with self.test_session(): x_np = np.array([[2], [-3.], [5.]]) diff --git a/tensorflow/contrib/solvers/python/ops/linear_equations.py b/tensorflow/contrib/solvers/python/ops/linear_equations.py index 8cba56eba6..2395707257 100644 --- a/tensorflow/contrib/solvers/python/ops/linear_equations.py +++ b/tensorflow/contrib/solvers/python/ops/linear_equations.py @@ -26,11 +26,14 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops def conjugate_gradient(operator, rhs, + preconditioner=None, + x=None, tol=1e-4, max_iter=20, name="conjugate_gradient"): @@ -55,6 +58,15 @@ def conjugate_gradient(operator, vector with the result of applying the operator to `x`, i.e. if `operator` represents matrix `A`, `apply` should return `A * x`. rhs: A rank-1 `Tensor` of shape `[N]` containing the right-hand size vector. + preconditioner: An object representing a linear operator, see `operator` + for detail. The preconditioner should approximate the inverse of `A`. + An efficient preconditioner could dramatically improve the rate of + convergence. If `preconditioner` represents matrix `M`(`M` approximates + `A^{-1}`), the algorithm uses `preconditioner.apply(x)` to estimate + `A^{-1}x`. For this to be useful, the cost of applying `M` should be + much lower than computing `A^{-1}` directly. + x: A rank-1 `Tensor` of shape `[N]` containing the initial guess for the + solution. tol: A float scalar convergence tolerance. max_iter: An integer giving the maximum number of iterations. name: A name scope for the operation. @@ -65,35 +77,49 @@ def conjugate_gradient(operator, - x: A rank-1 `Tensor` of shape `[N]` containing the computed solution. - r: A rank-1 `Tensor` of shape `[M]` containing the residual vector. - p: A rank-1 `Tensor` of shape `[N]`. `A`-conjugate basis vector. - - gamma: \\(||r||_2^2\\) + - gamma: \\(r \dot M \dot r\\), equivalent to \\(||r||_2^2\\) when + `preconditioner=None`. """ # ephemeral class holding CG state. cg_state = collections.namedtuple("CGState", ["i", "x", "r", "p", "gamma"]) def stopping_criterion(i, state): - return math_ops.logical_and(i < max_iter, state.gamma > tol) + return math_ops.logical_and(i < max_iter, linalg_ops.norm(state.r) > tol) - # TODO(rmlarsen): add preconditioning - def cg_step(i, state): + def cg_step(i, state): # pylint: disable=missing-docstring z = operator.apply(state.p) alpha = state.gamma / util.dot(state.p, z) x = state.x + alpha * state.p r = state.r - alpha * z - gamma = util.l2norm_squared(r) - beta = gamma / state.gamma - p = r + beta * state.p + if preconditioner is None: + gamma = util.dot(r, r) + beta = gamma / state.gamma + p = r + beta * state.p + else: + q = preconditioner.apply(r) + gamma = util.dot(r, q) + beta = gamma / state.gamma + p = q + beta * state.p return i + 1, cg_state(i + 1, x, r, p, gamma) with ops.name_scope(name): n = operator.shape[1:] rhs = array_ops.expand_dims(rhs, -1) - gamma0 = util.l2norm_squared(rhs) - tol = tol * tol * gamma0 - x = array_ops.expand_dims( - array_ops.zeros( - n, dtype=rhs.dtype.base_dtype), -1) + if x is None: + x = array_ops.expand_dims( + array_ops.zeros(n, dtype=rhs.dtype.base_dtype), -1) + r0 = rhs + else: + x = array_ops.expand_dims(x, -1) + r0 = rhs - operator.apply(x) + if preconditioner is None: + p0 = r0 + else: + p0 = preconditioner.apply(r0) + gamma0 = util.dot(r0, p0) + tol *= linalg_ops.norm(r0) i = constant_op.constant(0, dtype=dtypes.int32) - state = cg_state(i=i, x=x, r=rhs, p=rhs, gamma=gamma0) + state = cg_state(i=i, x=x, r=r0, p=p0, gamma=gamma0) _, state = control_flow_ops.while_loop(stopping_criterion, cg_step, [i, state]) return cg_state( diff --git a/tensorflow/contrib/solvers/python/ops/util.py b/tensorflow/contrib/solvers/python/ops/util.py index 777e0c185d..96947e8eea 100644 --- a/tensorflow/contrib/solvers/python/ops/util.py +++ b/tensorflow/contrib/solvers/python/ops/util.py @@ -45,6 +45,23 @@ def create_operator(matrix): apply_adjoint=lambda v: math_ops.matmul(matrix, v, adjoint_a=True)) +def identity_operator(matrix): + """Creates a linear operator from a rank-2 identity tensor.""" + + linear_operator = collections.namedtuple( + "LinearOperator", ["shape", "dtype", "apply", "apply_adjoint"]) + shape = matrix.get_shape() + if shape.is_fully_defined(): + shape = shape.as_list() + else: + shape = array_ops.shape(matrix) + return linear_operator( + shape=shape, + dtype=matrix.dtype, + apply=lambda v: v, + apply_adjoint=lambda v: v) + + # TODO(rmlarsen): Measure if we should just call matmul. def dot(x, y): return math_ops.reduce_sum(math_ops.conj(x) * y) diff --git a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py index 7970c20a26..78d237e6a2 100644 --- a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py +++ b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from absl import flags import os import subprocess @@ -24,13 +25,21 @@ import sys import tensorflow as tf -tf.flags.DEFINE_string('service_addr', '', - 'Address of TPU profiler service e.g. localhost:8466') -tf.flags.DEFINE_string('logdir', '', - 'Path of TensorBoard log directory e.g. /tmp/tb_log') -tf.flags.DEFINE_integer('duration_ms', 2000, 'Duration of tracing in ms.') +flags.DEFINE_string( + 'service_addr', None, 'Address of TPU profiler service e.g. ' + 'localhost:8466') +flags.DEFINE_string( + 'logdir', None, 'Path of TensorBoard log directory e.g. /tmp/tb_log, ' + 'gs://tb_bucket') +flags.DEFINE_integer('duration_ms', 2000, 'Duration of tracing in ms.') +flags.DEFINE_integer( + 'num_tracing_attempts', 3, 'Automatically retry N times when no trace ' + 'event is collected.') +flags.DEFINE_boolean( + 'include_dataset_ops', True, 'Set to false to profile longer TPU ' + 'device traces.') -FLAGS = tf.flags.FLAGS +FLAGS = flags.FLAGS EXECUTABLE = 'data/capture_tpu_profile' @@ -42,10 +51,13 @@ def main(unused_argv=None): if not FLAGS.service_addr or not FLAGS.logdir: sys.exit('service_addr and logdir must be provided.') executable_path = os.path.join(os.path.dirname(__file__), EXECUTABLE) + logdir = os.path.expandvars(os.path.expanduser(FLAGS.logdir)) cmd = [executable_path] - cmd.append('--logdir='+FLAGS.logdir) + cmd.append('--logdir='+logdir) cmd.append('--service_addr='+FLAGS.service_addr) cmd.append('--duration_ms='+str(FLAGS.duration_ms)) + cmd.append('--num_tracing_attempts='+str(FLAGS.num_tracing_attempts)) + cmd.append('--include_dataset_ops='+str(FLAGS.include_dataset_ops).lower()) subprocess.call(cmd) diff --git a/tensorflow/contrib/tpu/profiler/pip_package/setup.py b/tensorflow/contrib/tpu/profiler/pip_package/setup.py index 179d29602b..33ade16003 100644 --- a/tensorflow/contrib/tpu/profiler/pip_package/setup.py +++ b/tensorflow/contrib/tpu/profiler/pip_package/setup.py @@ -20,16 +20,12 @@ from __future__ import print_function from setuptools import setup -_VERSION = '1.3.0-a1' +_VERSION = '1.5.0-rc1' CONSOLE_SCRIPTS = [ 'capture_tpu_profile=cloud_tpu_profiler.main:run_main', ] -REQUIRED_PACKAGES = [ - 'tensorflow >= 1.2.0', -] - setup( name='cloud_tpu_profiler', version=_VERSION.replace('-', ''), @@ -45,27 +41,22 @@ setup( entry_points={ 'console_scripts': CONSOLE_SCRIPTS, }, - install_requires=REQUIRED_PACKAGES, classifiers=[ # How mature is this project? Common values are # 3 - Alpha # 4 - Beta # 5 - Production/Stable - 'Development Status :: 3 - Alpha', - + 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', 'Intended Audience :: Education', 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', - 'Topic :: Scientific/Engineering', 'Topic :: Scientific/Engineering :: Mathematics', 'Topic :: Scientific/Engineering :: Artificial Intelligence', @@ -74,4 +65,5 @@ setup( 'Topic :: Software Development :: Libraries :: Python Modules', ], license='Apache 2.0', - keywords='tensorflow performance tpu',) + keywords='tensorflow performance tpu', +) |