author     Michael Case <mikecase@google.com>  2018-02-07 14:36:00 -0800
committer  TensorFlower Gardener <gardener@tensorflow.org>  2018-02-07 14:39:49 -0800
commit     d90054e7c0f41f4bab81df0548577a73b939a87a (patch)
tree       a15aea686a9d3f305e316d2a6ada0859ad8170d1
parent     8461760f9f6cde8ed97507484d2a879140141032 (diff)
Merge changes from github.
PiperOrigin-RevId: 184897758
-rw-r--r--ISSUE_TEMPLATE.md2
-rw-r--r--README.md4
-rw-r--r--RELEASE.md27
-rw-r--r--WORKSPACE8
-rw-r--r--configure.py7
-rw-r--r--tensorflow/BUILD6
-rw-r--r--tensorflow/cc/BUILD1
-rw-r--r--tensorflow/cc/tools/freeze_saved_model_test.cc2
-rw-r--r--tensorflow/compiler/aot/BUILD20
-rw-r--r--tensorflow/compiler/aot/tests/BUILD45
-rw-r--r--tensorflow/compiler/tests/binary_ops_test.py8
-rw-r--r--tensorflow/compiler/tf2xla/kernels/pooling_ops.cc49
-rw-r--r--tensorflow/compiler/xla/client/computation_builder.h2
-rw-r--r--tensorflow/compiler/xla/tools/parser/hlo_parser.cc2
-rw-r--r--tensorflow/contrib/BUILD2
-rw-r--r--tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java10
-rw-r--r--tensorflow/contrib/cmake/python_modules.txt3
-rw-r--r--tensorflow/contrib/cmake/tf_core_ops.cmake1
-rwxr-xr-xtensorflow/contrib/cmake/tf_python.cmake1
-rw-r--r--tensorflow/contrib/cmake/tools/create_def_file.py6
-rw-r--r--tensorflow/contrib/coder/README.md2
-rw-r--r--tensorflow/contrib/coder/kernels/range_coder.cc2
-rw-r--r--tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py1
-rw-r--r--tensorflow/contrib/eager/python/evaluator.py2
-rw-r--r--tensorflow/contrib/eager/python/examples/resnet50/README.md2
-rw-r--r--tensorflow/contrib/eager/python/examples/resnet50/resnet50.py2
-rw-r--r--tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py1
-rw-r--r--tensorflow/contrib/eager/python/examples/rnn_ptb/README.md2
-rw-r--r--tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py5
-rw-r--r--tensorflow/contrib/eager/python/examples/spinn/data.py10
-rw-r--r--tensorflow/contrib/eager/python/examples/spinn/spinn_test.py1
-rw-r--r--tensorflow/contrib/eager/python/network_test.py4
-rw-r--r--tensorflow/contrib/eager/python/saver.py2
-rw-r--r--tensorflow/contrib/ffmpeg/decode_video_op.cc12
-rw-r--r--tensorflow/contrib/framework/python/ops/variables.py4
-rw-r--r--tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py31
-rw-r--r--tensorflow/contrib/gan/python/losses/python/losses_impl_test.py2
-rw-r--r--tensorflow/contrib/hvx/README.md137
-rw-r--r--tensorflow/contrib/kafka/BUILD105
-rw-r--r--tensorflow/contrib/kafka/__init__.py32
-rw-r--r--tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc321
-rw-r--r--tensorflow/contrib/kafka/ops/kafka_ops.cc44
-rw-r--r--tensorflow/contrib/kafka/python/kernel_tests/kafka_test.py115
-rw-r--r--tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh48
-rw-r--r--tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py74
-rw-r--r--tensorflow/contrib/layers/__init__.py1
-rw-r--r--tensorflow/contrib/layers/python/layers/layers.py37
-rw-r--r--tensorflow/contrib/layers/python/layers/layers_test.py14
-rw-r--r--tensorflow/contrib/learn/python/learn/datasets/synthetic.py2
-rw-r--r--tensorflow/contrib/learn/python/learn/datasets/synthetic_test.py3
-rw-r--r--tensorflow/contrib/learn/python/learn/estimators/dnn_test.py2
-rw-r--r--tensorflow/contrib/lite/build_def.bzl10
-rw-r--r--tensorflow/contrib/lite/examples/label_image/BUILD10
-rw-r--r--tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h16
-rw-r--r--tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h87
-rw-r--r--tensorflow/contrib/lite/examples/label_image/label_image.cc48
-rw-r--r--tensorflow/contrib/lite/examples/label_image/label_image.h7
-rw-r--r--tensorflow/contrib/lite/examples/label_image/label_image.md12
-rw-r--r--tensorflow/contrib/lite/kernels/internal/BUILD24
-rw-r--r--tensorflow/contrib/lite/kernels/internal/optimized/cpu_check.h2
-rw-r--r--tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc2
-rw-r--r--tensorflow/contrib/lite/kernels/internal/tensor_utils.cc1
-rw-r--r--tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h2
-rw-r--r--tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc1
-rw-r--r--tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc5
-rw-r--r--tensorflow/contrib/lite/toco/model.h1
-rw-r--r--tensorflow/contrib/lite/toco/tooling_util.cc5
-rw-r--r--tensorflow/contrib/makefile/Makefile91
-rwxr-xr-xtensorflow/contrib/makefile/build_all_android.sh2
-rwxr-xr-xtensorflow/contrib/makefile/build_all_ios.sh2
-rwxr-xr-xtensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh4
-rw-r--r--tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in2
-rw-r--r--tensorflow/contrib/makefile/tf_op_files.txt3
-rw-r--r--tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc2
-rw-r--r--tensorflow/contrib/mpi/mpi_rendezvous_mgr.h1
-rw-r--r--tensorflow/contrib/ndlstm/__init__.py4
-rw-r--r--tensorflow/contrib/ndlstm/python/lstm1d.py12
-rw-r--r--tensorflow/contrib/opt/python/training/external_optimizer.py4
-rw-r--r--tensorflow/contrib/opt/python/training/external_optimizer_test.py39
-rw-r--r--tensorflow/contrib/py2tf/impl/api.py4
-rw-r--r--tensorflow/contrib/receptive_field/python/util/graph_compute_order.py2
-rw-r--r--tensorflow/contrib/reduce_slice_ops/ops/reduce_slice_ops.cc24
-rw-r--r--tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py15
-rw-r--r--tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py1
-rw-r--r--tensorflow/contrib/rnn/python/ops/rnn_cell.py24
-rw-r--r--tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py3
-rw-r--r--tensorflow/contrib/session_bundle/bundle_shim.py11
-rw-r--r--tensorflow/contrib/session_bundle/constants.py3
-rw-r--r--tensorflow/contrib/slim/python/slim/evaluation_test.py3
-rw-r--r--tensorflow/contrib/solvers/python/kernel_tests/linear_equations_test.py63
-rw-r--r--tensorflow/contrib/solvers/python/kernel_tests/util_test.py37
-rw-r--r--tensorflow/contrib/solvers/python/ops/linear_equations.py52
-rw-r--r--tensorflow/contrib/solvers/python/ops/util.py17
-rw-r--r--tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py26
-rw-r--r--tensorflow/contrib/tpu/profiler/pip_package/setup.py16
-rw-r--r--tensorflow/core/BUILD5
-rw-r--r--tensorflow/core/api_def/base_api/api_def_MatchingFiles.pbtxt1
-rw-r--r--tensorflow/core/api_def/base_api/api_def_Roll.pbtxt52
-rw-r--r--tensorflow/core/api_def/base_api/api_def_UnravelIndex.pbtxt32
-rw-r--r--tensorflow/core/common_runtime/gpu/gpu_device.cc3
-rw-r--r--tensorflow/core/distributed_runtime/BUILD1
-rw-r--r--tensorflow/core/distributed_runtime/master_session.cc2
-rw-r--r--tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc18
-rw-r--r--tensorflow/core/distributed_runtime/rpc/grpc_worker_service.h3
-rw-r--r--tensorflow/core/distributed_runtime/session_mgr.cc78
-rw-r--r--tensorflow/core/distributed_runtime/session_mgr.h9
-rw-r--r--tensorflow/core/framework/register_types.h2
-rw-r--r--tensorflow/core/framework/variant_op_registry.cc24
-rw-r--r--tensorflow/core/framework/variant_op_registry.h41
-rw-r--r--tensorflow/core/graph/mkl_layout_pass.cc17
-rw-r--r--tensorflow/core/graph/mkl_layout_pass_test.cc6
-rw-r--r--tensorflow/core/graph/testlib.cc10
-rw-r--r--tensorflow/core/graph/testlib.h4
-rw-r--r--tensorflow/core/kernels/BUILD46
-rw-r--r--tensorflow/core/kernels/compare_and_bitpack_op.cc15
-rw-r--r--tensorflow/core/kernels/decode_bmp_op.cc19
-rw-r--r--tensorflow/core/kernels/fractional_pool_common.h2
-rw-r--r--tensorflow/core/kernels/mkl_aggregate_ops.cc13
-rw-r--r--tensorflow/core/kernels/mkl_avgpooling_op.cc31
-rw-r--r--tensorflow/core/kernels/mkl_concat_op.cc6
-rw-r--r--tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc6
-rw-r--r--tensorflow/core/kernels/mkl_conv_grad_input_ops.cc6
-rw-r--r--tensorflow/core/kernels/mkl_conv_ops.cc7
-rw-r--r--tensorflow/core/kernels/mkl_conv_ops.h6
-rw-r--r--tensorflow/core/kernels/mkl_cwise_ops_common.cc2
-rw-r--r--tensorflow/core/kernels/mkl_fused_batch_norm_op.cc6
-rw-r--r--tensorflow/core/kernels/mkl_identity_op.cc4
-rw-r--r--tensorflow/core/kernels/mkl_input_conversion_op.cc62
-rw-r--r--tensorflow/core/kernels/mkl_lrn_op.cc6
-rw-r--r--tensorflow/core/kernels/mkl_maxpooling_op.cc10
-rw-r--r--tensorflow/core/kernels/mkl_pooling_ops_common.cc6
-rw-r--r--tensorflow/core/kernels/mkl_pooling_ops_common.h8
-rw-r--r--tensorflow/core/kernels/mkl_relu_op.cc8
-rw-r--r--tensorflow/core/kernels/mkl_reshape_op.cc6
-rw-r--r--tensorflow/core/kernels/mkl_softmax_op.cc4
-rw-r--r--tensorflow/core/kernels/mkl_tfconv_op.h4
-rw-r--r--tensorflow/core/kernels/roll_op.cc334
-rw-r--r--tensorflow/core/kernels/roll_op_test.cc484
-rw-r--r--tensorflow/core/kernels/unravel_index_op.cc122
-rw-r--r--tensorflow/core/lib/io/random_inputstream.cc37
-rw-r--r--tensorflow/core/lib/io/random_inputstream.h2
-rw-r--r--tensorflow/core/ops/array_ops.cc7
-rw-r--r--tensorflow/core/ops/image_ops.cc24
-rw-r--r--tensorflow/core/ops/manip_ops.cc33
-rw-r--r--tensorflow/core/ops/nn_ops.cc8
-rw-r--r--tensorflow/core/platform/cpu_feature_guard.cc9
-rw-r--r--tensorflow/core/platform/profile_utils/cpu_utils.h4
-rw-r--r--tensorflow/core/platform/s3/s3_file_system.cc122
-rw-r--r--tensorflow/core/platform/s3/s3_file_system.h22
-rw-r--r--tensorflow/core/platform/s3/s3_file_system_test.cc2
-rw-r--r--tensorflow/core/platform/windows/cpu_info.h2
-rw-r--r--tensorflow/core/profiler/README.md5
-rw-r--r--tensorflow/core/profiler/internal/tfprof_stats.h4
-rw-r--r--tensorflow/core/profiler/profiler.cc8
-rw-r--r--tensorflow/core/public/version.h2
-rw-r--r--tensorflow/core/util/mkl_util.h32
-rw-r--r--tensorflow/core/util/mkl_util_test.cc4
-rw-r--r--tensorflow/docs_src/about/bib.md2
-rw-r--r--tensorflow/docs_src/api_guides/python/contrib.signal.md6
-rw-r--r--tensorflow/docs_src/api_guides/python/regression_examples.md2
-rw-r--r--tensorflow/docs_src/get_started/custom_estimators.md4
-rw-r--r--tensorflow/docs_src/get_started/datasets_quickstart.md4
-rw-r--r--tensorflow/docs_src/get_started/feature_columns.md4
-rw-r--r--tensorflow/docs_src/get_started/premade_estimators.md2
-rw-r--r--tensorflow/docs_src/install/install_c.md2
-rw-r--r--tensorflow/docs_src/install/install_go.md2
-rw-r--r--tensorflow/docs_src/install/install_java.md22
-rw-r--r--tensorflow/docs_src/install/install_linux.md28
-rw-r--r--tensorflow/docs_src/install/install_mac.md10
-rw-r--r--tensorflow/docs_src/install/install_sources.md24
-rw-r--r--tensorflow/docs_src/install/install_windows.md6
-rw-r--r--tensorflow/docs_src/programmers_guide/graphs.md4
-rw-r--r--tensorflow/examples/android/BUILD2
-rw-r--r--tensorflow/examples/android/build.gradle9
-rw-r--r--tensorflow/examples/android/download-models.gradle2
-rw-r--r--tensorflow/examples/android/src/org/tensorflow/demo/LegacyCameraConnectionFragment.java7
-rw-r--r--tensorflow/examples/android/src/org/tensorflow/demo/tracking/MultiBoxTracker.java4
-rw-r--r--tensorflow/examples/udacity/Dockerfile2
-rw-r--r--tensorflow/python/BUILD36
-rw-r--r--tensorflow/python/__init__.py2
-rw-r--r--tensorflow/python/client/session_benchmark.py1
-rw-r--r--tensorflow/python/data/ops/dataset_ops.py49
-rw-r--r--tensorflow/python/data/util/nest.py6
-rw-r--r--tensorflow/python/data/util/sparse.py2
-rw-r--r--tensorflow/python/debug/cli/tensor_format.py2
-rw-r--r--tensorflow/python/debug/lib/debug_data.py2
-rw-r--r--tensorflow/python/eager/execution_callbacks.py2
-rw-r--r--tensorflow/python/estimator/canned/dnn_testing_utils.py2
-rw-r--r--tensorflow/python/estimator/canned/linear_testing_utils.py2
-rw-r--r--tensorflow/python/estimator/estimator.py3
-rw-r--r--tensorflow/python/estimator/run_config.py5
-rw-r--r--tensorflow/python/keras/_impl/keras/layers/convolutional.py2
-rw-r--r--tensorflow/python/kernel_tests/BUILD13
-rw-r--r--tensorflow/python/kernel_tests/array_ops_test.py21
-rw-r--r--tensorflow/python/kernel_tests/constant_op_test.py13
-rw-r--r--tensorflow/python/kernel_tests/conv_ops_test.py3
-rw-r--r--tensorflow/python/kernel_tests/decode_jpeg_op_test.py1
-rw-r--r--tensorflow/python/kernel_tests/io_ops_test.py2
-rw-r--r--tensorflow/python/kernel_tests/losses_test.py16
-rw-r--r--tensorflow/python/kernel_tests/manip_ops_test.py138
-rw-r--r--tensorflow/python/kernel_tests/rnn_test.py1
-rw-r--r--tensorflow/python/kernel_tests/tensordot_op_test.py54
-rw-r--r--tensorflow/python/kernel_tests/topk_op_test.py2
-rw-r--r--tensorflow/python/layers/convolutional.py11
-rw-r--r--tensorflow/python/layers/utils.py2
-rw-r--r--tensorflow/python/ops/array_ops.py7
-rw-r--r--tensorflow/python/ops/functional_ops.py2
-rw-r--r--tensorflow/python/ops/gradients_impl.py1
-rw-r--r--tensorflow/python/ops/image_ops.py4
-rw-r--r--tensorflow/python/ops/image_ops_impl.py106
-rw-r--r--tensorflow/python/ops/image_ops_test.py112
-rw-r--r--tensorflow/python/ops/linalg_grad.py59
-rw-r--r--tensorflow/python/ops/losses/losses_impl.py7
-rw-r--r--tensorflow/python/ops/manip_grad.py31
-rw-r--r--tensorflow/python/ops/manip_ops.py38
-rw-r--r--tensorflow/python/ops/math_ops.py10
-rw-r--r--tensorflow/python/ops/rnn.py6
-rw-r--r--tensorflow/python/ops/standard_ops.py73
-rw-r--r--tensorflow/python/saved_model/loader_impl.py9
-rw-r--r--tensorflow/python/tools/freeze_graph.py38
-rw-r--r--tensorflow/python/tools/freeze_graph_test.py16
-rw-r--r--tensorflow/python/tools/optimize_for_inference_lib.py1
-rw-r--r--tensorflow/python/tools/optimize_for_inference_test.py92
-rw-r--r--tensorflow/python/tools/saved_model_cli.py3
-rw-r--r--tensorflow/python/training/basic_session_run_hooks.py4
-rw-r--r--tensorflow/python/training/input.py2
-rw-r--r--tensorflow/python/training/saver.py12
-rw-r--r--tensorflow/python/util/compat_internal.py34
-rw-r--r--tensorflow/stream_executor/cuda/cuda_diagnostics.cc2
-rw-r--r--tensorflow/stream_executor/dso_loader.cc9
-rw-r--r--tensorflow/tensorflow.bzl7
-rw-r--r--tensorflow/tools/api/golden/tensorflow.image.pbtxt18
-rw-r--r--tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt1
-rw-r--r--tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt1
-rw-r--r--tensorflow/tools/api/golden/tensorflow.manip.pbtxt7
-rw-r--r--tensorflow/tools/api/golden/tensorflow.pbtxt8
-rwxr-xr-xtensorflow/tools/ci_build/ci_sanity.sh16
-rwxr-xr-xtensorflow/tools/ci_build/windows/libtensorflow_cpu.sh2
-rw-r--r--tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh2
-rw-r--r--tensorflow/tools/docker/jupyter_notebook_config.py1
-rw-r--r--tensorflow/tools/docs/pretty_docs.py2
-rw-r--r--tensorflow/tools/lib_package/BUILD5
-rw-r--r--tensorflow/tools/pip_package/BUILD11
-rwxr-xr-xtensorflow/tools/pip_package/build_pip_package.sh2
-rw-r--r--tensorflow/tools/pip_package/setup.py11
-rw-r--r--tensorflow/workspace.bzl21
-rw-r--r--third_party/com_google_absl.BUILD5
-rw-r--r--third_party/flatbuffers/flatbuffers.BUILD2
-rw-r--r--third_party/gast.BUILD2
-rw-r--r--third_party/gpus/cuda_configure.bzl2
-rw-r--r--third_party/jpeg/jpeg.BUILD50
-rw-r--r--third_party/kafka/BUILD147
-rw-r--r--third_party/kafka/config.patch44
-rw-r--r--third_party/pcre.BUILD2
-rw-r--r--third_party/py/python_configure.bzl2
-rw-r--r--third_party/termcolor.BUILD2
256 files changed, 4478 insertions, 898 deletions
diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md
index 1a401997c6..2f3df7cda9 100644
--- a/ISSUE_TEMPLATE.md
+++ b/ISSUE_TEMPLATE.md
@@ -4,7 +4,7 @@ https://stackoverflow.com/questions/tagged/tensorflow
If you open a GitHub issue, here is our policy:
-1. It must be a bug or a feature request.
+1. It must be a bug, a feature request, or a significant problem with documentation (for small docs fixes please send a PR instead).
2. The form below must be filled out.
3. It shouldn't be a TensorBoard issue. Those go [here](https://github.com/tensorflow/tensorboard/issues).
diff --git a/README.md b/README.md
index 0c93813e58..916e5200b2 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
| **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** | **`Android`** |
|-----------------|---------------------|------------------|-------------------|---------------|
-| [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-cpu)](https://ci.tensorflow.org/job/tensorflow-master-cpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-linux-gpu)](https://ci.tensorflow.org/job/tensorflow-master-linux-gpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-mac)](https://ci.tensorflow.org/job/tensorflow-master-mac) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) |
+| [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-cpu)](https://ci.tensorflow.org/job/tensorflow-master-cpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-linux-gpu)](https://ci.tensorflow.org/job/tensorflow-master-linux-gpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-mac)](https://ci.tensorflow.org/job/tensorflow-master-mac) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) [ ![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg) ](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) |
**TensorFlow** is an open source software library for numerical computation using
data flow graphs. The graph nodes represent mathematical operations, while
@@ -27,7 +27,7 @@ guidelines](CONTRIBUTING.md). This project adheres to TensorFlow's
uphold this code.**
**We use [GitHub issues](https://github.com/tensorflow/tensorflow/issues) for
-tracking requests and bugs. So please see
+tracking requests and bugs. So please see
[TensorFlow Discuss](https://groups.google.com/a/tensorflow.org/forum/#!forum/discuss) for general questions
and discussion, and please direct specific questions to [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow).**
diff --git a/RELEASE.md b/RELEASE.md
index fdf10407fd..b11b1e40db 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,18 +1,39 @@
# Release 1.5.0
## Breaking Changes
-* Prebuilt binaries are now built against CUDA 9 and cuDNN 7.
+* Prebuilt binaries are now built against CUDA 9.0 and cuDNN 7.
* Our Linux binaries are built using ubuntu 16 containers, potentially
introducing glibc incompatibility issues with ubuntu 14.
* Starting from 1.6 release, our prebuilt binaries will use AVX instructions.
This may break TF on older CPUs.
+## Known Bugs
+* Using XLA:GPU with CUDA 9 and CUDA 9.1 results in garbage results and/or
+ `CUDA_ILLEGAL_ADDRESS` failures.
+
+ Google discovered in mid-December 2017 that the PTX-to-SASS compiler in CUDA 9
+ and CUDA 9.1 sometimes does not properly compute the carry bit when
+ decomposing 64-bit address calculations with large offsets (e.g. `load [x +
+ large_constant]`) into 32-bit arithmetic in SASS.
+
+ As a result, these versions of `ptxas` miscompile most XLA programs which use
+ more than 4GB of temp memory. This results in garbage results and/or
+ `CUDA_ERROR_ILLEGAL_ADDRESS` failures.
+
+ A fix in CUDA 9.1.121 is expected in late February 2018. We do not expect a
+ fix for CUDA 9.0.x. Until the fix is available, the only workaround is to
+ [downgrade](https://developer.nvidia.com/cuda-toolkit-archive) to CUDA 8.0.x
+ or disable XLA:GPU.
+
+ TensorFlow will print a warning if you use XLA:GPU with a known-bad version of
+ CUDA; see e00ba24c4038e7644da417ddc639169b6ea59122.
+
## Major Features And Improvements
* [Eager execution](https://github.com/tensorflow/tensorflow/tree/r1.5/tensorflow/contrib/eager)
preview version is now available.
* [TensorFlow Lite](https://github.com/tensorflow/tensorflow/tree/r1.5/tensorflow/contrib/lite)
dev preview is now available.
-* CUDA 9 and cuDNN 7 support.
+* CUDA 9.0 and cuDNN 7 support.
* Accelerated Linear Algebra (XLA):
* Add `complex64` support to XLA compiler.
* `bfloat` support is now added to XLA infrastructure.
@@ -523,7 +544,7 @@ answered questions, and were part of inspiring discussions.
* Fixed LIBXSMM integration.
* Make decode_jpeg/decode_png/decode_gif handle all formats, since users frequently try to decode an image as the wrong type.
* Improve implicit broadcasting lowering.
-* Improving stability of GCS/Bigquery clients by a faster retrying of stale transmissions.
+* Improving stability of GCS/BigQuery clients by a faster retrying of stale transmissions.
* Remove OpKernelConstruction::op_def() as part of minimizing proto dependencies.
* VectorLaplaceDiag distribution added.
* Android demo no longer requires libtensorflow_demo.so to run (libtensorflow_inference.so still required)
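Editor's note: the known-bug entry added above recommends either downgrading CUDA or keeping XLA:GPU disabled. Below is a minimal, hedged TF 1.x sketch (not part of this change) of leaving the XLA JIT off when creating a session, assuming the standard `ConfigProto`/`OptimizerOptions` fields:

```python
import tensorflow as tf

# Workaround sketch: leave the XLA JIT at OFF (its default) so XLA:GPU is
# never engaged while running against an affected CUDA 9 / 9.1 toolchain.
opts = tf.OptimizerOptions(global_jit_level=tf.OptimizerOptions.OFF)
config = tf.ConfigProto(
    graph_options=tf.GraphOptions(optimizer_options=opts))

x = tf.constant([1.0, 2.0])
y = x * 2.0
with tf.Session(config=config) as sess:
    print(sess.run(y))  # plain TF kernels, no XLA compilation requested
```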
diff --git a/WORKSPACE b/WORKSPACE
index 7ae39374f1..1e38a9a8cd 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -41,12 +41,12 @@ load("//tensorflow:workspace.bzl", "tf_workspace")
tf_workspace()
new_http_archive(
- name = "inception5h",
+ name = "inception_v1",
build_file = "models.BUILD",
- sha256 = "d13569f6a98159de37e92e9c8ec4dae8f674fbf475f69fe6199b514f756d4364",
+ sha256 = "7efe12a8363f09bc24d7b7a450304a15655a57a7751929b2c1593a71183bb105",
urls = [
- "http://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip",
- "http://download.tensorflow.org/models/inception5h.zip",
+ "http://storage.googleapis.com/download.tensorflow.org/models/inception_v1.zip",
+ "http://download.tensorflow.org/models/inception_v1.zip",
],
)
diff --git a/configure.py b/configure.py
index 083fed1710..27519b4aba 100644
--- a/configure.py
+++ b/configure.py
@@ -298,7 +298,7 @@ def get_var(environ_cp,
System".
enabled_by_default: boolean for default behavior.
question: optional string for how to ask for user input.
- yes_reply: optionanl string for reply when feature is enabled.
+ yes_reply: optional string for reply when feature is enabled.
no_reply: optional string for reply when feature is disabled.
Returns:
@@ -411,7 +411,7 @@ def set_action_env_var(environ_cp,
System".
enabled_by_default: boolean for default behavior.
question: optional string for how to ask for user input.
- yes_reply: optionanl string for reply when feature is enabled.
+ yes_reply: optional string for reply when feature is enabled.
no_reply: optional string for reply when feature is disabled.
"""
var = int(
@@ -1354,6 +1354,7 @@ def main():
environ_cp['TF_NEED_GCP'] = '0'
environ_cp['TF_NEED_HDFS'] = '0'
environ_cp['TF_NEED_JEMALLOC'] = '0'
+ environ_cp['TF_NEED_KAFKA'] = '0'
environ_cp['TF_NEED_OPENCL_SYCL'] = '0'
environ_cp['TF_NEED_COMPUTECPP'] = '0'
environ_cp['TF_NEED_OPENCL'] = '0'
@@ -1372,6 +1373,8 @@ def main():
'with_hdfs_support', True, 'hdfs')
set_build_var(environ_cp, 'TF_NEED_S3', 'Amazon S3 File System',
'with_s3_support', True, 's3')
+ set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform',
+ 'with_kafka_support', False, 'kafka')
set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support',
False, 'xla')
set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support',
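Editor's note: the new `TF_NEED_KAFKA`/`with_kafka_support` option gates the `tensorflow/contrib/kafka` targets introduced elsewhere in this change. As a rough, hedged sketch of how the resulting dataset might be consumed from Python — the constructor arguments shown here are assumptions inferred from `kafka_dataset_ops.py`, not an authoritative API:

```python
import tensorflow as tf
from tensorflow.contrib.kafka.python.ops import kafka_dataset_ops

# Hypothetical usage: read messages from a Kafka topic as a tf.data pipeline.
# "test:0:0:-1" = topic:partition:start_offset:end_offset (assumed format).
dataset = kafka_dataset_ops.KafkaDataset(
    topics=["test:0:0:-1"], servers="localhost:9092", group="tf-consumer")
iterator = dataset.make_one_shot_iterator()
next_message = iterator.get_next()

with tf.Session() as sess:
    print(sess.run(next_message))
```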
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index e89667cbfd..a73e89bc1a 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -211,6 +211,12 @@ config_setting(
visibility = ["//visibility:public"],
)
+config_setting(
+ name = "with_kafka_support",
+ define_values = {"with_kafka_support": "true"},
+ visibility = ["//visibility:public"],
+)
+
# Crosses between platforms and file system libraries not supported on those
# platforms due to limitations in nested select() statements.
config_setting(
diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index c9ade5fb83..9060c19e9d 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -433,6 +433,7 @@ tf_gen_op_wrappers_cc(
"linalg_ops",
"logging_ops",
"lookup_ops",
+ "manip_ops",
"math_ops",
"nn_ops",
"no_op",
diff --git a/tensorflow/cc/tools/freeze_saved_model_test.cc b/tensorflow/cc/tools/freeze_saved_model_test.cc
index 57244a4f0a..52a81a5028 100644
--- a/tensorflow/cc/tools/freeze_saved_model_test.cc
+++ b/tensorflow/cc/tools/freeze_saved_model_test.cc
@@ -71,7 +71,7 @@ class FreezeTest : public ::testing::Test {
return Status::OK();
}
- // Adds `graph_def` to `saved_model_bundle` and intializes a session with
+ // Adds `graph_def` to `saved_model_bundle` and initializes a session with
// `init_node`.
Status AddGraphDefToSavedModelBundle(const GraphDef& graph_def,
const string& init_node,
diff --git a/tensorflow/compiler/aot/BUILD b/tensorflow/compiler/aot/BUILD
index 0540260efd..bc46918df9 100644
--- a/tensorflow/compiler/aot/BUILD
+++ b/tensorflow/compiler/aot/BUILD
@@ -132,7 +132,10 @@ tf_library(
config = "test_graph_tfadd.config.pbtxt",
cpp_class = "AddComp",
graph = "test_graph_tfadd.pbtxt",
- tags = ["manual"],
+ tags = [
+ "manual",
+ "notap",
+ ],
)
# A test of tf_library that includes a graph with an unknown op, but where
@@ -143,7 +146,10 @@ tf_library(
config = "test_graph_tfunknownop.config.pbtxt",
cpp_class = "UnknownOpAddComp",
graph = "test_graph_tfunknownop.pbtxt",
- tags = ["manual"],
+ tags = [
+ "manual",
+ "notap",
+ ],
)
# A test of tf_library that includes a graph with an unknown op, but where
@@ -155,7 +161,10 @@ tf_library(
config = "test_graph_tfunknownop2.config.pbtxt",
cpp_class = "UnknownOpAddComp",
graph = "test_graph_tfunknownop.pbtxt",
- tags = ["manual"],
+ tags = [
+ "manual",
+ "notap",
+ ],
)
# A test of tf_library that includes a graph with an unknown op, but where
@@ -166,7 +175,10 @@ tf_library(
config = "test_graph_tfunknownop3.config.pbtxt",
cpp_class = "UnknownOpAddComp",
graph = "test_graph_tfunknownop.pbtxt",
- tags = ["manual"],
+ tags = [
+ "manual",
+ "notap",
+ ],
)
# Utility library for benchmark binaries, used by the *_benchmark rules that are
diff --git a/tensorflow/compiler/aot/tests/BUILD b/tensorflow/compiler/aot/tests/BUILD
index 7dfd49cc3b..43d8ae4108 100644
--- a/tensorflow/compiler/aot/tests/BUILD
+++ b/tensorflow/compiler/aot/tests/BUILD
@@ -74,7 +74,10 @@ tf_library(
# compile but the others in this directory succeed, you may need to
# expand the "required by all tf_library targets" list in tfcompile.bzl.
include_standard_runtime_deps = False,
- tags = ["manual"],
+ tags = [
+ "manual",
+ "notap",
+ ],
)
tf_library(
@@ -84,7 +87,10 @@ tf_library(
cpp_class = "AddWithCkptComp",
freeze_checkpoint = "test_graph_tfadd_with_ckpt.ckpt",
graph = "test_graph_tfadd_with_ckpt.pb",
- tags = ["manual"],
+ tags = [
+ "manual",
+ "notap",
+ ],
)
tf_library(
@@ -95,7 +101,10 @@ tf_library(
freeze_checkpoint = "test_graph_tfadd_with_ckpt_saver.ckpt",
freeze_saver = "test_graph_tfadd_with_ckpt_saver.saver",
graph = "test_graph_tfadd_with_ckpt_saver.pb",
- tags = ["manual"],
+ tags = [
+ "manual",
+ "notap",
+ ],
)
tf_library(
@@ -104,7 +113,10 @@ tf_library(
config = "test_graph_tffunction.config.pbtxt",
cpp_class = "FunctionComp",
graph = "test_graph_tffunction.pb",
- tags = ["manual"],
+ tags = [
+ "manual",
+ "notap",
+ ],
)
tf_library(
@@ -113,7 +125,10 @@ tf_library(
config = "test_graph_tfgather.config.pbtxt",
cpp_class = "GatherComp",
graph = "test_graph_tfgather.pb",
- tags = ["manual"],
+ tags = [
+ "manual",
+ "notap",
+ ],
)
tf_library(
@@ -122,7 +137,10 @@ tf_library(
config = "test_graph_tfmatmul.config.pbtxt",
cpp_class = "foo::bar::MatMulComp",
graph = "test_graph_tfmatmul.pb",
- tags = ["manual"],
+ tags = [
+ "manual",
+ "notap",
+ ],
)
tf_library(
@@ -131,7 +149,10 @@ tf_library(
config = "test_graph_tfmatmulandadd.config.pbtxt",
cpp_class = "MatMulAndAddComp",
graph = "test_graph_tfmatmulandadd.pb",
- tags = ["manual"],
+ tags = [
+ "manual",
+ "notap",
+ ],
tfcompile_flags = "--gen_name_to_index --gen_program_shape",
)
@@ -141,13 +162,19 @@ tf_library(
config = "test_graph_tfsplits.config.pbtxt",
cpp_class = "SplitsComp",
graph = "test_graph_tfsplits.pb",
- tags = ["manual"],
+ tags = [
+ "manual",
+ "notap",
+ ],
)
tf_cc_test(
name = "tfcompile_test",
srcs = ["tfcompile_test.cc"],
- tags = ["manual"],
+ tags = [
+ "manual",
+ "notap",
+ ],
deps = [
":test_graph_tfadd",
":test_graph_tfadd_with_ckpt",
diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py
index 9d34cdfe10..30a6d3a74d 100644
--- a/tensorflow/compiler/tests/binary_ops_test.py
+++ b/tensorflow/compiler/tests/binary_ops_test.py
@@ -774,15 +774,15 @@ class BinaryOpsTest(XLATestCase):
def DISABLED_testSparseMatMul(self):
# Binary wrappers for sparse_matmul with different hints
def SparseMatmulWrapperTF(a, b):
- return tf.sparse_matmul(a, b, a_is_sparse=True)
+ return math_ops.sparse_matmul(a, b, a_is_sparse=True)
def SparseMatmulWrapperFT(a, b):
- return tf.sparse_matmul(a, b, b_is_sparse=True)
+ return math_ops.sparse_matmul(a, b, b_is_sparse=True)
def SparseMatmulWrapperTT(a, b):
- return tf.sparse_matmul(a, b, a_is_sparse=True, b_is_sparse=True)
+ return math_ops.sparse_matmul(a, b, a_is_sparse=True, b_is_sparse=True)
- self._testMatMul(tf.sparse_matmul)
+ self._testMatMul(math_ops.sparse_matmul)
self._testMatMul(SparseMatmulWrapperTF)
self._testMatMul(SparseMatmulWrapperFT)
self._testMatMul(SparseMatmulWrapperTT)
diff --git a/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc b/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc
index 2ba572fd0e..d4fb5dd4e0 100644
--- a/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc
@@ -38,8 +38,22 @@ class PoolingOp : public XlaOpKernel {
PoolingOp(OpKernelConstruction* ctx, int num_spatial_dims)
: XlaOpKernel(ctx), num_spatial_dims_(num_spatial_dims) {
if (ctx->num_inputs() == 1) {
- OP_REQUIRES_OK(ctx, ctx->GetAttr("ksize", &ksize_));
- OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &stride_));
+ std::vector<int32> ksize_int;
+ std::vector<int32> stride_int;
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("ksize", &ksize_int));
+ OP_REQUIRES(ctx, ksize_int.size() == num_dims(),
+ errors::InvalidArgument("Sliding window ksize field must "
+ "specify ",
+ num_dims(), " dimensions"));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &stride_int));
+ OP_REQUIRES(ctx, stride_int.size() == num_dims(),
+ errors::InvalidArgument("Sliding window stride field must "
+ "specify ",
+ num_dims(), " dimensions"));
+ for (int i = 0; i < num_dims(); ++i) {
+ ksize_.push_back(ksize_int[i]);
+ stride_.push_back(stride_int[i]);
+ }
}
Padding padding;
OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding));
@@ -65,28 +79,33 @@ class PoolingOp : public XlaOpKernel {
xla::ComputationDataHandle input = ctx->Input(0);
const TensorShape input_shape = ctx->InputShape(0);
+ std::vector<int64> ksize = ksize_;
+ std::vector<int64> stride = stride_;
if (ctx->num_inputs() != 1) {
const TensorShape ksize_shape = ctx->InputShape(1);
+ // Validate input sizes.
OP_REQUIRES(ctx, TensorShapeUtils::IsVector(ksize_shape),
errors::InvalidArgument("ksize must be a vector, not shape ",
ksize_shape.DebugString()));
- OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntVector(1, &ksize_));
+ OP_REQUIRES(ctx, ksize_shape.num_elements() == num_dims(),
+ errors::InvalidArgument("Sliding window ksize field must "
+ "specify ",
+ num_dims(), " dimensions"));
+ ksize.clear();
+ OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntVector(1, &ksize));
const TensorShape stride_shape = ctx->InputShape(2);
+ // Validate input sizes.
OP_REQUIRES(ctx, TensorShapeUtils::IsVector(stride_shape),
errors::InvalidArgument("stride must be a vector, not shape ",
stride_shape.DebugString()));
- OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntVector(2, &stride_));
+ OP_REQUIRES(ctx, stride_shape.num_elements() == num_dims(),
+ errors::InvalidArgument("Sliding window stride field must "
+ "specify ",
+ num_dims(), " dimensions"));
+ stride.clear();
+ OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntVector(2, &stride));
}
-
- OP_REQUIRES(ctx, ksize_.size() == num_dims(),
- errors::InvalidArgument("Sliding window ksize field must "
- "specify ",
- num_dims(), " dimensions"));
- OP_REQUIRES(ctx, stride_.size() == num_dims(),
- errors::InvalidArgument("Sliding window stride field must "
- "specify ",
- num_dims(), " dimensions"));
OP_REQUIRES(ctx, input_shape.dims() == num_dims(),
errors::InvalidArgument("Input to ", type_string(),
" operator must have ", num_dims(),
@@ -94,8 +113,8 @@ class PoolingOp : public XlaOpKernel {
const DataType type = input_type(0);
xla::ComputationDataHandle pooled = ctx->builder()->ReduceWindow(
- input, InitValue(ctx->builder(), type), *Reduction(ctx, type), ksize_,
- stride_, padding_);
+ input, InitValue(ctx->builder(), type), *Reduction(ctx, type), ksize,
+ stride, padding_);
ctx->SetOutput(0, PostProcessOutput(ctx, pooled, type, input_shape));
}
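Editor's note: the checks added above validate the `ksize`/`strides` lengths against the pooling rank before the `ReduceWindow` is built. A hedged Python-level illustration of the constraint being enforced, using the standard `tf.nn.max_pool` (which this XLA kernel implements when a graph is compiled with XLA):

```python
import tensorflow as tf

x = tf.zeros([1, 8, 8, 3])  # NHWC input to a 2-D pooling op (4 dimensions)

# Valid: ksize and strides each specify all 4 dimensions.
pooled = tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                        padding="SAME")

# A 3-element ksize such as [2, 2, 1] would be rejected with an error like
# "Sliding window ksize field must specify 4 dimensions".
with tf.Session() as sess:
    print(sess.run(tf.shape(pooled)))  # => [1 4 4 3]
```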
diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h
index d82ba63e8a..ea4cdb7667 100644
--- a/tensorflow/compiler/xla/client/computation_builder.h
+++ b/tensorflow/compiler/xla/client/computation_builder.h
@@ -67,7 +67,7 @@ class ComputationBuilder {
// OpMetadata is often applied to a series of XLA HLO instructions. As a
// result, OpMetadata is set on the Computation Builder. All subsequent
// instructions generated via this Computation Builder will have the same
- // OpMetadata attached until a call to ClearOpMetdata.
+ // OpMetadata attached until a call to ClearOpMetadata.
void SetOpMetadata(const OpMetadata& metadata) { metadata_ = metadata; }
// Clears the HloMetadata state.
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index 42e7f91f26..d9c4d094b8 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -2173,7 +2173,7 @@ bool HloParser::ParseConvolutionDimensionNumbers(
//
// {[2:3:4], [5:6:7], [8:9]}
//
-// The the parsed result will be:
+// The parsed result will be:
//
// {/*starts=*/{2, 5, 8}, /*limits=*/{3, 6, 9}, /*strides=*/{4, 7, 1}}
//
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 0451f00629..3ed8cef56c 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -50,6 +50,7 @@ py_library(
"//tensorflow/contrib/image:single_image_random_dot_stereograms_py",
"//tensorflow/contrib/input_pipeline:input_pipeline_py",
"//tensorflow/contrib/integrate:integrate_py",
+ "//tensorflow/contrib/kafka",
"//tensorflow/contrib/keras",
"//tensorflow/contrib/kernel_methods",
"//tensorflow/contrib/kfac",
@@ -142,6 +143,7 @@ cc_library(
"//tensorflow/contrib/factorization:all_ops",
"//tensorflow/contrib/framework:all_ops",
"//tensorflow/contrib/input_pipeline:input_pipeline_ops_op_lib",
+ "//tensorflow/contrib/kafka:kafka_ops_op_lib",
"//tensorflow/contrib/layers:sparse_feature_cross_op_op_lib",
"//tensorflow/contrib/nccl:nccl_ops_op_lib",
"//tensorflow/contrib/nearest_neighbor:nearest_neighbor_ops_op_lib",
diff --git a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java
index dc5b9fb887..abddadac5b 100644
--- a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java
+++ b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java
@@ -194,6 +194,11 @@ public class TensorFlowInferenceInterface {
* @param outputNames A list of output nodes which should be filled by the inference pass.
*/
public void run(String[] outputNames, boolean enableStats) {
+ run(outputNames, enableStats, new String[] {});
+ }
+
+ /** An overloaded version of runInference that allows supplying targetNodeNames as well */
+ public void run(String[] outputNames, boolean enableStats, String[] targetNodeNames) {
// Release any Tensors from the previous run calls.
closeFetches();
@@ -204,6 +209,11 @@ public class TensorFlowInferenceInterface {
runner.fetch(tid.name, tid.outputIndex);
}
+ // Add targets.
+ for (String t : targetNodeNames) {
+ runner.addTarget(t);
+ }
+
// Run the session.
try {
if (enableStats) {
diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt
index ad8c995eef..57a52bf4ca 100644
--- a/tensorflow/contrib/cmake/python_modules.txt
+++ b/tensorflow/contrib/cmake/python_modules.txt
@@ -6,6 +6,7 @@ tensorflow/core/example
tensorflow/core/framework
tensorflow/core/lib
tensorflow/core/lib/core
+tensorflow/core/profiler
tensorflow/core/protobuf
tensorflow/core/util
tensorflow/examples
@@ -219,6 +220,8 @@ tensorflow/contrib/input_pipeline/python/ops
tensorflow/contrib/integrate
tensorflow/contrib/integrate/python
tensorflow/contrib/integrate/python/ops
+tensorflow/contrib/kafka/python
+tensorflow/contrib/kafka/python/ops
tensorflow/contrib/keras
tensorflow/contrib/keras/api
tensorflow/contrib/keras/api/keras
diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake
index 138993db35..c42bc35ce7 100644
--- a/tensorflow/contrib/cmake/tf_core_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_core_ops.cmake
@@ -30,6 +30,7 @@ set(tf_op_lib_names
"list_ops"
"lookup_ops"
"logging_ops"
+ "manip_ops"
"math_ops"
"nn_ops"
"no_op"
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 294b9c5941..34c466fa01 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -335,6 +335,7 @@ GENERATE_PYTHON_OP_LIB("list_ops")
GENERATE_PYTHON_OP_LIB("logging_ops")
GENERATE_PYTHON_OP_LIB("lookup_ops")
GENERATE_PYTHON_OP_LIB("nn_ops")
+GENERATE_PYTHON_OP_LIB("manip_ops")
GENERATE_PYTHON_OP_LIB("parsing_ops")
GENERATE_PYTHON_OP_LIB("random_ops")
GENERATE_PYTHON_OP_LIB("remote_fused_graph_ops"
diff --git a/tensorflow/contrib/cmake/tools/create_def_file.py b/tensorflow/contrib/cmake/tools/create_def_file.py
index f67698eb99..53c2285699 100644
--- a/tensorflow/contrib/cmake/tools/create_def_file.py
+++ b/tensorflow/contrib/cmake/tools/create_def_file.py
@@ -31,7 +31,7 @@ from __future__ import division
from __future__ import print_function
import argparse
-import io
+import codecs
import os
import re
import subprocess
@@ -103,7 +103,7 @@ def main():
for lib_path in args.input:
proc = subprocess.Popen([DUMPBIN, "/nologo", "/linkermember:1", lib_path],
stdout=subprocess.PIPE)
- for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"):
+ for line in codecs.getreader("utf-8")(proc.stdout):
cols = line.split()
if len(cols) < 2:
continue
@@ -131,7 +131,7 @@ def main():
# We compare on undname but use the decorated name from candidates.
dupes = 0
proc = subprocess.Popen([UNDNAME, tmpfile.name], stdout=subprocess.PIPE)
- for idx, line in enumerate(io.TextIOWrapper(proc.stdout, encoding="utf-8")):
+ for idx, line in enumerate(codecs.getreader("utf-8")(proc.stdout)):
decorated = candidates[idx]
if decorated in taken:
# Symbol is already in output, done.
diff --git a/tensorflow/contrib/coder/README.md b/tensorflow/contrib/coder/README.md
index e1e867db5a..c6c379c458 100644
--- a/tensorflow/contrib/coder/README.md
+++ b/tensorflow/contrib/coder/README.md
@@ -30,7 +30,7 @@ following sense:
around,
- The number of CDF axes does not extend, i.e., `CDF.ndim == data.ndim + 1`.
-In the previous example where data has shape (10, 10), the followings are
+In the previous example where data has shape (10, 10), the following are
acceptable CDF shapes:
- (10, 10, 65)
diff --git a/tensorflow/contrib/coder/kernels/range_coder.cc b/tensorflow/contrib/coder/kernels/range_coder.cc
index f4f076b6c4..21b35155ff 100644
--- a/tensorflow/contrib/coder/kernels/range_coder.cc
+++ b/tensorflow/contrib/coder/kernels/range_coder.cc
@@ -276,7 +276,7 @@ void RangeEncoder::Finalize(string* sink) {
}
} else if (base_ != 0) {
// If base == 0, then pick 0 from [base, base + size) and no zeros are
- // explcitly written.
+ // explicitly written.
//
// Otherwise, pick (base + (2^16 - base[16:0])), i.e., round up base to the
// next multiple of 2^16. As 2^16 < size, this value should be in the
diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py
index 4fc5ff1bd1..933df6d71d 100644
--- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py
+++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py
@@ -20,6 +20,7 @@ from __future__ import print_function
import time
+from six.moves import xrange # pylint: disable=redefined-builtin
from tensorflow.contrib import rnn as contrib_rnn
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
from tensorflow.contrib.rnn.python.ops import lstm_ops
diff --git a/tensorflow/contrib/eager/python/evaluator.py b/tensorflow/contrib/eager/python/evaluator.py
index 3faaeef590..68e7b5421f 100644
--- a/tensorflow/contrib/eager/python/evaluator.py
+++ b/tensorflow/contrib/eager/python/evaluator.py
@@ -178,7 +178,7 @@ class Evaluator(object):
call_op: An op that updates evaluation state on a mini-batch of examples.
Must generate an tf.errors.OutOfRangeError when done.
results_op: A dictionary of tensors that compute the final evaluation
- results from the evaulation state.
+ results from the evaluation state.
sess: The Session to run the evaluation in. Defaults to the default
Session.
diff --git a/tensorflow/contrib/eager/python/examples/resnet50/README.md b/tensorflow/contrib/eager/python/examples/resnet50/README.md
index db023e6c97..79e4600529 100644
--- a/tensorflow/contrib/eager/python/examples/resnet50/README.md
+++ b/tensorflow/contrib/eager/python/examples/resnet50/README.md
@@ -34,7 +34,7 @@ bazel run -c opt --config=cuda :resnet50_graph_test -- --benchmarks=.
(Or remove the `--config=cuda` flag for running on CPU instead of GPU).
-On October 31, 2017, the benchmarks demostrated comparable performance
+On October 31, 2017, the benchmarks demonstrated comparable performance
for eager and graph execution of this particular model when using
a single NVIDIA Titan X (Pascal) GPU on a host with an
Intel Xeon E5-1650 CPU @ 3.50GHz and a batch size of 32.
diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py
index b302a87e0e..9982fdb07e 100644
--- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py
+++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py
@@ -97,7 +97,7 @@ class _ConvBlock(tfe.Network):
Args:
kernel_size: the kernel size of middle conv layer at main path
- filters: list of integers, the filterss of 3 conv layer at main path
+ filters: list of integers, the filters of 3 conv layer at main path
stage: integer, current stage label, used for generating layer names
block: 'a','b'..., current block label, used for generating layer names
data_format: data_format for the input ('channels_first' or
diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py
index 76e06269b6..0ff8746884 100644
--- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py
+++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py
@@ -22,6 +22,7 @@ import gc
import tempfile
import time
+from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
import tensorflow.contrib.eager as tfe
diff --git a/tensorflow/contrib/eager/python/examples/rnn_ptb/README.md b/tensorflow/contrib/eager/python/examples/rnn_ptb/README.md
index 743ebb68ee..966177e91c 100644
--- a/tensorflow/contrib/eager/python/examples/rnn_ptb/README.md
+++ b/tensorflow/contrib/eager/python/examples/rnn_ptb/README.md
@@ -40,7 +40,7 @@ bazel run -c opt --config=cuda :rnn_ptb_graph_test -- --benchmarks=.
(Or remove the `--config=cuda` flag for running on CPU instead of GPU).
-On October 31, 2017, the benchmarks demostrated slightly better performance
+On October 31, 2017, the benchmarks demonstrated slightly better performance
(3-6%) for graph execution over eager execution for this particular model when
using a single NVIDIA Titan X (Pascal) GPU on a host with an Intel Xeon E5-1650
CPU @ 3.50GHz and a batch size of 32.
diff --git a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py
index 7b9637a9d5..5c5c59c877 100644
--- a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py
+++ b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py
@@ -88,7 +88,7 @@ class Embedding(tf.layers.Layer):
class PTBModel(tfe.Network):
- """LSTM for word language modelling.
+ """LSTM for word language modeling.
Model described in:
(Zaremba, et. al.) Recurrent Neural Network Regularization
@@ -339,8 +339,7 @@ if __name__ == "__main__":
"http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz")
parser.add_argument(
"--logdir", type=str, default="", help="Directory for checkpoint.")
- parser.add_argument(
- "--epoch", type=int, default=20, help="Number of epoches.")
+ parser.add_argument("--epoch", type=int, default=20, help="Number of epochs.")
parser.add_argument("--batch-size", type=int, default=20, help="Batch size.")
parser.add_argument(
"--seq-len", type=int, default=35, help="Sequence length.")
diff --git a/tensorflow/contrib/eager/python/examples/spinn/data.py b/tensorflow/contrib/eager/python/examples/spinn/data.py
index a6e046320f..fcaae0a4f8 100644
--- a/tensorflow/contrib/eager/python/examples/spinn/data.py
+++ b/tensorflow/contrib/eager/python/examples/spinn/data.py
@@ -51,11 +51,11 @@ def get_non_parenthesis_words(items):
"""Get the non-parenthesis items from a SNLI parsed sentence.
Args:
- items: Data items from a parsed SNLI setence, with parentheses. E.g.,
+ items: Data items from a parsed SNLI sentence, with parentheses. E.g.,
["(", "Man", "(", "(", "(", "(", "(", "wearing", "pass", ")", ...
Returns:
- A list of non-parenthis word items, all converted to lower case. E.g.,
+ A list of non-parentheses word items, all converted to lower case. E.g.,
["man", "wearing", "pass", ...
"""
return [x.lower() for x in items if x not in PARENTHESES and x]
@@ -201,7 +201,7 @@ def load_word_vectors(data_root, vocab):
def calculate_bins(length2count, min_bin_size):
- """Cacluate bin boundaries given a histogram of lengths and mininum bin size.
+ """Calculate bin boundaries given a histogram of lengths and minimum bin size.
Args:
length2count: A `dict` mapping length to sentence count.
@@ -335,9 +335,9 @@ class SnliData(object):
# The sorting above and the batching here makes sure that sentences of
# similar max lengths are batched together, minimizing the inefficiency
# due to uneven max lengths. The sentences are batched differently in
- # each call to get_generator() due to the shuffling before sotring
+ # each call to get_generator() due to the shuffling before sorting
# above. The pad_and_reverse_word_ids() and pad_transitions() functions
- # take care of any remaning unevenness of the max sentence lengths.
+ # take care of any remaining unevenness of the max sentence lengths.
end = min(begin + batch_size, len(labels))
# Transpose, because the SPINN model requires time-major, instead of
# batch-major.
diff --git a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py
index 84e25cf81a..7b2f09cba1 100644
--- a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py
+++ b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py
@@ -26,6 +26,7 @@ import tempfile
import time
import numpy as np
+from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
# pylint: disable=g-bad-import-order
diff --git a/tensorflow/contrib/eager/python/network_test.py b/tensorflow/contrib/eager/python/network_test.py
index 8e6b947e5c..3329fc6c51 100644
--- a/tensorflow/contrib/eager/python/network_test.py
+++ b/tensorflow/contrib/eager/python/network_test.py
@@ -539,7 +539,7 @@ class NetworkTest(test.TestCase):
# No issue here since the name is unique within its scope.
name_conflict3 = MyNetwork(name="name_conflict")
net2 = MyNetwork() # name=outside_scope/my_network_2 to avoid the
- # variable_scope my_network_1 below.
+ # variable_scope my_network_1 below.
vs_name_conflict = MyNetwork(name="vs_name_conflict") # conflict below
with variable_scope.variable_scope("intervening_scope"):
with variable_scope.variable_scope(captured_scope):
@@ -688,7 +688,7 @@ class NetworkTest(test.TestCase):
net2(one)
# Layer names typically are globally unique rather than being unique within
# the scope of their first use. However, within a Network they must be named
- # locally so that previous Layer consutrciton does not interfere with
+ # locally so that previous Layer construction does not interfere with
# variable naming (e.g. add a Layer construction before the Network,
# suddenly your previously saved checkpoint is incompatible).
self.assertEqual("dense", net1.l1.name)
diff --git a/tensorflow/contrib/eager/python/saver.py b/tensorflow/contrib/eager/python/saver.py
index 57b070ec6e..62421849c7 100644
--- a/tensorflow/contrib/eager/python/saver.py
+++ b/tensorflow/contrib/eager/python/saver.py
@@ -82,7 +82,7 @@ def restore_variables_on_create(save_path, map_func=None):
map_func_wrapper = lambda self, x: x
else:
if not callable(map_func):
- raise ValueError("map_func must be callaled.")
+ raise ValueError("map_func must be callable.")
map_func_wrapper = lambda self, x: map_func(x)
ckpt_var_cache = dict()
diff --git a/tensorflow/contrib/ffmpeg/decode_video_op.cc b/tensorflow/contrib/ffmpeg/decode_video_op.cc
index d44032968d..6f8ad486d1 100644
--- a/tensorflow/contrib/ffmpeg/decode_video_op.cc
+++ b/tensorflow/contrib/ffmpeg/decode_video_op.cc
@@ -102,16 +102,12 @@ REGISTER_OP("DecodeVideo")
return Status::OK();
})
.Doc(R"doc(
-Processes the contents of an audio file into a tensor using FFmpeg to decode
+Processes the contents of a video file into a tensor using FFmpeg to decode
the file.
-One row of the tensor is created for each channel in the audio file. Each
-channel contains audio samples starting at the beginning of the audio and
-having `1/samples_per_second` time between them. If the `channel_count` is
-different from the contents of the file, channels will be merged or created.
-
-contents: The binary audio file contents, as a string or rank-0 string
- tensor.
+contents: The binary contents of the video file to decode. This is a
+ scalar.
+output: A rank-4 `Tensor` that has `[frames, height, width, 3]` RGB as output.
)doc");
} // namespace ffmpeg
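Editor's note: the corrected documentation above describes the op's contract — a scalar string of encoded video bytes in, a rank-4 `[frames, height, width, 3]` RGB tensor out. A hedged usage sketch, assuming the op is exposed through the `tf.contrib.ffmpeg` wrapper as `decode_video` (a wrapper name not confirmed by this diff):

```python
import tensorflow as tf

video_bytes = tf.read_file("clip.mp4")  # scalar string: raw encoded file contents
frames = tf.contrib.ffmpeg.decode_video(video_bytes)  # [frames, height, width, 3]

with tf.Session() as sess:
    print(sess.run(tf.shape(frames)))
```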
diff --git a/tensorflow/contrib/framework/python/ops/variables.py b/tensorflow/contrib/framework/python/ops/variables.py
index a9d47ac9b9..0754c3e0e3 100644
--- a/tensorflow/contrib/framework/python/ops/variables.py
+++ b/tensorflow/contrib/framework/python/ops/variables.py
@@ -25,6 +25,7 @@ import re
from tensorflow.contrib.framework.python.ops import add_arg_scope as contrib_add_arg_scope
from tensorflow.contrib.framework.python.ops import gen_variable_ops
from tensorflow.contrib.util import loader
+from tensorflow.core.protobuf import saver_pb2
from tensorflow.python import pywrap_tensorflow
from tensorflow.python.framework import device as tf_device
from tensorflow.python.framework import dtypes
@@ -684,7 +685,8 @@ def assign_from_checkpoint_fn(model_path, var_list, ignore_missing_vars=False,
'Variable %s missing in checkpoint %s', var, model_path)
var_list = available_vars
if var_list:
- saver = tf_saver.Saver(var_list, reshape=reshape_variables)
+ saver = tf_saver.Saver(var_list, reshape=reshape_variables,
+ write_version=saver_pb2.SaverDef.V1)
def callback(session):
saver.restore(session, model_path)
return callback
diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py
index 986a5ff6dc..fdfabd07c1 100644
--- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py
+++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py
@@ -28,6 +28,7 @@ from __future__ import division
from __future__ import print_function
import functools
+import os
import sys
import tarfile
@@ -189,20 +190,34 @@ def get_graph_def_from_resource(filename):
return graph_pb2.GraphDef.FromString(resource_loader.load_resource(filename))
-def get_graph_def_from_url_tarball(url, filename):
- """Get a GraphDef proto from a tarball on the web."""
- def _progress(count, block_size, total_size):
- sys.stdout.write('\r>> Downloading %s %.1f%%' % (
- url, float(count * block_size) / float(total_size) * 100.0))
- sys.stdout.flush()
- tar_filename, _ = urllib.request.urlretrieve(url, reporthook=_progress)
+def get_graph_def_from_url_tarball(url, filename, tar_filename=None):
+ """Get a GraphDef proto from a tarball on the web.
+
+ Args:
+ url: Web address of tarball
+ filename: Filename of graph definition within tarball
+ tar_filename: Temporary download filename (None = always download)
+
+ Returns:
+ A GraphDef loaded from a file in the downloaded tarball.
+ """
+ if not (tar_filename and os.path.exists(tar_filename)):
+
+ def _progress(count, block_size, total_size):
+ sys.stdout.write('\r>> Downloading %s %.1f%%' %
+ (url,
+ float(count * block_size) / float(total_size) * 100.0))
+ sys.stdout.flush()
+
+ tar_filename, _ = urllib.request.urlretrieve(url, tar_filename, _progress)
with tarfile.open(tar_filename, 'r:gz') as tar:
proto_str = tar.extractfile(filename).read()
return graph_pb2.GraphDef.FromString(proto_str)
def _default_graph_def_fn():
- return get_graph_def_from_url_tarball(INCEPTION_URL, INCEPTION_FROZEN_GRAPH)
+ return get_graph_def_from_url_tarball(INCEPTION_URL, INCEPTION_FROZEN_GRAPH,
+ os.path.basename(INCEPTION_URL))
def run_inception(images,
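Editor's note: a short usage sketch of the extended helper above — with the new `tar_filename` argument, an already-downloaded tarball is reused instead of being fetched again. Module and constant names are taken from the surrounding file and assumed to be importable this way:

```python
import os

from tensorflow.contrib.gan.python.eval.python import classifier_metrics_impl

# Download the Inception tarball once, then reuse the cached copy on later calls.
graph_def = classifier_metrics_impl.get_graph_def_from_url_tarball(
    classifier_metrics_impl.INCEPTION_URL,
    classifier_metrics_impl.INCEPTION_FROZEN_GRAPH,
    tar_filename=os.path.basename(classifier_metrics_impl.INCEPTION_URL))
```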
diff --git a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py
index 7d2a7a254f..56ac45554d 100644
--- a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py
+++ b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py
@@ -620,7 +620,7 @@ class CombineAdversarialLossTest(test.TestCase):
with self.test_session(use_gpu=True) as sess:
for _ in range(10): # spot check closeness on more than one sample.
gnorm_np, precond_gnorm_np = sess.run([gnorm, precond_gnorm])
- self.assertNear(gnorm_np, precond_gnorm_np, 1e-5)
+ self.assertNear(gnorm_np, precond_gnorm_np, 1e-4)
class CycleConsistencyLossTest(test.TestCase):
diff --git a/tensorflow/contrib/hvx/README.md b/tensorflow/contrib/hvx/README.md
index 5a6f2f3086..163993a3f6 100644
--- a/tensorflow/contrib/hvx/README.md
+++ b/tensorflow/contrib/hvx/README.md
@@ -1,60 +1,67 @@
# TensorFlow Runtime with HVX Acceleration
-## Description
+This README explains how to build and use the TensorFlow runtime with HVX Acceleration. HVX is an extension of Hexagon, a DSP provided by Qualcomm, which can compute vector calculations faster using less energy than ARM processors.
-This README explain how to build and use the TensorFlow Runtime with HVX Acceleration. HVX is an extension of Hexagon which is a DSP provided by qualcomm which can compute vector calculations faster using lower energy than ARM processors.
+## Dependencies
+
+* [Android SDK](https://developer.android.com/studio/index.html).
+* [Android NDK](https://developer.android.com/ndk/index.html). Save the path in `${NDK_ROOT}`.
+* A rooted Qualcomm-based Android device connected to the computer (preferably, a [Snapdragon Development Board](https://developer.qualcomm.com/hardware/additional-snapdragon), but it could be a rooted phone with a Qualcomm SoC, albeit this guide may not work with it). The device needs to be rooted for development and testing purposes, and shouldn't be needed in production. See [Behold, The Snapdragon MDP](https://developer.qualcomm.com/blog/behold-snapdragon-mdp) for more information.
+* [Hexagon SDK v3.0](https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools). Save the path in `${QUALCOMM_SDK}`.
+* The current directory should be TensorFlow source code (`git clone https://github.com/tensorflow/tensorflow.git && cd tensorflow`), and saved into `${TF_ROOT_DIR}`.
+
+You may also need to add a test signature in the device to run HVX-based binaries. Follow the instructions in `${QUALCOMM_SDK}/docs/Tools_Signing.html`, using Python 2.
+
+Note that if the device is not rooted, you may not be able to get the serial number, push the test signature and/or run binary files that call HVX libraries.
## Quick Start Guide
-We provides several tools to build and run inference with this runtime quickly.
+We provide several tools to build and run inference with this runtime quickly.
-#### All-in-one script to run inception model with prebuild hexagon library
-If you don’t need to build your own implementation of hexagon HVX, we provide a shortcut to execute graphs by using pre-compiled binaries.
+### Run the inception model with a prebuilt Hexagon library
+If you don’t need to build your own implementation of Hexagon HVX, we provide a shortcut to execute graphs by using pre-compiled binaries.
+
+```shell
+./tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh -p
```
-git clone https://github.com/tensorflow/tensorflow.git
-cd tensorflow
-NDK_ROOT="/path/to/ndk" ./tensorflow/contrib/makefile/build_all_android.sh -X
-```
-(-X downloads dependencies to hexagon HVX and graphs, and copy all dependencies to android and execute a test)
-#### All-in-one script to run inception model by building entire libraries from source code
- If you want to build your own implementation of hexagon HVX, we provide a sample all-in-one script to execute graphs which downloads source and build everything for hexagon.
+The `-p` option makes the script download dependencies (i.e., Hexagon HVX binaries and graph models), copy them to the Android device, and execute a test.
-```
-git clone https://github.com/tensorflow/tensorflow.git
-cd tensorflow
-QUALCOMM_SDK="/path/to/qualcomm/sdk" NDK_ROOT="/path/to/ndk" ./tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh
+### Run the inception model by building everything from source
+
+If you want to build your own implementation of Hexagon HVX, we provide a sample all-in-one script that downloads the source, builds everything that's necessary, and executes the graphs.
+
+```shell
+./tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh
```
## Building libraries
If you've finished walking through the quick start guide, you may want to try building each binary manually.
-#### Build libhexagon_nn_skel.so
-Download hexagon nn library from codeaurora.org and build it.
+### Build libhexagon\_nn\_skel.so
-```
+Download Hexagon NN library from codeaurora.org and build it.
+
+```shell
git clone https://source.codeaurora.org/quic/hexagon_nn/nnlib
cd nnlib
```
-(Just follow instructions in README.HOW_TO_BUILD. You can find libhexagon_nn_skel.so in hexagon_Release_dynamic_toolv72_v60/ship)
-Then copy the generated binary to GEN_LIBS_DIR
+Just follow the instructions in `README.HOW_TO_BUILD`. You can find the file `libhexagon_nn_skel.so` in `hexagon_Release_dynamic_toolv72_v60/ship`.
+Then copy the generated binary to `${GEN_LIBS_DIR}`.
-```
+```shell
GEN_LIBS_DIR="/path/to/a/dir/to/store/hexagon/libraries"
cp -v "hexagon_Release_dynamic_toolv72_v60/ship/libhexagon_nn_skel.so" "${GEN_LIBS_DIR}"
```
-#### Build libhexagon_controller.so
+### Build libhexagon\_controller.so
+
Download tensorflow and build hexagon controller.
-```
-git clone https://github.com/tensorflow/tensorflow.git
-cd tensorflow
-TF_ROOT_DIR="$(pwd)"
-QUALCOMM_SDK="/path/to/qualcomm/sdk"
+```shell
GENERATED_NNLIB_DIRECTORY="/path/to/nnlib"
GENERATED_HEXAGON_CONTROLLER_DIRECTORY="${QUALCOMM_SDK}/examples/common/generated_hexagon_controller"
rm -rf "${GENERATED_HEXAGON_CONTROLLER_DIRECTORY}"
@@ -70,12 +77,12 @@ make tree VERBOSE=1 V=android_Release
cp -v "${GENERATED_HEXAGON_CONTROLLER_DIRECTORY}/android_Release/ship/libhexagon_controller.so" "${GEN_LIBS_DIR}"
```
-#### Build tensorflow linking hexagon library
-Build tensorflow with the build_all_android.sh with specifying -x option.
+### Build TensorFlow linking Hexagon library
-```
+Build TensorFlow with `build_all_android.sh` specifying the `-x` option.
+
+```shell
BUILD_ALL_ANDROID_PATH="${TF_ROOT_DIR}/tensorflow/contrib/makefile/build_all_android.sh"
-NDK_ROOT="/path/to/ndk/root"
CC_PREFIX=${CC_PREFIX} NDK_ROOT=${NDK_ROOT} "${BUILD_ALL_ANDROID_PATH}" \
-x "${GEN_LIBS_DIR}" \
@@ -83,11 +90,11 @@ CC_PREFIX=${CC_PREFIX} NDK_ROOT=${NDK_ROOT} "${BUILD_ALL_ANDROID_PATH}" \
-t hexagon_graph_execution
```
-#### Push binaries to your Android device
+### Push binaries to your Android device
Before running tests on your Android device, you need to push several binaries to it.
-```
+```shell
adb push "${GEN_LIBS_DIR}/libhexagon_controller.so" "/data/local/tmp"
adb push "${GEN_LIBS_DIR}/libhexagon_nn_skel.so" "/vendor/lib/rfsa/adsp"
adb push -p \
@@ -100,40 +107,54 @@ adb shell chmod "${ANDROID_EXEC_FILE_MODE}" \
adb wait-for-device
```
-#### Run tests on the device
+### Run tests on the device
Finally, you can run the inference tests on your device.
-```
+```shell
adb shell 'LD_LIBRARY_PATH=/data/local/tmp:$LD_LIBRARY_PATH' \
"/data/local/tmp/hexagon_graph_execution"
```
-#### Troubleshooting
-If you're using the Open-Q 820 Snapdragon development kit, you may run into an issue with running the executable due to a missing testsig library. From the Hexagon SDK documentation: *Dynamic shared objects are required to be digitally signed and then authenticated at runtime before they are allowed to be loaded and executed.* Generating a testsig library is necessary to run the unsigned sample library built from this project.
+### Troubleshooting
+
+#### Testsig issue
+
+If you're using the Open-Q 820 Snapdragon Development Kit, you may run into an issue with running the executable due to a missing `testsig` library. From the Hexagon SDK documentation: *Dynamic shared objects are required to be digitally signed and then authenticated at runtime before they are allowed to be loaded and executed.* Generating a testsig library is necessary to run the unsigned sample library built from this project.
-If the lack of a testsig library is your problem, you will see errors of the type:
+If the lack of a `testsig` library is your problem, you will see errors of the type:
`vendor/qcom/proprietary/adsprpc/src/fastrpc_apps_user.c:169::error: -1: 0 == (nErr = remotectl_open(name, (int*)ph, dlerrstr, sizeof(dlerrstr), &dlerr))`
-appearing in adb logcat.
-
-There are several ways to create the testsig library, the only prerequisite is Python and the correct version of the Hexagon-SDK. The following steps is one way to create this library:
-1. Run adb as root: `adb root`
-2. Run the command `adb shell cat /sys/devices/soc0/serial_number`
-3. Convert the decimal number you get as output to hex
-4. Run the python script: `python ${QUALCOMM_SDK}/tools/elfsigner/elfsigner.py -t $(SERIAL_NUMBER_HEX_VALUE)`
-5. The output of the python script is a shared library stored in ${QUALCOMM_SDK}/tools/elfsigner/output/testsig-$(SERIAL_NUMBER_HEX_VALUE).so
-6. Push the shared library to your device:
+appearing in `adb logcat` or ["Expected: (version) >= (1), actual: 0 vs 1" while running a binary from adb](https://github.com/tensorflow/tensorflow/issues/11210).
+
+You need to add a test signature, as described at the beginning of this README. After rebooting your device, you should be able to run the sample application.
+
+#### Qualcomm SDK Linux installation fails with "Malformed \uxxxx encoding"
+
+The installer is based on LaunchAnywhere, which fails on Linux if the `PS1` environment variable contains uncommon Unicode characters:
+
```
-adb root
-adb wait-for-device
-adb remount
-adb wait-for-device
-adb shell mkdir /system/lib/rfsa
-adb shell mkdir /system/lib/rfsa/adsp
-adb push ${QUALCOMM_SDK}/tools/elfsigner/output/testsig-$(SERIAL_NUMBER_HEX_VALUE).so /system/lib/rfsa/adsp/
+Preparing to install...
+Extracting the JRE from the installer archive...
+Unpacking the JRE...
+Extracting the installation resources from the installer archive...
+Configuring the installer for this system's environment...
+
+Launching installer...
+
+An internal LaunchAnywhere application error has occurred and this application cannot proceed. (LAX)
+
+Stack Trace:
+java.lang.IllegalArgumentException: Malformed \uxxxx encoding.
+ at java.util.Properties.loadConvert(Properties.java:574)
+ at java.util.Properties.load0(Properties.java:391)
+ at java.util.Properties.load(Properties.java:317)
+ at com.zerog.common.java.util.PropertiesUtil.loadProperties(Unknown Source)
+ at com.zerog.lax.LAX.<init>(Unknown Source)
+ at com.zerog.lax.LAX.main(Unknown Source)
```
-After rebooting your device, you should be able to run the sample application.
+This can be worked around by temporarily setting the `PS1` environment variable to something simple, such as `$`.
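+
+For example, a minimal sketch (the installer file name below is just a placeholder):
+
+```shell
+export PS1='$'              # plain prompt while running the installer
+./hexagon_sdk_installer.bin # replace with the actual installer file name
+```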
+
+## Maintainers
-Maintainers:
-- Satoshi Kataoka (satok@google.com, github.com/satok16)
+* Satoshi Kataoka (satok@google.com, github.com/satok16)
diff --git a/tensorflow/contrib/kafka/BUILD b/tensorflow/contrib/kafka/BUILD
new file mode 100644
index 0000000000..efb403462a
--- /dev/null
+++ b/tensorflow/contrib/kafka/BUILD
@@ -0,0 +1,105 @@
+package(
+ default_visibility = ["//visibility:private"],
+)
+
+licenses(["notice"]) # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs")
+load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")
+load("//tensorflow:tensorflow.bzl", "tf_kernel_library")
+load("//tensorflow:tensorflow.bzl", "tf_py_test")
+
+tf_kernel_library(
+ name = "kafka_kernels",
+ srcs = ["kernels/kafka_dataset_ops.cc"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "//tensorflow/core:framework",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:lib_internal",
+ "//tensorflow/core/kernels:bounds_check_lib",
+ "//tensorflow/core/kernels:dataset",
+ "//third_party/eigen3",
+ "@kafka",
+ ],
+)
+
+tf_gen_op_libs(
+ op_lib_names = ["kafka_ops"],
+ deps = [
+ "//tensorflow/core:lib",
+ ],
+)
+
+tf_gen_op_wrapper_py(
+ name = "gen_kafka_ops",
+ out = "python/ops/gen_kafka_ops.py",
+ require_shape_functions = True,
+ deps = [":kafka_ops_op_lib"],
+)
+
+py_library(
+ name = "kafka",
+ srcs = [
+ "__init__.py",
+ "python/ops/kafka_dataset_ops.py",
+ ],
+ srcs_version = "PY2AND3",
+ visibility = ["//visibility:public"],
+ deps = [
+ ":gen_kafka_ops",
+ "//tensorflow/contrib/util:util_py",
+ "//tensorflow/python:array_ops",
+ "//tensorflow/python:control_flow_ops",
+ "//tensorflow/python:framework",
+ "//tensorflow/python:framework_for_generated_wrappers",
+ "//tensorflow/python:platform",
+ "//tensorflow/python:state_ops",
+ "//tensorflow/python:training",
+ "//tensorflow/python/data/ops:dataset_ops",
+ "//tensorflow/python/data/ops:iterator_ops",
+ "//tensorflow/python/data/ops:readers",
+ ],
+)
+
+# The Kafka server has to be set up before running the test.
+# The Kafka server is set up through Docker, so the Docker engine
+# has to be installed.
+#
+# Once the Docker engine is ready:
+# To set up the Kafka server:
+# $ bash tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh start kafka
+#
+# After the test is complete:
+# To tear down the Kafka server:
+# $ bash tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh stop kafka
+tf_py_test(
+ name = "kafka_test",
+ srcs = ["python/kernel_tests/kafka_test.py"],
+ additional_deps = [
+ ":kafka",
+ "//third_party/py/numpy",
+ "//tensorflow/python:client_testlib",
+ "//tensorflow/python:framework",
+ "//tensorflow/python:framework_test_lib",
+ "//tensorflow/python:platform_test",
+ ],
+ tags = [
+ "manual",
+ "notap",
+ ],
+)
+
+filegroup(
+ name = "all_files",
+ srcs = glob(
+ ["**/*"],
+ exclude = [
+ "**/METADATA",
+ "**/OWNERS",
+ ],
+ ),
+ visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/contrib/kafka/__init__.py b/tensorflow/contrib/kafka/__init__.py
new file mode 100644
index 0000000000..4d755c4056
--- /dev/null
+++ b/tensorflow/contrib/kafka/__init__.py
@@ -0,0 +1,32 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Kafka Dataset.
+
+@@KafkaDataset
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.kafka.python.ops.kafka_dataset_ops import KafkaDataset
+
+from tensorflow.python.util.all_util import remove_undocumented
+
+_allowed_symbols = [
+ "KafkaDataset",
+]
+
+remove_undocumented(__name__)
diff --git a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc
new file mode 100644
index 0000000000..88ef5f3571
--- /dev/null
+++ b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc
@@ -0,0 +1,321 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/dataset.h"
+
+#include "tensorflow/core/framework/tensor.h"
+
+#include "src-cpp/rdkafkacpp.h"
+
+namespace tensorflow {
+
+class KafkaDatasetOp : public DatasetOpKernel {
+ public:
+ using DatasetOpKernel::DatasetOpKernel;
+
+ void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override {
+ const Tensor* topics_tensor;
+ OP_REQUIRES_OK(ctx, ctx->input("topics", &topics_tensor));
+ OP_REQUIRES(
+ ctx, topics_tensor->dims() <= 1,
+ errors::InvalidArgument("`topics` must be a scalar or a vector."));
+
+ std::vector<string> topics;
+ topics.reserve(topics_tensor->NumElements());
+ for (int i = 0; i < topics_tensor->NumElements(); ++i) {
+ topics.push_back(topics_tensor->flat<string>()(i));
+ }
+
+ std::string servers = "";
+ OP_REQUIRES_OK(ctx,
+ ParseScalarArgument<std::string>(ctx, "servers", &servers));
+ std::string group = "";
+ OP_REQUIRES_OK(ctx, ParseScalarArgument<std::string>(ctx, "group", &group));
+ bool eof = false;
+ OP_REQUIRES_OK(ctx, ParseScalarArgument<bool>(ctx, "eof", &eof));
+ int64 timeout = -1;
+ OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "timeout", &timeout));
+ OP_REQUIRES(ctx, (timeout > 0),
+ errors::InvalidArgument(
+ "Timeout value should be large than 0, got ", timeout));
+ *output = new Dataset(ctx, std::move(topics), servers, group, eof, timeout);
+ }
+
+ private:
+ class Dataset : public GraphDatasetBase {
+ public:
+ Dataset(OpKernelContext* ctx, std::vector<string> topics,
+ const string& servers, const string& group, const bool eof,
+ const int64 timeout)
+ : GraphDatasetBase(ctx),
+ topics_(std::move(topics)),
+ servers_(servers),
+ group_(group),
+ eof_(eof),
+ timeout_(timeout) {}
+
+ std::unique_ptr<IteratorBase> MakeIterator(
+ const string& prefix) const override {
+ return std::unique_ptr<IteratorBase>(
+ new Iterator({this, strings::StrCat(prefix, "::Kafka")}));
+ }
+
+ const DataTypeVector& output_dtypes() const override {
+ static DataTypeVector* dtypes = new DataTypeVector({DT_STRING});
+ return *dtypes;
+ }
+
+ const std::vector<PartialTensorShape>& output_shapes() const override {
+ static std::vector<PartialTensorShape>* shapes =
+ new std::vector<PartialTensorShape>({{}});
+ return *shapes;
+ }
+
+ string DebugString() override { return "KafkaDatasetOp::Dataset"; }
+
+ protected:
+ Status AsGraphDefInternal(DatasetGraphDefBuilder* b,
+ Node** output) const override {
+ Node* topics = nullptr;
+ TF_RETURN_IF_ERROR(b->AddVector(topics_, &topics));
+ Node* servers = nullptr;
+ TF_RETURN_IF_ERROR(b->AddScalar(servers_, &servers));
+ Node* group = nullptr;
+ TF_RETURN_IF_ERROR(b->AddScalar(group_, &group));
+ Node* eof = nullptr;
+ TF_RETURN_IF_ERROR(b->AddScalar(eof_, &eof));
+ Node* timeout = nullptr;
+ TF_RETURN_IF_ERROR(b->AddScalar(timeout_, &timeout));
+ TF_RETURN_IF_ERROR(
+ b->AddDataset(this, {topics, servers, group, eof, timeout}, output));
+ return Status::OK();
+ }
+
+ private:
+ class Iterator : public DatasetIterator<Dataset> {
+ public:
+ explicit Iterator(const Params& params)
+ : DatasetIterator<Dataset>(params) {}
+
+ Status GetNextInternal(IteratorContext* ctx,
+ std::vector<Tensor>* out_tensors,
+ bool* end_of_sequence) override {
+ mutex_lock l(mu_);
+ do {
+ // We are currently processing a topic, so try to read the next message.
+ if (consumer_.get()) {
+ while (true) {
+ if (limit_ >= 0 &&
+ (topic_partition_->offset() >= limit_ || offset_ >= limit_)) {
+ // EOF current topic
+ break;
+ }
+ std::unique_ptr<RdKafka::Message> message(
+ consumer_->consume(dataset()->timeout_));
+ if (message->err() == RdKafka::ERR_NO_ERROR) {
+ // Produce the message as output.
+ Tensor line_tensor(cpu_allocator(), DT_STRING, {});
+ line_tensor.scalar<string>()() =
+ std::string(static_cast<const char*>(message->payload()),
+ message->len());
+ out_tensors->emplace_back(std::move(line_tensor));
+ *end_of_sequence = false;
+ // Sync offset
+ offset_ = message->offset();
+ return Status::OK();
+ }
+
+ if (message->err() == RdKafka::ERR__PARTITION_EOF &&
+ dataset()->eof_) {
+ // EOF current topic
+ break;
+ }
+ if (message->err() != RdKafka::ERR__TIMED_OUT) {
+ return errors::Internal("Failed to consume:",
+ message->errstr());
+ }
+ message.reset(nullptr);
+ consumer_->poll(0);
+ }
+
+ // We have reached the end of the current topic, so maybe
+ // move on to next topic.
+ ResetStreamsLocked();
+ ++current_topic_index_;
+ }
+
+ // Iteration ends when there are no more topics to process.
+ if (current_topic_index_ == dataset()->topics_.size()) {
+ *end_of_sequence = true;
+ return Status::OK();
+ }
+
+ TF_RETURN_IF_ERROR(SetupStreamsLocked(ctx->env()));
+ } while (true);
+ }
+
+ protected:
+ Status SaveInternal(IteratorStateWriter* writer) override {
+ mutex_lock l(mu_);
+ TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("current_topic_index"),
+ current_topic_index_));
+
+ // `consumer_` is empty if
+ // 1. GetNext has not been called even once.
+ // 2. All topics have been read and iterator has been exhausted.
+ if (consumer_.get()) {
+ TF_RETURN_IF_ERROR(
+ writer->WriteScalar(full_name("current_pos"), offset_));
+ }
+ return Status::OK();
+ }
+
+ Status RestoreInternal(IteratorContext* ctx,
+ IteratorStateReader* reader) override {
+ mutex_lock l(mu_);
+ ResetStreamsLocked();
+ int64 current_topic_index;
+ TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("current_topic_index"),
+ &current_topic_index));
+ current_topic_index_ = size_t(current_topic_index);
+ // The key "current_pos" is written only if the iterator was saved
+ // with an open topic.
+ if (reader->Contains(full_name("current_pos"))) {
+ int64 current_pos;
+ TF_RETURN_IF_ERROR(
+ reader->ReadScalar(full_name("current_pos"), &current_pos));
+
+ TF_RETURN_IF_ERROR(SetupStreamsLocked(ctx->env()));
+ topic_partition_->set_offset(current_pos);
+ if (topic_partition_->offset() != current_pos) {
+ return errors::Internal("Failed to restore to offset ",
+ current_pos);
+ }
+ offset_ = current_pos;
+ }
+ return Status::OK();
+ }
+
+ private:
+ // Sets up Kafka streams to read from the topic at
+ // `current_topic_index_`.
+ Status SetupStreamsLocked(Env* env) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+ if (current_topic_index_ >= dataset()->topics_.size()) {
+ return errors::InvalidArgument(
+ "current_topic_index_:", current_topic_index_,
+ " >= topics_.size():", dataset()->topics_.size());
+ }
+
+ // Actually move on to next topic.
+ string entry = dataset()->topics_[current_topic_index_];
+
+ std::vector<string> parts = str_util::Split(entry, ":");
+ if (parts.size() < 1) {
+ return errors::InvalidArgument("Invalid parameters: ", entry);
+ }
+ string topic = parts[0];
+ int32 partition = 0;
+ if (parts.size() > 1) {
+ if (!strings::safe_strto32(parts[1], &partition)) {
+ return errors::InvalidArgument("Invalid parameters: ", entry);
+ }
+ }
+ int64 offset = 0;
+ if (parts.size() > 2) {
+ if (!strings::safe_strto64(parts[2], &offset)) {
+ return errors::InvalidArgument("Invalid parameters: ", entry);
+ }
+ }
+
+ topic_partition_.reset(
+ RdKafka::TopicPartition::create(topic, partition, offset));
+
+ offset_ = topic_partition_->offset();
+ limit_ = -1;
+ if (parts.size() > 3) {
+ if (!strings::safe_strto64(parts[3], &limit_)) {
+ return errors::InvalidArgument("Invalid parameters: ", entry);
+ }
+ }
+
+ std::unique_ptr<RdKafka::Conf> conf(
+ RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL));
+ std::unique_ptr<RdKafka::Conf> topic_conf(
+ RdKafka::Conf::create(RdKafka::Conf::CONF_TOPIC));
+
+ std::string errstr;
+
+ RdKafka::Conf::ConfResult result =
+ conf->set("default_topic_conf", topic_conf.get(), errstr);
+ if (result != RdKafka::Conf::CONF_OK) {
+ return errors::Internal("Failed to set default_topic_conf:", errstr);
+ }
+
+ result = conf->set("bootstrap.servers", dataset()->servers_, errstr);
+ if (result != RdKafka::Conf::CONF_OK) {
+ return errors::Internal("Failed to set bootstrap.servers ",
+ dataset()->servers_, ":", errstr);
+ }
+ result = conf->set("group.id", dataset()->group_, errstr);
+ if (result != RdKafka::Conf::CONF_OK) {
+ return errors::Internal("Failed to set group.id ", dataset()->group_,
+ ":", errstr);
+ }
+
+ consumer_.reset(RdKafka::KafkaConsumer::create(conf.get(), errstr));
+ if (!consumer_.get()) {
+ return errors::Internal("Failed to create consumer:", errstr);
+ }
+
+ std::vector<RdKafka::TopicPartition*> partitions;
+ partitions.emplace_back(topic_partition_.get());
+ RdKafka::ErrorCode err = consumer_->assign(partitions);
+ if (err != RdKafka::ERR_NO_ERROR) {
+ return errors::Internal(
+ "Failed to assign partition [", topic_partition_->topic(), ", ",
+ topic_partition_->partition(), ", ", topic_partition_->offset(),
+ "]:", RdKafka::err2str(err));
+ }
+
+ return Status::OK();
+ }
+
+ // Resets all Kafka streams.
+ void ResetStreamsLocked() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+ consumer_->unassign();
+ consumer_->close();
+ consumer_.reset(nullptr);
+ }
+
+ mutex mu_;
+ size_t current_topic_index_ GUARDED_BY(mu_) = 0;
+ int64 offset_ GUARDED_BY(mu_) = 0;
+ int64 limit_ GUARDED_BY(mu_) = -1;
+ std::unique_ptr<RdKafka::TopicPartition> topic_partition_ GUARDED_BY(mu_);
+ std::unique_ptr<RdKafka::KafkaConsumer> consumer_ GUARDED_BY(mu_);
+ };
+
+ const std::vector<string> topics_;
+ const std::string servers_;
+ const std::string group_;
+ const bool eof_;
+ const int64 timeout_;
+ };
+};
+
+REGISTER_KERNEL_BUILDER(Name("KafkaDataset").Device(DEVICE_CPU),
+ KafkaDatasetOp);
+
+} // namespace tensorflow
diff --git a/tensorflow/contrib/kafka/ops/kafka_ops.cc b/tensorflow/contrib/kafka/ops/kafka_ops.cc
new file mode 100644
index 0000000000..8cdf16103b
--- /dev/null
+++ b/tensorflow/contrib/kafka/ops/kafka_ops.cc
@@ -0,0 +1,44 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+REGISTER_OP("KafkaDataset")
+ .Input("topics: string")
+ .Input("servers: string")
+ .Input("group: string")
+ .Input("eof: bool")
+ .Input("timeout: int64")
+ .Output("handle: variant")
+ .SetIsStateful()
+ .SetShapeFn(shape_inference::ScalarShape)
+ .Doc(R"doc(
+Creates a dataset that emits the messages of one or more Kafka topics.
+
+topics: A `tf.string` tensor containing one or more subscriptions,
+ in the format of [topic:partition:offset:length];
+ by default, length is -1 for unlimited.
+servers: A list of bootstrap servers.
+group: The consumer group id.
+eof: If True, the Kafka reader will stop on EOF.
+timeout: The timeout value for the Kafka Consumer to wait
+ (in milliseconds).
+)doc");
+
+} // namespace tensorflow
diff --git a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.py b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.py
new file mode 100644
index 0000000000..621911876f
--- /dev/null
+++ b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.py
@@ -0,0 +1,115 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+# ==============================================================================
+"""Tests for KafkaDataset."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.kafka.python.ops import kafka_dataset_ops
+from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class KafkaDatasetTest(test.TestCase):
+
+ def setUp(self):
+ # The Kafka server has to be set up before the test
+ # and torn down after the test manually.
+ # The Docker engine has to be installed.
+ #
+ # To set up the Kafka server:
+ # $ bash kafka_test.sh start kafka
+ #
+ # To tear down the Kafka server:
+ # $ bash kafka_test.sh stop kafka
+ pass
+
+ def testKafkaDataset(self):
+ topics = array_ops.placeholder(dtypes.string, shape=[None])
+ num_epochs = array_ops.placeholder(dtypes.int64, shape=[])
+ batch_size = array_ops.placeholder(dtypes.int64, shape=[])
+
+ repeat_dataset = kafka_dataset_ops.KafkaDataset(
+ topics, group="test", eof=True).repeat(num_epochs)
+ batch_dataset = repeat_dataset.batch(batch_size)
+
+ iterator = iterator_ops.Iterator.from_structure(batch_dataset.output_types)
+ init_op = iterator.make_initializer(repeat_dataset)
+ init_batch_op = iterator.make_initializer(batch_dataset)
+ get_next = iterator.get_next()
+
+ with self.test_session() as sess:
+ # Basic test: read from topic 0.
+ sess.run(init_op, feed_dict={topics: ["test:0:0:4"], num_epochs: 1})
+ for i in range(5):
+ self.assertEqual("D" + str(i), sess.run(get_next))
+ with self.assertRaises(errors.OutOfRangeError):
+ sess.run(get_next)
+
+ # Basic test: read from topic 1.
+ sess.run(init_op, feed_dict={topics: ["test:0:5:-1"], num_epochs: 1})
+ for i in range(5):
+ self.assertEqual("D" + str(i + 5), sess.run(get_next))
+ with self.assertRaises(errors.OutOfRangeError):
+ sess.run(get_next)
+
+ # Basic test: read from both topics.
+ sess.run(
+ init_op,
+ feed_dict={
+ topics: ["test:0:0:4", "test:0:5:-1"],
+ num_epochs: 1
+ })
+ for j in range(2):
+ for i in range(5):
+ self.assertEqual("D" + str(i + j * 5), sess.run(get_next))
+ with self.assertRaises(errors.OutOfRangeError):
+ sess.run(get_next)
+
+ # Test repeated iteration through both topics.
+ sess.run(
+ init_op,
+ feed_dict={
+ topics: ["test:0:0:4", "test:0:5:-1"],
+ num_epochs: 10
+ })
+ for _ in range(10):
+ for j in range(2):
+ for i in range(5):
+ self.assertEqual("D" + str(i + j * 5), sess.run(get_next))
+ with self.assertRaises(errors.OutOfRangeError):
+ sess.run(get_next)
+
+ # Test batched and repeated iteration through both topics.
+ sess.run(
+ init_batch_op,
+ feed_dict={
+ topics: ["test:0:0:4", "test:0:5:-1"],
+ num_epochs: 10,
+ batch_size: 5
+ })
+ for _ in range(10):
+ self.assertAllEqual(["D" + str(i) for i in range(5)],
+ sess.run(get_next))
+ self.assertAllEqual(["D" + str(i + 5) for i in range(5)],
+ sess.run(get_next))
+
+
+if __name__ == "__main__":
+ test.main()
diff --git a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh
new file mode 100644
index 0000000000..adf027b8e7
--- /dev/null
+++ b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+set -e
+set -o pipefail
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 start|stop <kafka container name>" >&2
+ exit 1
+fi
+
+container=$2
+if [ "$1" == "start" ]; then
+ docker run -d --rm --net=host --name=$container spotify/kafka
+ echo Wait 5 secs until kafka is up and running
+ sleep 5
+ echo Create test topic
+ docker exec $container bash -c '/opt/kafka_2.11-0.10.1.0/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test'
+ echo Create test message
+ docker exec $container bash -c 'echo -e "D0\nD1\nD2\nD3\nD4\nD5\nD6\nD7\nD8\nD9" > /test'
+ echo Produce test message
+ docker exec $container bash -c '/opt/kafka_2.11-0.10.1.0/bin/kafka-console-producer.sh --topic test --broker-list 127.0.0.1:9092 < /test'
+
+ echo Container $container started successfully
+elif [ "$1" == "stop" ]; then
+ docker rm -f $container
+
+ echo Container $container stopped successfully
+else
+ echo "Usage: $0 start|stop <kafka container name>" >&2
+ exit 1
+fi
+
+
+
diff --git a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py
new file mode 100644
index 0000000000..8e51d27a34
--- /dev/null
+++ b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py
@@ -0,0 +1,74 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Kafka Dataset."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.kafka.python.ops import gen_kafka_ops
+from tensorflow.python.data.ops.readers import Dataset
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+
+
+class KafkaDataset(Dataset):
+ """A Kafka Dataset that consumes the message.
+ """
+
+ def __init__(self,
+ topics,
+ servers="localhost",
+ group="",
+ eof=False,
+ timeout=1000):
+ """Create a KafkaReader.
+
+ Args:
+ topics: A `tf.string` tensor containing one or more subscriptions,
+ in the format of [topic:partition:offset:length];
+ by default, length is -1 for unlimited.
+ servers: A list of bootstrap servers.
+ group: The consumer group id.
+ eof: If True, the Kafka reader will stop on EOF.
+ timeout: The timeout value for the Kafka Consumer to wait
+ (in milliseconds).
+ """
+ super(KafkaDataset, self).__init__()
+ self._topics = ops.convert_to_tensor(
+ topics, dtype=dtypes.string, name="topics")
+ self._servers = ops.convert_to_tensor(
+ servers, dtype=dtypes.string, name="servers")
+ self._group = ops.convert_to_tensor(
+ group, dtype=dtypes.string, name="group")
+ self._eof = ops.convert_to_tensor(eof, dtype=dtypes.bool, name="eof")
+ self._timeout = ops.convert_to_tensor(
+ timeout, dtype=dtypes.int64, name="timeout")
+
+ def _as_variant_tensor(self):
+ return gen_kafka_ops.kafka_dataset(self._topics, self._servers, self._group,
+ self._eof, self._timeout)
+
+ @property
+ def output_classes(self):
+ return ops.Tensor
+
+ @property
+ def output_shapes(self):
+ return tensor_shape.scalar()
+
+ @property
+ def output_types(self):
+ return dtypes.string
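For orientation, a minimal usage sketch of the new `KafkaDataset`, assuming graph-mode TensorFlow 1.x, a local broker at `localhost:9092`, and a `test` topic populated as in the unit test above (the consumer group name is a placeholder):

```python
import tensorflow as tf
from tensorflow.contrib.kafka.python.ops import kafka_dataset_ops

# Subscribe to partition 0 of topic "test", offsets 0 through 4 (5 messages).
dataset = kafka_dataset_ops.KafkaDataset(
    topics=["test:0:0:4"], servers="localhost:9092", group="demo", eof=True)

iterator = dataset.make_one_shot_iterator()
next_message = iterator.get_next()

with tf.Session() as sess:
  try:
    while True:
      print(sess.run(next_message))  # each element is a scalar tf.string
  except tf.errors.OutOfRangeError:
    pass
```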
diff --git a/tensorflow/contrib/layers/__init__.py b/tensorflow/contrib/layers/__init__.py
index 6c624929f2..ef419862b4 100644
--- a/tensorflow/contrib/layers/__init__.py
+++ b/tensorflow/contrib/layers/__init__.py
@@ -27,6 +27,7 @@ See the @{$python/contrib.layers} guide.
@@convolution2d_transpose
@@conv3d_transpose
@@convolution3d_transpose
+@@dense_to_sparse
@@dropout
@@elu
@@embedding_lookup_unique
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index 7c52da7b49..1c3af19a6c 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -29,6 +29,7 @@ from tensorflow.contrib.framework.python.ops import variables
from tensorflow.contrib.layers.python.layers import initializers
from tensorflow.contrib.layers.python.layers import utils
from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import function
from tensorflow.python.framework import ops
@@ -58,12 +59,12 @@ __all__ = [
'avg_pool2d', 'avg_pool3d', 'batch_norm', 'bias_add', 'conv2d', 'conv3d',
'conv2d_in_plane', 'conv2d_transpose', 'conv3d_transpose', 'convolution',
'convolution2d', 'convolution2d_in_plane', 'convolution2d_transpose',
- 'convolution3d', 'convolution3d_transpose', 'dropout', 'elu', 'flatten',
- 'fully_connected', 'GDN', 'gdn', 'layer_norm', 'linear', 'pool',
- 'max_pool2d', 'max_pool3d', 'one_hot_encoding', 'relu', 'relu6', 'repeat',
- 'scale_gradient', 'separable_conv2d', 'separable_convolution2d', 'softmax',
- 'spatial_softmax', 'stack', 'unit_norm', 'legacy_fully_connected',
- 'legacy_linear', 'legacy_relu', 'maxout'
+ 'convolution3d', 'convolution3d_transpose', 'dense_to_sparse', 'dropout',
+ 'elu', 'flatten', 'fully_connected', 'GDN', 'gdn', 'layer_norm', 'linear',
+ 'pool', 'max_pool2d', 'max_pool3d', 'one_hot_encoding', 'relu', 'relu6',
+ 'repeat', 'scale_gradient', 'separable_conv2d', 'separable_convolution2d',
+ 'softmax', 'spatial_softmax', 'stack', 'unit_norm',
+ 'legacy_fully_connected', 'legacy_linear', 'legacy_relu', 'maxout'
]
DATA_FORMAT_NCHW = 'NCHW'
@@ -1401,6 +1402,30 @@ def convolution3d_transpose(
@add_arg_scope
+def dense_to_sparse(tensor, eos_token=0, outputs_collections=None, scope=None):
+ """Converts a dense tensor into a sparse tensor.
+ An example use would be to convert dense labels to sparse ones
+ so that they can be fed to the ctc_loss.
+
+ Args:
+ tensor: An `int` `Tensor` to be converted to a `Sparse`.
+ eos_token: An integer.
+ It is part of the target label that signifies the end of a sentence.
+ outputs_collections: Collection to add the outputs.
+ scope: Optional scope for name_scope.
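+
+  Returns:
+    A `SparseTensor` with the same dense shape as `tensor`, holding its
+    non-`eos_token` values.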
+ """
+ with variable_scope.variable_scope(scope, 'dense_to_sparse', [tensor]) as sc:
+ tensor = ops.convert_to_tensor(tensor)
+ indices = array_ops.where(
+ math_ops.not_equal(tensor, constant_op.constant(eos_token,
+ tensor.dtype)))
+ values = array_ops.gather_nd(tensor, indices)
+ shape = array_ops.shape(tensor, out_type=dtypes.int64)
+ outputs = sparse_tensor.SparseTensor(indices, values, shape)
+ return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
+
+
+@add_arg_scope
def dropout(inputs,
keep_prob=0.5,
noise_shape=None,
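For orientation, a minimal usage sketch of the `dense_to_sparse` layer added above, for example to prepare padded labels for `ctc_loss` (the label values are made up; `0` acts as the eos/padding token):

```python
import tensorflow as tf

# Two label sequences padded with the eos_token 0.
dense_labels = tf.constant([[1, 2, 3, 0],
                            [4, 5, 0, 0]], dtype=tf.int32)

# Exposed as tf.contrib.layers.dense_to_sparse by the __init__.py change above.
sparse_labels = tf.contrib.layers.dense_to_sparse(dense_labels, eos_token=0)

with tf.Session() as sess:
  result = sess.run(sparse_labels)
  print(result.indices)  # positions of the non-padding entries
  print(result.values)   # [1 2 3 4 5]
```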
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index 49b23ce8fa..972ff10bf9 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -44,6 +44,7 @@ from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import partitioned_variables
from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import template
from tensorflow.python.ops import variable_scope
@@ -1301,6 +1302,19 @@ class ConvolutionInPlaneTest(test.TestCase):
self.assertAllClose(result, expected, rtol=1e-5, atol=1e-5)
+class DenseToSparseTest(test.TestCase):
+
+ def testDenseFromConstantToSparse(self):
+ expected_constant = np.reshape(np.arange(24, dtype=np.int64), (3, 4, 2))
+ tensor = constant_op.constant(expected_constant)
+ sparse = _layers.dense_to_sparse(tensor)
+ dense = sparse_ops.sparse_to_dense(sparse.indices, sparse.dense_shape,
+ sparse.values)
+ with self.test_session() as sess:
+ constant = sess.run(dense)
+ self.assertAllEqual(expected_constant, constant)
+
+
class DropoutTest(test.TestCase):
def testCreateDropout(self):
diff --git a/tensorflow/contrib/learn/python/learn/datasets/synthetic.py b/tensorflow/contrib/learn/python/learn/datasets/synthetic.py
index 649996c49c..9a843168c2 100644
--- a/tensorflow/contrib/learn/python/learn/datasets/synthetic.py
+++ b/tensorflow/contrib/learn/python/learn/datasets/synthetic.py
@@ -151,7 +151,7 @@ def spirals(n_samples=100,
# Add more points if n_samples is not divisible by n_classes (unbalanced!)
extras = n_samples % n_classes
if extras > 0:
- x_exrta, y_extra = _modes[mode](np.random.rand(extras) * 2 * np.pi, *args,
+ x_extra, y_extra = _modes[mode](np.random.rand(extras) * 2 * np.pi, *args,
**kwargs)
spir_x = np.append(spir_x, x_extra)
spir_y = np.append(spir_y, y_extra)
diff --git a/tensorflow/contrib/learn/python/learn/datasets/synthetic_test.py b/tensorflow/contrib/learn/python/learn/datasets/synthetic_test.py
index 613d8d39a3..5809995c8c 100644
--- a/tensorflow/contrib/learn/python/learn/datasets/synthetic_test.py
+++ b/tensorflow/contrib/learn/python/learn/datasets/synthetic_test.py
@@ -136,6 +136,9 @@ class SyntheticTest(test.TestCase):
self.assertRaises(AssertionError, np.testing.assert_array_equal,
spir0.data, spir1.data)
+ def test_spirals_synthetic(self):
+ synthetic.spirals(3)
+
if __name__ == '__main__':
test.main()
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py
index 12f9bba531..2bd57597c2 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py
@@ -1224,7 +1224,7 @@ class DNNRegressorTest(test.TestCase):
self, predictions, expected_shape):
predictions_nparray = np.array(predictions)
self.assertAllEqual(expected_shape, predictions_nparray.shape)
- self.assertTrue(np.issubdtype(predictions_nparray.dtype, np.float))
+ self.assertTrue(np.issubdtype(predictions_nparray.dtype, np.floating))
def testPredict_AsIterableFalse(self):
"""Tests predict method with as_iterable=False."""
diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index 0a097d5a69..19829e4991 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -5,25 +5,25 @@ def tflite_copts():
copts = [
"-DFARMHASH_NO_CXX_STRING",
] + select({
- "//tensorflow:android_arm64": [
+ str(Label("//tensorflow:android_arm64")): [
"-std=c++11",
"-O3",
],
- "//tensorflow:android_arm": [
+ str(Label("//tensorflow:android_arm")): [
"-mfpu=neon",
"-mfloat-abi=softfp",
"-std=c++11",
"-O3",
],
- "//tensorflow:android_x86": [
+ str(Label("//tensorflow:android_x86")): [
"-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK",
],
- "//tensorflow:ios_x86_64": [
+ str(Label("//tensorflow:ios_x86_64")): [
"-msse4.1",
],
"//conditions:default": [],
}) + select({
- "//tensorflow:with_default_optimizations": [],
+ str(Label("//tensorflow:with_default_optimizations")): [],
"//conditions:default": ["-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK"],
})
diff --git a/tensorflow/contrib/lite/examples/label_image/BUILD b/tensorflow/contrib/lite/examples/label_image/BUILD
index 476d85c031..959347b549 100644
--- a/tensorflow/contrib/lite/examples/label_image/BUILD
+++ b/tensorflow/contrib/lite/examples/label_image/BUILD
@@ -42,7 +42,15 @@ cc_library(
"bitmap_helpers_impl.h",
"label_image.h",
],
- deps = ["//tensorflow/contrib/lite:string"],
+ deps = [
+ "//tensorflow/contrib/lite:builtin_op_data",
+ "//tensorflow/contrib/lite:framework",
+ "//tensorflow/contrib/lite:schema_fbs_version",
+ "//tensorflow/contrib/lite:string",
+ "//tensorflow/contrib/lite:string_util",
+ "//tensorflow/contrib/lite/kernels:builtin_ops",
+ "//tensorflow/contrib/lite/schema:schema_fbs",
+ ],
)
 # TODO(ahentz): Test disabled as it has a memory leak from read_bmp
diff --git a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h
index 860e27e5ba..97343dde6b 100644
--- a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h
+++ b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_H
-#define TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_H
+#ifndef TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_H_
+#define TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_H_
#include "tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h"
#include "tensorflow/contrib/lite/examples/label_image/label_image.h"
@@ -26,15 +26,15 @@ uint8_t* read_bmp(const std::string& input_bmp_name, int* width, int* height,
int* channels, Settings* s);
template <class T>
-void downsize(T* out, uint8_t* in, int image_height, int image_width,
- int image_channels, int wanted_height, int wanted_width,
- int wanted_channels, Settings* s);
+void resize(T* out, uint8_t* in, int image_height, int image_width,
+ int image_channels, int wanted_height, int wanted_width,
+ int wanted_channels, Settings* s);
// explicit instantiation
-template void downsize<uint8_t>(uint8_t*, unsigned char*, int, int, int, int,
- int, int, Settings*);
-template void downsize<float>(float*, unsigned char*, int, int, int, int, int,
+template void resize<uint8_t>(uint8_t*, unsigned char*, int, int, int, int, int,
int, Settings*);
+template void resize<float>(float*, unsigned char*, int, int, int, int, int,
+ int, Settings*);
} // namespace label_image
} // namespace tflite
diff --git a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h
index 64a931082b..d57f597875 100644
--- a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h
+++ b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h
@@ -13,8 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_IMPL_H
-#define TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_IMPL_H
+#ifndef TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_IMPL_H_
+#define TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_IMPL_H_
+
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/string_util.h"
+#include "tensorflow/contrib/lite/version.h"
#include "tensorflow/contrib/lite/examples/label_image/label_image.h"
@@ -22,28 +28,67 @@ namespace tflite {
namespace label_image {
template <class T>
-void downsize(T* out, uint8_t* in, int image_height, int image_width,
- int image_channels, int wanted_height, int wanted_width,
- int wanted_channels, Settings* s) {
- for (int y = 0; y < wanted_height; ++y) {
- const int in_y = (y * image_height) / wanted_height;
- uint8_t* in_row = in + (in_y * image_width * image_channels);
- T* out_row = out + (y * wanted_width * wanted_channels);
- for (int x = 0; x < wanted_width; ++x) {
- const int in_x = (x * image_width) / wanted_width;
- uint8_t* in_pixel = in_row + (in_x * image_channels);
- T* out_pixel = out_row + (x * wanted_channels);
- for (int c = 0; c < wanted_channels; ++c) {
- if (s->input_floating)
- out_pixel[c] = (in_pixel[c] - s->input_mean) / s->input_std;
- else
- out_pixel[c] = in_pixel[c];
- }
- }
+void resize(T* out, uint8_t* in, int image_height, int image_width,
+ int image_channels, int wanted_height, int wanted_width,
+ int wanted_channels, Settings* s) {
+ int number_of_pixels = image_height * image_width * image_channels;
+ std::unique_ptr<Interpreter> interpreter(new Interpreter);
+
+ int base_index = 0;
+
+ // two inputs: input and new_sizes
+ interpreter->AddTensors(2, &base_index);
+ // one output
+ interpreter->AddTensors(1, &base_index);
+ // set input and output tensors
+ interpreter->SetInputs({0, 1});
+ interpreter->SetOutputs({2});
+
+ // set parameters of tensors
+ TfLiteQuantizationParams quant;
+ interpreter->SetTensorParametersReadWrite(
+ 0, kTfLiteFloat32, "input",
+ {1, image_height, image_width, image_channels}, quant);
+ interpreter->SetTensorParametersReadWrite(1, kTfLiteInt32, "new_size", {2},
+ quant);
+ interpreter->SetTensorParametersReadWrite(
+ 2, kTfLiteFloat32, "output",
+ {1, wanted_height, wanted_width, wanted_channels}, quant);
+
+ ops::builtin::BuiltinOpResolver resolver;
+ TfLiteRegistration* resize_op =
+ resolver.FindOp(BuiltinOperator_RESIZE_BILINEAR);
+ interpreter->AddNodeWithParameters({0, 1}, {2}, nullptr, 0, nullptr,
+ resize_op, nullptr);
+
+ interpreter->AllocateTensors();
+
+ // fill input image
+ // in[] are integers, cannot do memcpy() directly
+ auto input = interpreter->typed_tensor<float>(0);
+ for (int i = 0; i < number_of_pixels; i++) {
+ input[i] = in[i];
+ }
+
+ // fill new_sizes
+ interpreter->typed_tensor<int>(1)[0] = wanted_height;
+ interpreter->typed_tensor<int>(1)[1] = wanted_width;
+
+ interpreter->Invoke();
+
+ auto output = interpreter->typed_tensor<float>(2);
+ auto output_number_of_pixels =
+ wanted_height * wanted_width * wanted_channels;
+
+ for (int i = 0; i < output_number_of_pixels; i++) {
+ if (s->input_floating)
+ out[i] = (output[i] - s->input_mean) / s->input_std;
+ else
+ out[i] = (uint8_t)output[i];
}
}
} // namespace label_image
} // namespace tflite
-#endif // TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_IMPL_H
+#endif // TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_IMPL_H_
diff --git a/tensorflow/contrib/lite/examples/label_image/label_image.cc b/tensorflow/contrib/lite/examples/label_image/label_image.cc
index 4d2e1ce0bc..a91467d345 100644
--- a/tensorflow/contrib/lite/examples/label_image/label_image.cc
+++ b/tensorflow/contrib/lite/examples/label_image/label_image.cc
@@ -148,14 +148,22 @@ void RunInference(Settings* s) {
int wanted_width = dims->data[2];
int wanted_channels = dims->data[3];
- if (s->input_floating) {
- downsize<float>(interpreter->typed_tensor<float>(input), in, image_height,
+ switch (interpreter->tensor(input)->type) {
+ case kTfLiteFloat32:
+ s->input_floating = true;
+ resize<float>(interpreter->typed_tensor<float>(input), in, image_height,
image_width, image_channels, wanted_height, wanted_width,
wanted_channels, s);
- } else {
- downsize<uint8_t>(interpreter->typed_tensor<uint8_t>(input), in,
+ break;
+ case kTfLiteUInt8:
+ resize<uint8_t>(interpreter->typed_tensor<uint8_t>(input), in,
image_height, image_width, image_channels, wanted_height,
wanted_width, wanted_channels, s);
+ break;
+ default:
+ LOG(FATAL) << "cannot handle input type "
+ << interpreter->tensor(input)->type << " yet";
+ exit(-1);
}
struct timeval start_time, stop_time;
@@ -177,13 +185,21 @@ void RunInference(Settings* s) {
std::vector<std::pair<float, int>> top_results;
- if (s->input_floating) {
- get_top_n<float>(interpreter->typed_output_tensor<float>(0), output_size,
- num_results, threshold, &top_results, s->input_floating);
- } else {
- get_top_n<uint8_t>(interpreter->typed_output_tensor<uint8_t>(0),
- output_size, num_results, threshold, &top_results,
- s->input_floating);
+ int output = interpreter->outputs()[0];
+ switch (interpreter->tensor(output)->type) {
+ case kTfLiteFloat32:
+ get_top_n<float>(interpreter->typed_output_tensor<float>(0), output_size,
+ num_results, threshold, &top_results, true);
+ break;
+ case kTfLiteUInt8:
+ get_top_n<uint8_t>(interpreter->typed_output_tensor<uint8_t>(0),
+ output_size, num_results, threshold, &top_results,
+ false);
+ break;
+ default:
+ LOG(FATAL) << "cannot handle output type "
+ << interpreter->tensor(output)->type << " yet";
+ exit(-1);
}
std::vector<string> labels;
@@ -203,13 +219,11 @@ void display_usage() {
LOG(INFO) << "label_image\n"
<< "--accelerated, -a: [0|1], use Android NNAPI or note\n"
<< "--count, -c: loop interpreter->Invoke() for certain times\n"
- << "--input_floating, -f: [0|1] type of input layer is floating "
- "point numbers\n"
<< "--input_mean, -b: input mean\n"
<< "--input_std, -s: input standard deviation\n"
<< "--image, -i: image_name.bmp\n"
<< "--labels, -l: labels for the model\n"
- << "--tflite_mode, -m: model_name.tflite\n"
+ << "--tflite_model, -m: model_name.tflite\n"
<< "--threads, -t: number of threads\n"
<< "--verbose, -v: [0|1] print more information\n"
<< "\n";
@@ -223,7 +237,6 @@ int Main(int argc, char** argv) {
static struct option long_options[] = {
{"accelerated", required_argument, 0, 'a'},
{"count", required_argument, 0, 'c'},
- {"input_floating", required_argument, 0, 'f'},
{"verbose", required_argument, 0, 'v'},
{"image", required_argument, 0, 'i'},
{"labels", required_argument, 0, 'l'},
@@ -254,11 +267,6 @@ int Main(int argc, char** argv) {
s.loop_count = strtol( // NOLINT(runtime/deprecated_fn)
optarg, (char**)NULL, 10);
break;
- case 'f':
- s.input_floating = strtol( // NOLINT(runtime/deprecated_fn)
- optarg, (char**)NULL, 10);
- s.input_layer_type = "float";
- break;
case 'i':
s.input_bmp_name = optarg;
break;
diff --git a/tensorflow/contrib/lite/examples/label_image/label_image.h b/tensorflow/contrib/lite/examples/label_image/label_image.h
index ce98e06fc1..4de32e33fb 100644
--- a/tensorflow/contrib/lite/examples/label_image/label_image.h
+++ b/tensorflow/contrib/lite/examples/label_image/label_image.h
@@ -16,9 +16,11 @@ limitations under the License.
#ifndef TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_LABEL_IMAGE_H
#define TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_LABEL_IMAGE_H
-#include <string>
#include "tensorflow/contrib/lite/string.h"
+namespace tflite {
+namespace label_image {
+
struct Settings {
bool verbose = false;
bool accel = false;
@@ -33,4 +35,7 @@ struct Settings {
int number_of_threads = 4;
};
+} // namespace label_image
+} // namespace tflite
+
#endif // TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_LABEL_IMAGE_H
diff --git a/tensorflow/contrib/lite/examples/label_image/label_image.md b/tensorflow/contrib/lite/examples/label_image/label_image.md
index d6019d673f..9ce32cf101 100644
--- a/tensorflow/contrib/lite/examples/label_image/label_image.md
+++ b/tensorflow/contrib/lite/examples/label_image/label_image.md
@@ -1,8 +1,12 @@
label_image for TensorFlow Lite inspired by TensorFlow's label_image.
+
+To build label_image for Android, first run $TENSORFLOW_ROOT/configure
+and set the Android NDK, or configure the NDK settings in
+$TENSORFLOW_ROOT/WORKSPACE.
To build it for android ARMv8:
```
-> bazel build --cxxopt=-std=c++11 \
+> bazel build --config monolithic --cxxopt=-std=c++11 \
--crosstool_top=//external:android/crosstool \
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--cpu=arm64-v8a \
@@ -10,13 +14,13 @@ To build it for android ARMv8:
```
or
```
-> bazel build --config android_arm64 --cxxopt=-std=c++11 \
+> bazel build --config android_arm64 --config monolithic --cxxopt=-std=c++11 \
//tensorflow/contrib/lite/examples/label_image:label_image
```
To build it for android arm-v7a:
```
-> bazel build --cxxopt=-std=c++11 \
+> bazel build --config monolithic --cxxopt=-std=c++11 \
--crosstool_top=//external:android/crosstool \
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--cpu=armeabi-v7a \
@@ -24,7 +28,7 @@ To build it for android arm-v7a:
```
or
```
-> bazel build --config android_arm --cxxopt=-std=c++11 \
+> bazel build --config android_arm --config monolithic --cxxopt=-std=c++11 \
//tensorflow/contrib/lite/examples/label_image:label_image
```
diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD
index 4691a543e9..a6ccc99a51 100644
--- a/tensorflow/contrib/lite/kernels/internal/BUILD
+++ b/tensorflow/contrib/lite/kernels/internal/BUILD
@@ -278,6 +278,8 @@ cc_library(
"optimized/neon_tensor_utils.cc",
],
hdrs = [
+ "common.h",
+ "optimized/cpu_check.h",
"optimized/neon_tensor_utils.h",
"optimized/tensor_utils_impl.h",
],
@@ -285,8 +287,11 @@ cc_library(
deps = [
":cpu_check",
":portable_tensor_utils",
+ ":types",
"//tensorflow/contrib/lite:builtin_op_data",
"//tensorflow/contrib/lite/kernels:activation_functor",
+ "@arm_neon_2_x86_sse",
+ "@gemmlowp",
],
)
@@ -306,14 +311,21 @@ cc_library(
"tensor_utils.cc",
],
hdrs = [
+ "common.h",
+ "compatibility.h",
+ "optimized/cpu_check.h",
+ "optimized/neon_tensor_utils.h",
"optimized/tensor_utils_impl.h",
"reference/portable_tensor_utils.h",
"tensor_utils.h",
+ "types.h",
],
copts = NEON_FLAGS_IF_APPLICABLE,
deps = [
"//tensorflow/contrib/lite/kernels:activation_functor",
"//tensorflow/contrib/lite:builtin_op_data",
+ "@arm_neon_2_x86_sse",
+ "@gemmlowp",
] + select({
":arm": [
":neon_tensor_utils",
@@ -333,6 +345,18 @@ cc_library(
":ios_arm64": [
":neon_tensor_utils",
],
+ ":x86_64": [
+ ":neon_tensor_utils",
+ ],
+ ":x86": [
+ ":neon_tensor_utils",
+ ],
+ ":k8": [
+ ":neon_tensor_utils",
+ ],
+ ":darwin": [
+ ":neon_tensor_utils",
+ ],
"//conditions:default": [
":portable_tensor_utils",
],
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/cpu_check.h b/tensorflow/contrib/lite/kernels/internal/optimized/cpu_check.h
index 6cb556bf45..3a53d3ab07 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/cpu_check.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/cpu_check.h
@@ -34,7 +34,7 @@ inline bool TestCPUFeatureNeon() {
#endif // __aarch64__
}
-#elif __ARM_NEON
+#elif defined USE_NEON || defined __ARM_NEON
inline bool TestCPUFeatureNeon() { return true; }
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc
index bf0bdfb1fb..883c7f270d 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc
@@ -16,11 +16,11 @@ limitations under the License.
#include "tensorflow/contrib/lite/builtin_op_data.h"
#include "tensorflow/contrib/lite/kernels/activation_functor.h"
+#include "tensorflow/contrib/lite/kernels/internal/common.h"
#include "tensorflow/contrib/lite/kernels/internal/optimized/tensor_utils_impl.h"
#ifdef USE_NEON
-#include <arm_neon.h>
#define kFloatWeightsPerNeonLane 4
namespace tflite {
diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/tensor_utils.cc
index 904a97803a..f4181b18a8 100644
--- a/tensorflow/contrib/lite/kernels/internal/tensor_utils.cc
+++ b/tensorflow/contrib/lite/kernels/internal/tensor_utils.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h"
+#include "tensorflow/contrib/lite/kernels/internal/common.h"
#ifndef USE_NEON
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
index 7019c29959..76032771af 100644
--- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
+++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
@@ -1571,7 +1571,7 @@ inline int ANeuralNetworksModel_addOperation(ANeuralNetworksModel* model,
}
/**
- * Specfifies which operands will be the model's inputs and outputs.
+ * Specifies which operands will be the model's inputs and outputs.
*
* An operand cannot be used for both input and output. Doing so will
* return an error.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc
index 2340f0e850..6961e23690 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc
@@ -132,6 +132,7 @@ bool GraphTransformationsPass(int increment, Model* model,
CHECK(increment == 1 || increment == -1);
bool changed = false;
if (model->operators.empty()) {
+ LOG(INFO) << "Model is empty.";
return false;
}
int op_index = increment == 1 ? 0 : model->operators.size() - 1;
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
index 833c97c758..e79e2a32fc 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
@@ -189,7 +189,10 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) {
// Remove all the resolved arrays.
for (const string& input_name : concat_op->inputs) {
- model->EraseArray(input_name);
+ // Check to prevent removal of shared tensors
+ if (CountOpsWithInput(*model, input_name) == 1) {
+ model->EraseArray(input_name);
+ }
}
// Remove concatenate operator
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index 8f12bc59fb..0bee694387 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -15,6 +15,7 @@ limitations under the License.
#ifndef TENSORFLOW_CONTRIB_LITE_TOCO_MODEL_H_
#define TENSORFLOW_CONTRIB_LITE_TOCO_MODEL_H_
+#include <functional>
#include <initializer_list>
#include <memory>
#include <string>
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 1add90fb82..ce0fde57f4 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -698,10 +698,11 @@ void CheckNonExistentIOArrays(const Model& model) {
void CheckNoMissingArray(const Model& model) {
for (const auto& op : model.operators) {
for (const auto& input : op->inputs) {
- CHECK(model.HasArray(input) || model.optional_arrays.count(input));
+ CHECK(model.HasArray(input) || model.optional_arrays.count(input))
+ << "Input: " << input << " missing for op: " << op->outputs[0] << ".";
}
for (const auto& output : op->outputs) {
- CHECK(model.HasArray(output));
+ CHECK(model.HasArray(output)) << "Output: " << output << " missing.";
}
}
CheckNonExistentIOArrays(model);
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index dd5770dc99..81327407d4 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -377,10 +377,10 @@ $(MARCH_OPTION) \
ifeq ($(BUILD_FOR_TEGRA),1)
NVCC := $(JETPACK)/cuda/bin/nvcc
- NVCCFLAGS := -x=cu -D__CUDACC__ -DNVCC -DNVIDIA_TEGRA -ccbin $(NDK_ROOT)/toolchains/$(TOOLCHAIN)/prebuilt/$(ANDROID_HOST_OS_ARCH)/bin/$(BIN_PREFIX)-g++ --std c++11 --expt-relaxed-constexpr -m64 -gencode arch=compute_53,\"code=sm_53\" -gencode arch=compute_62,\"code=sm_62\" -DEIGEN_AVOID_STL_ARRAY -DTENSORFLOW_USE_EIGEN_THREADPOOL -DLANG_CXX11 -DEIGEN_HAS_C99_MATH -DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=5.3
+ NVCCFLAGS := -x=cu -D__CUDACC__ -DNVCC -DANDROID_TEGRA -ccbin $(NDK_ROOT)/toolchains/$(TOOLCHAIN)/prebuilt/$(ANDROID_HOST_OS_ARCH)/bin/$(BIN_PREFIX)-g++ --std c++11 --expt-relaxed-constexpr -m64 -gencode arch=compute_53,\"code=sm_53\" -gencode arch=compute_62,\"code=sm_62\" -DEIGEN_AVOID_STL_ARRAY -DTENSORFLOW_USE_EIGEN_THREADPOOL -DLANG_CXX11 -DEIGEN_HAS_C99_MATH -DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=5.3
CXXFLAGS4NVCC =\
-DIS_SLIM_BUILD \
--DNVIDIA_TEGRA \
+-DANDROID_TEGRA \
-fno-exceptions \
-DNDEBUG $(OPTFLAGS) \
-march=armv8-a \
@@ -391,7 +391,7 @@ $(MARCH_OPTION) \
CXXFLAGS +=\
-DGOOGLE_CUDA=1 \
-D__ANDROID_TYPES_FULL__ \
--DNVIDIA_TEGRA \
+-DANDROID_TEGRA \
-DEIGEN_AVOID_STL_ARRAY \
-DEIGEN_HAS_C99_MATH \
-DLANG_CXX11 -DTENSORFLOW_USE_EIGEN_THREADPOOL -DTF_EXTRA_CUDA_CAPABILITIES=5.3
@@ -407,7 +407,7 @@ $(MARCH_OPTION) \
-I$(JETPACK)/cuda/extras/CUPTI/include
- LIBS += \
+ CUDA_LIBS := \
-ltfcuda \
-lcudart_static \
-lcudnn \
@@ -420,10 +420,10 @@ $(MARCH_OPTION) \
-lculibos \
-lcurand_static
- OBJDIR := $(OBJDIR)Tegra/
- LIBDIR := $(LIBDIR)Tegra/
- BINDIR := $(BINDIR)Tegra/
- DEPDIR := $(DEPDIR)Tegra/
+ OBJDIR := $(OBJDIR)android_arm64-v8a/
+ LIBDIR := $(LIBDIR)android_arm64-v8a/
+ BINDIR := $(BINDIR)android_arm64-v8a/
+ DEPDIR := $(DEPDIR)android_arm64-v8a/
TEGRA_LIBS := \
-L$(JETPACK)/cuda/targets/aarch64-linux-androideabi/lib \
@@ -606,7 +606,8 @@ $(wildcard tensorflow/core/util/*/*.cc) \
tensorflow/core/util/version_info.cc
# Remove duplicates (for version_info.cc)
CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS))
-CORE_CC_EXCLUDE_SRCS := \
+
+CORE_CC_EXCLUDE_SRCS_NON_GPU := \
$(wildcard tensorflow/core/*/*test.cc) \
$(wildcard tensorflow/core/*/*testutil*) \
$(wildcard tensorflow/core/*/*testlib*) \
@@ -626,49 +627,31 @@ $(wildcard tensorflow/core/lib/jpeg/*) \
$(wildcard tensorflow/core/lib/png/*) \
$(wildcard tensorflow/core/util/events_writer.*) \
$(wildcard tensorflow/core/util/reporter.*) \
-$(wildcard tensorflow/core/platform/default/cuda_libdevice_path.*) \
-$(wildcard tensorflow/core/platform/default/stream_executor.*) \
$(wildcard tensorflow/core/platform/default/test_benchmark.*) \
-$(wildcard tensorflow/core/platform/cuda.h) \
-$(wildcard tensorflow/core/platform/cuda_libdevice_path.*) \
$(wildcard tensorflow/core/platform/cloud/*) \
$(wildcard tensorflow/core/platform/google/*) \
$(wildcard tensorflow/core/platform/google/*/*) \
$(wildcard tensorflow/core/platform/jpeg.*) \
$(wildcard tensorflow/core/platform/png.*) \
$(wildcard tensorflow/core/platform/s3/*) \
-$(wildcard tensorflow/core/platform/stream_executor.*) \
$(wildcard tensorflow/core/platform/windows/*) \
-$(wildcard tensorflow/core/user_ops/*.cu.cc) \
-$(wildcard tensorflow/core/common_runtime/gpu/*) \
-$(wildcard tensorflow/core/common_runtime/gpu_device_factory.*) \
$(wildcard tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.*) \
$(wildcard tensorflow/core/grappler/inputs/file_input_yielder.*) \
-$(wildcard tensorflow/core/grappler/clusters/single_machine.*)
+$(wildcard tensorflow/core/grappler/clusters/single_machine.*) \
+tensorflow/core/util/cuda_kernel_helper_test.cu.cc
+
+CORE_CC_EXCLUDE_SRCS := \
+$(CORE_CC_EXCLUDE_SRCS_NON_GPU) \
+$(wildcard tensorflow/core/platform/stream_executor.*) \
+$(wildcard tensorflow/core/platform/default/cuda_libdevice_path.*) \
+$(wildcard tensorflow/core/platform/cuda.h) \
+$(wildcard tensorflow/core/platform/cuda_libdevice_path.*) \
+$(wildcard tensorflow/core/user_ops/*.cu.cc) \
+$(wildcard tensorflow/core/common_runtime/gpu/*) \
+$(wildcard tensorflow/core/common_runtime/gpu_device_factory.*)
ifeq ($(BUILD_FOR_TEGRA),1)
-CORE_CC_ALL_SRCS := \
-$(wildcard tensorflow/core/*.cc) \
-$(wildcard tensorflow/core/common_runtime/*.cc) \
-$(wildcard tensorflow/core/common_runtime/gpu/*.cc) \
-$(wildcard tensorflow/core/framework/*.cc) \
-$(wildcard tensorflow/core/graph/*.cc) \
-$(wildcard tensorflow/core/platform/*.cc) \
-$(wildcard tensorflow/core/platform/*/*.cc) \
-$(wildcard tensorflow/core/platform/*/*/*.cc) \
-$(wildcard tensorflow/core/util/*.cc) \
-$(wildcard tensorflow/core/util/*/*.cc) \
-$(wildcard tensorflow/cc/training/*.cc) \
-$(wildcard tensorflow/stream_executor/*.cc) \
-$(wildcard tensorflow/stream_executor/*/*.cc) \
-$(wildcard tensorflow/core/grappler/optimizers/*.cc) \
-$(wildcard tensorflow/core/grappler/*.cc) \
-$(wildcard tensorflow/core/grappler/costs/*.cc) \
-$(wildcard tensorflow/core/grappler/clusters/*.cc) \
-$(wildcard tensorflow/core/grappler/utils/*.cc) \
-$(wildcard tensorflow/core/lib/core/*.cc) \
-$(wildcard tensorflow/core/lib/*/*.cc) \
-tensorflow/core/grappler/inputs/utils.cc \
+CORE_CC_ALL_SRCS := $(CORE_CC_ALL_SRCS) \
tensorflow/core/kernels/concat_lib_gpu.cc \
tensorflow/core/kernels/cuda_solvers.cc \
tensorflow/core/kernels/cudnn_pooling_gpu.cc \
@@ -677,28 +660,14 @@ tensorflow/core/kernels/fractional_avg_pool_op.cc \
tensorflow/core/kernels/fractional_max_pool_op.cc \
tensorflow/core/kernels/fractional_pool_common.cc \
tensorflow/core/kernels/pooling_ops_3d.cc \
-tensorflow/core/kernels/sparse_fill_empty_rows_op.cc
+tensorflow/core/kernels/sparse_fill_empty_rows_op.cc \
+tensorflow/core/kernels/list_kernels.cc \
+$(wildcard tensorflow/core/common_runtime/gpu/*.cc) \
+$(wildcard tensorflow/stream_executor/*.cc) \
+$(wildcard tensorflow/stream_executor/*/*.cc)
CORE_CC_EXCLUDE_SRCS := \
-$(wildcard tensorflow/core/*/*test.cc) \
-$(wildcard tensorflow/core/*/*testutil*) \
-$(wildcard tensorflow/core/*/*testlib*) \
-$(wildcard tensorflow/core/*/*/*test.cc) \
-$(wildcard tensorflow/core/*/*/*testutil*) \
-$(wildcard tensorflow/core/framework/op_gen_lib.cc) \
-$(wildcard tensorflow/core/lib/gif/*) \
-$(wildcard tensorflow/core/lib/jpeg/*) \
-$(wildcard tensorflow/core/lib/png/*) \
-$(wildcard tensorflow/core/lib/db/*) \
-$(wildcard tensorflow/core/platform/jpeg.*) \
-$(wildcard tensorflow/core/platform/png.*) \
-$(wildcard tensorflow/core/platform/cloud/*) \
-$(wildcard tensorflow/core/platform/s3/*) \
-$(wildcard tensorflow/core/platform/windows/*) \
-$(wildcard tensorflow/core/*/*/*testlib*) \
-$(wildcard tensorflow/cc/training/*test.cc) \
-tensorflow/core/lib/io/record_reader.cc \
-tensorflow/core/util/cuda_kernel_helper_test.cu.cc
+$(CORE_CC_EXCLUDE_SRCS_NON_GPU)
CUDA_CC_SRCS := $(wildcard tensorflow/core/kernels/*.cu.cc)
CUDA_CC_OBJS := $(addprefix $(OBJDIR), $(CUDA_CC_SRCS:.cc=.o))
@@ -760,7 +729,7 @@ $(BENCHMARK_NAME): $(BENCHMARK_OBJS) $(LIB_PATH) $(CUDA_LIB_DEPS)
@mkdir -p $(dir $@)
$(CXX) $(CXXFLAGS) $(INCLUDES) \
-o $(BENCHMARK_NAME) $(BENCHMARK_OBJS) \
- $(LIBFLAGS) $(TEGRA_LIBS) $(LIB_PATH) $(LDFLAGS) $(LIBS)
+ $(LIBFLAGS) $(TEGRA_LIBS) $(LIB_PATH) $(LDFLAGS) $(LIBS) $(CUDA_LIBS)
# NVCC compilation rules for Tegra
ifeq ($(BUILD_FOR_TEGRA),1)
diff --git a/tensorflow/contrib/makefile/build_all_android.sh b/tensorflow/contrib/makefile/build_all_android.sh
index 980a44a595..f67c516186 100755
--- a/tensorflow/contrib/makefile/build_all_android.sh
+++ b/tensorflow/contrib/makefile/build_all_android.sh
@@ -18,7 +18,7 @@
set -e
usage() {
- echo "Usage: NDK_ROOT=<path to ndk root> $(basename "$0") [-Es:t:Tx:a:X]"
+ echo "Usage: NDK_ROOT=<path to ndk root> $(basename "$0") [-Es:t:Tx:a]"
echo "-E enable experimental hexnn ops"
echo "-s [sub_makefiles] sub makefiles separated by white space"
echo "-t [build_target] build target for Android makefile [default=all]"
diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh
index a18df256f9..2d99791839 100755
--- a/tensorflow/contrib/makefile/build_all_ios.sh
+++ b/tensorflow/contrib/makefile/build_all_ios.sh
@@ -96,7 +96,7 @@ if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then
if [[ -z "${BUILD_ARCH}" ]]; then
# Compile protobuf for the target iOS device architectures.
- tensorflow/contrib/makefile/compile_ios_protobuf.sh -a ${DEFAULT_ARCH}
+ tensorflow/contrib/makefile/compile_ios_protobuf.sh
else
# Compile protobuf for the target iOS device architectures.
tensorflow/contrib/makefile/compile_ios_protobuf.sh -a ${BUILD_ARCH}
diff --git a/tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh b/tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh
index 861bb885c7..203ff4f890 100755
--- a/tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh
+++ b/tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh
@@ -76,6 +76,8 @@ GEN_LIBS_DIR="${GEN_DIR}/libs"
GEN_DOWNLOAD_DIR="${GEN_DIR}/downloads"
URL_BASE="https://storage.googleapis.com/download.tensorflow.org"
+ARCH="armeabi-v7a"
+
source "${SCRIPT_DIR}/../build_helper.subr"
rm -rf "${GEN_DIR}"
@@ -219,7 +221,7 @@ if [[ "${BUILD_ONLY}" != "true" ]]; then
adb push "${GEN_LIBS_DIR}/libhexagon_nn_skel.so" "/vendor/lib/rfsa/adsp"
adb push -p \
- "${TF_ROOT_DIR}/tensorflow/contrib/makefile/gen/bin/hexagon_graph_execution" \
+ "${TF_ROOT_DIR}/tensorflow/contrib/makefile/gen/bin/android_${ARCH}/hexagon_graph_execution" \
"/data/local/tmp/"
adb wait-for-device
adb shell chmod "${ANDROID_EXEC_FILE_MODE}" \
diff --git a/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in b/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in
index d9277ed60c..3081084ee7 100644
--- a/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in
+++ b/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in
@@ -54,7 +54,7 @@ $(INFERENCE_SO_PATH): $(LIB_OBJS) $(INFERENCE_OBJS) $(CUDA_LIB_DEPS)
-o $@ $(INFERENCE_OBJS) $(LIB_OBJS) $(TEGRA_LIBS) \
$(LIBFLAGS) $(LDFLAGS) \
-shared -Wl,-soname,$(INFERENCE_SO_NAME) \
- $(LIBS)
+ $(LIBS) $(CUDA_LIBS)
$(INFERENCE_SO_NAME): $(INFERENCE_SO_PATH)
diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index 5f27566398..5a812af4e9 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -91,6 +91,7 @@ tensorflow/core/kernels/reduction_ops_max.cc
tensorflow/core/kernels/reduction_ops_common.cc
tensorflow/core/kernels/reduction_ops_any.cc
tensorflow/core/kernels/reduction_ops_all.cc
+tensorflow/core/kernels/roll_op.cc
tensorflow/core/kernels/queue_ops.cc
tensorflow/core/kernels/queue_base.cc
tensorflow/core/kernels/pooling_ops_common.cc
@@ -270,6 +271,7 @@ tensorflow/core/ops/parsing_ops.cc
tensorflow/core/ops/no_op.cc
tensorflow/core/ops/nn_ops.cc
tensorflow/core/ops/nn_grad.cc
+tensorflow/core/ops/manip_ops.cc
tensorflow/core/ops/math_ops.cc
tensorflow/core/ops/math_grad.cc
tensorflow/core/ops/logging_ops.cc
@@ -291,3 +293,4 @@ tensorflow/core/kernels/batchtospace_op.cc
tensorflow/core/kernels/warn_about_ints.cc
tensorflow/core/kernels/segment_reduction_ops.cc
tensorflow/core/kernels/batch_util.cc
+tensorflow/core/ops/audio_ops.cc
diff --git a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc
index c2c42b8ed7..6a7f5efecd 100644
--- a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc
+++ b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc
@@ -151,7 +151,7 @@ MPIRemoteRendezvous::~MPIRemoteRendezvous() {}
void MPIRendezvousMgr::AddRequest(RecvTensorRequest request,
const int mpi_dst) {
TF_CHECK_OK(recv_tensor_recent_request_ids_.TrackUnique(
- req.request_id(), "RecvTensor (MPIRendezvousMgr)", req));
+ request.request_id(), "RecvTensor (MPIRendezvousMgr)", request));
const int64 step_id = request.step_id();
const std::string& key = request.rendezvous_key();
Rendezvous::ParsedKey parsed;
diff --git a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h
index e665922135..5596601ddb 100644
--- a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h
+++ b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h
@@ -33,6 +33,7 @@ limitations under the License.
#include "tensorflow/contrib/mpi/mpi_msg.pb.h"
#include "tensorflow/contrib/mpi/mpi_utils.h"
#include "tensorflow/core/distributed_runtime/base_rendezvous_mgr.h"
+#include "tensorflow/core/distributed_runtime/recent_request_ids.h"
#include "tensorflow/core/distributed_runtime/request_id.h"
#include "tensorflow/core/distributed_runtime/worker_env.h"
#include "tensorflow/core/protobuf/worker.pb.h"
diff --git a/tensorflow/contrib/ndlstm/__init__.py b/tensorflow/contrib/ndlstm/__init__.py
index 52e83069cb..a5dd100b26 100644
--- a/tensorflow/contrib/ndlstm/__init__.py
+++ b/tensorflow/contrib/ndlstm/__init__.py
@@ -12,7 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
+"""Library of multidimensional LSTM models and related code."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+
+from tensorflow.contrib.ndlstm.python import lstm1d
+from tensorflow.contrib.ndlstm.python import lstm2d
diff --git a/tensorflow/contrib/ndlstm/python/lstm1d.py b/tensorflow/contrib/ndlstm/python/lstm1d.py
index d3c3531f40..2e2e9086c0 100644
--- a/tensorflow/contrib/ndlstm/python/lstm1d.py
+++ b/tensorflow/contrib/ndlstm/python/lstm1d.py
@@ -22,7 +22,6 @@ from six.moves import xrange # pylint: disable=redefined-builtin
from tensorflow.contrib.framework.python.ops import variables
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import rnn
@@ -85,18 +84,11 @@ def ndlstm_base_dynamic(inputs, noutput, scope=None, reverse=False):
Output sequence (length, batch_size, noutput)
"""
with variable_scope.variable_scope(scope, "SeqLstm", [inputs]):
- # TODO(tmb) make batch size, sequence_length dynamic
- # example: sequence_length = tf.shape(inputs)[0]
- _, batch_size, _ = _shape(inputs)
- lstm_cell = rnn_cell.BasicLSTMCell(noutput, state_is_tuple=False)
- state = array_ops.zeros([batch_size, lstm_cell.state_size])
- sequence_length = int(inputs.get_shape()[0])
- sequence_lengths = math_ops.to_int64(
- array_ops.fill([batch_size], sequence_length))
+ lstm_cell = rnn_cell.BasicLSTMCell(noutput)
if reverse:
inputs = array_ops.reverse_v2(inputs, [0])
outputs, _ = rnn.dynamic_rnn(
- lstm_cell, inputs, sequence_lengths, state, time_major=True)
+ lstm_cell, inputs, time_major=True, dtype=inputs.dtype)
if reverse:
outputs = array_ops.reverse_v2(outputs, [0])
return outputs
diff --git a/tensorflow/contrib/opt/python/training/external_optimizer.py b/tensorflow/contrib/opt/python/training/external_optimizer.py
index f243317f1d..82ebca7f20 100644
--- a/tensorflow/contrib/opt/python/training/external_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/external_optimizer.py
@@ -397,10 +397,6 @@ class ScipyOptimizerInterface(ExternalOptimizerInterface):
'automatically and cannot be injected manually'.format(kwarg))
minimize_kwargs.update(optimizer_kwargs)
- if method == 'SLSQP':
- # SLSQP doesn't support step callbacks. Obviate associated warning
- # message.
- del minimize_kwargs['callback']
import scipy.optimize # pylint: disable=g-import-not-at-top
result = scipy.optimize.minimize(*minimize_args, **minimize_kwargs)
diff --git a/tensorflow/contrib/opt/python/training/external_optimizer_test.py b/tensorflow/contrib/opt/python/training/external_optimizer_test.py
index 0f597d0a24..953586ee70 100644
--- a/tensorflow/contrib/opt/python/training/external_optimizer_test.py
+++ b/tensorflow/contrib/opt/python/training/external_optimizer_test.py
@@ -299,6 +299,45 @@ class ScipyOptimizerInterfaceTest(TestCase):
method = optimizer.optimizer_kwargs.get('method')
self.assertEqual('SLSQP', method)
+ def test_callbacks(self):
+ vector_val = np.array([7., -2.], dtype=np.float32)
+ vector = variables.Variable(vector_val, 'vector')
+
+ minimum_location_val = np.arange(2)
+ minimum_location = constant_op.constant(
+ minimum_location_val, dtype=dtypes.float32)
+
+ loss = math_ops.reduce_sum(math_ops.square(vector - minimum_location)) / 2.
+ loss_val_first = ((vector_val - minimum_location_val)**2).sum() / 2.
+
+ optimizer = external_optimizer.ScipyOptimizerInterface(loss, method='SLSQP')
+
+ with self.test_session() as sess:
+ sess.run(variables.global_variables_initializer())
+
+ initial_vector_val = sess.run(vector)
+
+ extra_fetches = [loss]
+
+ step_callback = test.mock.Mock()
+ loss_callback = test.mock.Mock()
+
+ optimizer.minimize(
+ sess,
+ fetches=extra_fetches,
+ loss_callback=loss_callback,
+ step_callback=step_callback)
+
+ loss_val_last = sess.run(loss)
+
+ call_first = test.mock.call(loss_val_first)
+ call_last = test.mock.call(loss_val_last)
+ loss_calls = [call_first, call_last]
+ loss_callback.assert_has_calls(loss_calls, any_order=True)
+
+ args, _ = step_callback.call_args
+ self.assertAllClose(minimum_location_val, args[0])
+
if __name__ == '__main__':
test.main()
diff --git a/tensorflow/contrib/py2tf/impl/api.py b/tensorflow/contrib/py2tf/impl/api.py
index 8ff6618912..85d40f3158 100644
--- a/tensorflow/contrib/py2tf/impl/api.py
+++ b/tensorflow/contrib/py2tf/impl/api.py
@@ -86,8 +86,8 @@ def convert_inline(f, *args, **kwargs):
def convert(recursive=False, arg_types=None):
"""Decorator that compiles a function to graph mode.
- The decorator is dynamic - invoking compilation whenever the decorated fuction
- is called. This means the parameter values are known at compilation.
+ The decorator is dynamic - invoking compilation whenever the decorated
+ function is called. This means the parameter values are known at compilation.
Args:
recursive: Whether to recursively convert any functions that the decorator
diff --git a/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py b/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py
index b2360fec6c..0388079f20 100644
--- a/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py
+++ b/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py
@@ -61,7 +61,7 @@ def _compute_output_resolution(input_spatial_resolution, kernel_size, stride,
stride: Stride (int).
total_padding: Total padding to be applied (int).
Returns:
- output_resolution: Ouput dimension (int) or None.
+ output_resolution: Output dimension (int) or None.
"""
if (input_spatial_resolution is None) or (kernel_size is None) or (
stride is None) or (total_padding is None):
diff --git a/tensorflow/contrib/reduce_slice_ops/ops/reduce_slice_ops.cc b/tensorflow/contrib/reduce_slice_ops/ops/reduce_slice_ops.cc
index b8b56c0e22..92879ab535 100644
--- a/tensorflow/contrib/reduce_slice_ops/ops/reduce_slice_ops.cc
+++ b/tensorflow/contrib/reduce_slice_ops/ops/reduce_slice_ops.cc
@@ -87,9 +87,9 @@ and 'indices' is [[0,1]
[1,1]
[0,2]],
-the the output will be [[ 1, 2, 3]
- [ 0, 0, 0]
- [41,52,63]].
+the output will be [[ 1, 2, 3]
+ [ 0, 0, 0]
+ [41,52,63]].
```
The data must be at least rank 1. The indices must be of shape (?,2) where the
@@ -132,9 +132,9 @@ and 'indices' is [[0,1]
[1,1]
[0,2]],
-the the output will be [[ 1, 2, 3]
- [ 1, 1, 1]
- [40,100,180]].
+the output will be [[ 1, 2, 3]
+ [ 1, 1, 1]
+ [40,100,180]].
```
The data must be at least rank 1. The indices can be of shape (?,2) where the
@@ -189,9 +189,9 @@ and 'indices' is [[0,1]
[1,1]
[0,2]],
-the the output will be [[ 1, 20, 3]
- [ -BIG_VALUE, -BIG_VALUE, -BIG_VALUE]
- [ 400, 20, 60]].
+the output will be [[ 1, 20, 3]
+ [ -BIG_VALUE, -BIG_VALUE, -BIG_VALUE]
+ [ 400, 20, 60]].
```
The data must be at least rank 1. The indices can be of shape (?,2) where the
@@ -246,9 +246,9 @@ and 'indices' is [[0,1]
[1,1]
[0,2]],
-the the output will be [[ 1, 20, 3]
- [ +BIG_VALUE, +BIG_VALUE, +BIG_VALUE]
- [ 1, 5, 3]].
+the output will be [[ 1, 20, 3]
+ [ +BIG_VALUE, +BIG_VALUE, +BIG_VALUE]
+ [ 1, 5, 3]].
```
The data must be at least rank 1. The indices can be of shape (?,2) where the
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index 09527e8473..0e62b315b6 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -157,6 +157,21 @@ class RNNCellTest(test.TestCase):
# Smoke test
self.assertAllClose(res[0], [[0.509682, 0.509682]])
+ def testSRUCellWithDiffSize(self):
+ with self.test_session() as sess:
+ with variable_scope.variable_scope(
+ "root", initializer=init_ops.constant_initializer(0.5)):
+ x = array_ops.zeros([1, 3])
+ m = array_ops.zeros([1, 2])
+ g, _ = contrib_rnn_cell.SRUCell(2)(x, m)
+ sess.run([variables_lib.global_variables_initializer()])
+ res = sess.run([g], {
+ x.name: np.array([[1., 1., 1.]]),
+ m.name: np.array([[0.1, 0.1]])
+ })
+ # Smoke test
+ self.assertAllClose(res[0], [[0.55255556, 0.55255556]])
+
def testBasicLSTMCell(self):
for dtype in [dtypes.float16, dtypes.float32]:
np_dtype = dtype.as_numpy_dtype
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
index c780e85d72..51933be29d 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
@@ -1635,6 +1635,5 @@ class WeightNormLSTMCellTest(test.TestCase):
self.assertAllClose(expected_c, actual_c, 1e-5)
self.assertAllClose(expected_h, actual_h, 1e-5)
-
if __name__ == "__main__":
test.main()
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 124e841fc2..fe07493d0f 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -2731,25 +2731,9 @@ class SRUCell(rnn_cell_impl._LayerRNNCell):
input_depth = inputs_shape[1].value
- # Here the contributor believes that the following constraints
- # are implied. The reasoning is explained here with reference to
- # the paper https://arxiv.org/pdf/1709.02755.pdf upon which this
- # implementation is based.
- # In section 2.1 Equation 5, specifically:
- # h_t = r_t \odot g(c_t) + (1 - r_t) \odot x_t
- # the pointwise operation between r_t and x_t means they have
- # the same shape (since we are implementing an RNN cell, braodcasting
- # does not happen to input of a single timestep); by the same
- # reasons, x_t has the same shape as h_t, essentially mandating that
- # input_depth = unit_num.
- if input_depth != self._num_units:
- raise ValueError("SRU requires input_depth == num_units, got "
- "input_depth = %s, num_units = %s" % (input_depth,
- self._num_units))
-
self._kernel = self.add_variable(
rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
- shape=[input_depth, 3 * self._num_units])
+ shape=[input_depth, 4 * self._num_units])
self._bias = self.add_variable(
rnn_cell_impl._BIAS_VARIABLE_NAME,
@@ -2762,8 +2746,8 @@ class SRUCell(rnn_cell_impl._LayerRNNCell):
"""Simple recurrent unit (SRU) with num_units cells."""
U = math_ops.matmul(inputs, self._kernel)
- x_bar, f_intermediate, r_intermediate = array_ops.split(
- value=U, num_or_size_splits=3, axis=1)
+ x_bar, f_intermediate, r_intermediate, x_tx = array_ops.split(
+ value=U, num_or_size_splits=4, axis=1)
f_r = math_ops.sigmoid(
nn_ops.bias_add(
@@ -2771,7 +2755,7 @@ class SRUCell(rnn_cell_impl._LayerRNNCell):
f, r = array_ops.split(value=f_r, num_or_size_splits=2, axis=1)
c = f * state + (1.0 - f) * x_bar
- h = r * self._activation(c) + (1.0 - r) * inputs
+ h = r * self._activation(c) + (1.0 - r) * x_tx
return h, c
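
The SRUCell change above widens the learned kernel from `3 * num_units` to `4 * num_units` so the highway term uses a learned projection of the input (`x_tx`) rather than the raw input, which is what removes the old `input_depth == num_units` restriction. A minimal NumPy sketch of the updated cell step follows, under the assumption that the gates are computed as `sigmoid(concat(f, r) + bias)` and with `tanh` standing in for the configured activation; this is an illustration, not the TF kernel:

```python
import numpy as np

def sru_step(x, c_prev, kernel, bias, activation=np.tanh):
    """One SRU step after the 4-way kernel split (illustrative sketch)."""
    # x: [batch, input_depth], c_prev: [batch, num_units]
    # kernel: [input_depth, 4 * num_units], bias: [2 * num_units]
    u = x @ kernel
    x_bar, f_in, r_in, x_tx = np.split(u, 4, axis=1)
    f_r = 1.0 / (1.0 + np.exp(-(np.concatenate([f_in, r_in], axis=1) + bias)))
    f, r = np.split(f_r, 2, axis=1)
    c = f * c_prev + (1.0 - f) * x_bar
    # The highway connection now mixes in x_tx, so input_depth may differ
    # from num_units (as exercised by testSRUCellWithDiffSize above).
    h = r * activation(c) + (1.0 - r) * x_tx
    return h, c

# Shapes matching the new test: input_depth=3, num_units=2.
h, c = sru_step(np.ones((1, 3)), np.zeros((1, 2)),
                np.full((3, 8), 0.5), np.zeros(4))
```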
diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
index 95dea312f3..d6b5eceb47 100644
--- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
@@ -924,8 +924,7 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism):
_monotonic_probability_fn, sigmoid_noise=sigmoid_noise, mode=mode,
seed=sigmoid_noise_seed)
super(LuongMonotonicAttention, self).__init__(
- query_layer=layers_core.Dense(
- num_units, name="query_layer", use_bias=False, dtype=dtype),
+ query_layer=None,
memory_layer=layers_core.Dense(
num_units, name="memory_layer", use_bias=False, dtype=dtype),
memory=memory,
diff --git a/tensorflow/contrib/session_bundle/bundle_shim.py b/tensorflow/contrib/session_bundle/bundle_shim.py
index 062c9cc680..1db97020a2 100644
--- a/tensorflow/contrib/session_bundle/bundle_shim.py
+++ b/tensorflow/contrib/session_bundle/bundle_shim.py
@@ -82,7 +82,8 @@ def _convert_default_signature_to_signature_def(signatures):
"""
default_signature = signatures.default_signature
signature_def = meta_graph_pb2.SignatureDef()
- if default_signature.WhichOneof("type") == "regression_signature":
+ if (default_signature.WhichOneof("type") ==
+ legacy_constants.REGRESSION_SIGNATURE):
regression_signature = default_signature.regression_signature
signature_def.method_name = signature_constants.REGRESS_METHOD_NAME
_add_input_to_signature_def(regression_signature.input.tensor_name,
@@ -91,7 +92,8 @@ def _convert_default_signature_to_signature_def(signatures):
_add_output_to_signature_def(regression_signature.output.tensor_name,
signature_constants.REGRESS_OUTPUTS,
signature_def)
- elif default_signature.WhichOneof("type") == "classification_signature":
+ elif (default_signature.WhichOneof("type") ==
+ legacy_constants.CLASSIFICATION_SIGNATURE):
classification_signature = default_signature.classification_signature
signature_def.method_name = signature_constants.CLASSIFY_METHOD_NAME
_add_input_to_signature_def(classification_signature.input.tensor_name,
@@ -132,8 +134,9 @@ def _convert_named_signatures_to_signature_def(signatures):
signature_constants.PREDICT_OUTPUTS]
# TODO(pdudnik): what if there are other signatures? Mimic cr/140900781 once
# it is submitted.
- if (input_signature.WhichOneof("type") != "generic_signature" or
- output_signature.WhichOneof("type") != "generic_signature"):
+ if (input_signature.WhichOneof("type") != legacy_constants.GENERIC_SIGNATURE
+ or output_signature.WhichOneof("type") !=
+ legacy_constants.GENERIC_SIGNATURE):
raise RuntimeError("Named input and output signatures can only be "
"up-converted if they are generic signature. "
"Input signature type is %s, output signature type is "
diff --git a/tensorflow/contrib/session_bundle/constants.py b/tensorflow/contrib/session_bundle/constants.py
index 6ced73241a..e833baee79 100644
--- a/tensorflow/contrib/session_bundle/constants.py
+++ b/tensorflow/contrib/session_bundle/constants.py
@@ -32,3 +32,6 @@ INIT_OP_KEY = "serving_init_op"
SIGNATURES_KEY = "serving_signatures"
ASSETS_KEY = "serving_assets"
GRAPH_KEY = "serving_graph"
+REGRESSION_SIGNATURE = "regression_signature"
+CLASSIFICATION_SIGNATURE = "classification_signature"
+GENERIC_SIGNATURE = "generic_signature"
diff --git a/tensorflow/contrib/slim/python/slim/evaluation_test.py b/tensorflow/contrib/slim/python/slim/evaluation_test.py
index 870f504d10..8a267ddac7 100644
--- a/tensorflow/contrib/slim/python/slim/evaluation_test.py
+++ b/tensorflow/contrib/slim/python/slim/evaluation_test.py
@@ -29,7 +29,6 @@ from tensorflow.contrib.framework.python.ops import variables as variables_lib
from tensorflow.contrib.metrics.python.ops import metric_ops
from tensorflow.contrib.slim.python.slim import evaluation
from tensorflow.contrib.training.python.training import evaluation as evaluation_lib
-from tensorflow.core.protobuf import saver_pb2
from tensorflow.python.debug.lib import debug_data
from tensorflow.python.debug.wrappers import hooks
from tensorflow.python.framework import constant_op
@@ -236,7 +235,7 @@ class SingleEvaluationTest(test.TestCase):
def _prepareCheckpoint(self, checkpoint_path):
init_op = control_flow_ops.group(variables.global_variables_initializer(),
variables.local_variables_initializer())
- saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V1)
+ saver = saver_lib.Saver()
with self.test_session() as sess:
sess.run(init_op)
saver.save(sess, checkpoint_path)
diff --git a/tensorflow/contrib/solvers/python/kernel_tests/linear_equations_test.py b/tensorflow/contrib/solvers/python/kernel_tests/linear_equations_test.py
index 930df2414b..a1282847be 100644
--- a/tensorflow/contrib/solvers/python/kernel_tests/linear_equations_test.py
+++ b/tensorflow/contrib/solvers/python/kernel_tests/linear_equations_test.py
@@ -45,32 +45,67 @@ def _get_linear_equations_tests(dtype_, use_static_shape_, shape_):
low=-1.0, high=1.0, size=np.prod(shape_)).reshape(shape_).astype(dtype_)
# Make a_np self-adjoint and positive definite.
a_np = np.dot(a_np.T, a_np)
+ # Jacobi preconditioner
+ jacobi_np = np.zeros_like(a_np)
+ jacobi_np[range(a_np.shape[0]), range(a_np.shape[1])] = (
+ 1.0 / a_np.diagonal())
rhs_np = np.random.uniform(
low=-1.0, high=1.0, size=shape_[0]).astype(dtype_)
+ x_np = np.zeros_like(rhs_np)
tol = 1e-6 if dtype_ == np.float64 else 1e-3
max_iter = 20
with self.test_session() as sess:
if use_static_shape_:
a = constant_op.constant(a_np)
rhs = constant_op.constant(rhs_np)
+ x = constant_op.constant(x_np)
+ jacobi = constant_op.constant(jacobi_np)
else:
a = array_ops.placeholder(dtype_)
rhs = array_ops.placeholder(dtype_)
+ x = array_ops.placeholder(dtype_)
+ jacobi = array_ops.placeholder(dtype_)
operator = util.create_operator(a)
- cg_graph = linear_equations.conjugate_gradient(
- operator, rhs, tol=tol, max_iter=max_iter)
- if use_static_shape_:
- cg_val = sess.run(cg_graph)
- else:
- cg_val = sess.run(cg_graph, feed_dict={a: a_np, rhs: rhs_np})
- norm_r0 = np.linalg.norm(rhs_np)
- norm_r = np.sqrt(cg_val.gamma)
- self.assertLessEqual(norm_r, tol * norm_r0)
- # Validate that we get an equally small residual norm with numpy
- # using the computed solution.
- r_np = rhs_np - np.dot(a_np, cg_val.x)
- norm_r_np = np.linalg.norm(r_np)
- self.assertLessEqual(norm_r_np, tol * norm_r0)
+ preconditioners = [
+ None, util.identity_operator(a),
+ util.create_operator(jacobi)
+ ]
+ cg_results = []
+ for preconditioner in preconditioners:
+ cg_graph = linear_equations.conjugate_gradient(
+ operator,
+ rhs,
+ preconditioner=preconditioner,
+ x=x,
+ tol=tol,
+ max_iter=max_iter)
+ if use_static_shape_:
+ cg_val = sess.run(cg_graph)
+ else:
+ cg_val = sess.run(
+ cg_graph,
+ feed_dict={
+ a: a_np,
+ rhs: rhs_np,
+ x: x_np,
+ jacobi: jacobi_np
+ })
+ norm_r0 = np.linalg.norm(rhs_np)
+ norm_r = np.linalg.norm(cg_val.r)
+ self.assertLessEqual(norm_r, tol * norm_r0)
+ # Validate that we get an equally small residual norm with numpy
+ # using the computed solution.
+ r_np = rhs_np - np.dot(a_np, cg_val.x)
+ norm_r_np = np.linalg.norm(r_np)
+ self.assertLessEqual(norm_r_np, tol * norm_r0)
+ cg_results.append(cg_val)
+ # Validate that we get same results using identity_preconditioner
+ # and None
+ self.assertEqual(cg_results[0].i, cg_results[1].i)
+ self.assertAlmostEqual(cg_results[0].gamma, cg_results[1].gamma)
+ self.assertAllClose(cg_results[0].r, cg_results[1].r, rtol=tol)
+ self.assertAllClose(cg_results[0].x, cg_results[1].x, rtol=tol)
+ self.assertAllClose(cg_results[0].p, cg_results[1].p, rtol=tol)
return [test_conjugate_gradient]
diff --git a/tensorflow/contrib/solvers/python/kernel_tests/util_test.py b/tensorflow/contrib/solvers/python/kernel_tests/util_test.py
index 1566984b27..5d7534657b 100644
--- a/tensorflow/contrib/solvers/python/kernel_tests/util_test.py
+++ b/tensorflow/contrib/solvers/python/kernel_tests/util_test.py
@@ -63,6 +63,43 @@ class UtilTest(test.TestCase):
def testCreateOperatorUnknownShape(self):
self._testCreateOperator(False)
+ def _testIdentityOperator(self, use_static_shape_):
+ for dtype in np.float32, np.float64:
+ a_np = np.array([[1., 2.], [3., 4.], [5., 6.]], dtype=dtype)
+ x_np = np.array([[2.], [-3.]], dtype=dtype)
+ y_np = np.array([[2], [-3.], [5.]], dtype=dtype)
+ with self.test_session() as sess:
+ if use_static_shape_:
+ a = constant_op.constant(a_np, dtype=dtype)
+ x = constant_op.constant(x_np, dtype=dtype)
+ y = constant_op.constant(y_np, dtype=dtype)
+ else:
+ a = array_ops.placeholder(dtype)
+ x = array_ops.placeholder(dtype)
+ y = array_ops.placeholder(dtype)
+ id_op = util.identity_operator(a)
+ ax = id_op.apply(x)
+ aty = id_op.apply_adjoint(y)
+ op_shape = ops.convert_to_tensor(id_op.shape)
+ if use_static_shape_:
+ op_shape_val, ax_val, aty_val = sess.run([op_shape, ax, aty])
+ else:
+ op_shape_val, ax_val, aty_val = sess.run(
+ [op_shape, ax, aty], feed_dict={
+ a: a_np,
+ x: x_np,
+ y: y_np
+ })
+ self.assertAllEqual(op_shape_val, [3, 2])
+ self.assertAllClose(ax_val, x_np)
+ self.assertAllClose(aty_val, y_np)
+
+ def testIdentityOperator(self):
+ self._testIdentityOperator(True)
+
+ def testIdentityOperatorUnknownShape(self):
+ self._testIdentityOperator(False)
+
def testL2Norm(self):
with self.test_session():
x_np = np.array([[2], [-3.], [5.]])
diff --git a/tensorflow/contrib/solvers/python/ops/linear_equations.py b/tensorflow/contrib/solvers/python/ops/linear_equations.py
index 8cba56eba6..2395707257 100644
--- a/tensorflow/contrib/solvers/python/ops/linear_equations.py
+++ b/tensorflow/contrib/solvers/python/ops/linear_equations.py
@@ -26,11 +26,14 @@ from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import linalg_ops
from tensorflow.python.ops import math_ops
def conjugate_gradient(operator,
rhs,
+ preconditioner=None,
+ x=None,
tol=1e-4,
max_iter=20,
name="conjugate_gradient"):
@@ -55,6 +58,15 @@ def conjugate_gradient(operator,
vector with the result of applying the operator to `x`, i.e. if
`operator` represents matrix `A`, `apply` should return `A * x`.
rhs: A rank-1 `Tensor` of shape `[N]` containing the right-hand size vector.
+ preconditioner: An object representing a linear operator, see `operator`
+ for details. The preconditioner should approximate the inverse of `A`.
+ An efficient preconditioner could dramatically improve the rate of
+ convergence. If `preconditioner` represents matrix `M` (`M` approximates
+ `A^{-1}`), the algorithm uses `preconditioner.apply(x)` to estimate
+ `A^{-1}x`. For this to be useful, the cost of applying `M` should be
+ much lower than computing `A^{-1}` directly.
+ x: A rank-1 `Tensor` of shape `[N]` containing the initial guess for the
+ solution.
tol: A float scalar convergence tolerance.
max_iter: An integer giving the maximum number of iterations.
name: A name scope for the operation.
@@ -65,35 +77,49 @@ def conjugate_gradient(operator,
- x: A rank-1 `Tensor` of shape `[N]` containing the computed solution.
- r: A rank-1 `Tensor` of shape `[M]` containing the residual vector.
- p: A rank-1 `Tensor` of shape `[N]`. `A`-conjugate basis vector.
- - gamma: \\(||r||_2^2\\)
+ - gamma: \\(r \cdot M \cdot r\\), equivalent to \\(||r||_2^2\\) when
+ `preconditioner=None`.
"""
# ephemeral class holding CG state.
cg_state = collections.namedtuple("CGState", ["i", "x", "r", "p", "gamma"])
def stopping_criterion(i, state):
- return math_ops.logical_and(i < max_iter, state.gamma > tol)
+ return math_ops.logical_and(i < max_iter, linalg_ops.norm(state.r) > tol)
- # TODO(rmlarsen): add preconditioning
- def cg_step(i, state):
+ def cg_step(i, state): # pylint: disable=missing-docstring
z = operator.apply(state.p)
alpha = state.gamma / util.dot(state.p, z)
x = state.x + alpha * state.p
r = state.r - alpha * z
- gamma = util.l2norm_squared(r)
- beta = gamma / state.gamma
- p = r + beta * state.p
+ if preconditioner is None:
+ gamma = util.dot(r, r)
+ beta = gamma / state.gamma
+ p = r + beta * state.p
+ else:
+ q = preconditioner.apply(r)
+ gamma = util.dot(r, q)
+ beta = gamma / state.gamma
+ p = q + beta * state.p
return i + 1, cg_state(i + 1, x, r, p, gamma)
with ops.name_scope(name):
n = operator.shape[1:]
rhs = array_ops.expand_dims(rhs, -1)
- gamma0 = util.l2norm_squared(rhs)
- tol = tol * tol * gamma0
- x = array_ops.expand_dims(
- array_ops.zeros(
- n, dtype=rhs.dtype.base_dtype), -1)
+ if x is None:
+ x = array_ops.expand_dims(
+ array_ops.zeros(n, dtype=rhs.dtype.base_dtype), -1)
+ r0 = rhs
+ else:
+ x = array_ops.expand_dims(x, -1)
+ r0 = rhs - operator.apply(x)
+ if preconditioner is None:
+ p0 = r0
+ else:
+ p0 = preconditioner.apply(r0)
+ gamma0 = util.dot(r0, p0)
+ tol *= linalg_ops.norm(r0)
i = constant_op.constant(0, dtype=dtypes.int32)
- state = cg_state(i=i, x=x, r=rhs, p=rhs, gamma=gamma0)
+ state = cg_state(i=i, x=x, r=r0, p=p0, gamma=gamma0)
_, state = control_flow_ops.while_loop(stopping_criterion, cg_step,
[i, state])
return cg_state(
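
The updated docstring above describes the new `preconditioner` and `x` arguments and the revised residual-norm stopping rule. Since only part of the contrib solver appears in this hunk, here is a hedged, self-contained NumPy sketch of the same preconditioned CG recurrence; matrix `a` stands in for `operator` and `m_inv` for `preconditioner.apply`:

```python
import numpy as np

def conjugate_gradient(a, rhs, m_inv=None, x=None, tol=1e-4, max_iter=20):
    """Preconditioned CG sketch mirroring the updated contrib docstring."""
    x = np.zeros_like(rhs) if x is None else np.array(x, dtype=rhs.dtype)
    r = rhs - a @ x                        # r0 = rhs - A x0
    p = r if m_inv is None else m_inv @ r  # p0 = M^{-1} r0
    gamma = r @ p                          # r.M.r, i.e. ||r||^2 when m_inv is None
    threshold = tol * np.linalg.norm(r)    # tol *= ||r0||, as in the new code
    for _ in range(max_iter):
        if np.linalg.norm(r) <= threshold:
            break
        z = a @ p
        alpha = gamma / (p @ z)
        x = x + alpha * p
        r = r - alpha * z
        q = r if m_inv is None else m_inv @ r
        gamma_new = r @ q
        p = q + (gamma_new / gamma) * p
        gamma = gamma_new
    return x, r

# Jacobi preconditioner, as in the updated linear_equations_test above.
a = np.array([[4.0, 1.0], [1.0, 3.0]])
rhs = np.array([1.0, 2.0])
x, r = conjugate_gradient(a, rhs, m_inv=np.diag(1.0 / np.diag(a)))
```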
diff --git a/tensorflow/contrib/solvers/python/ops/util.py b/tensorflow/contrib/solvers/python/ops/util.py
index 777e0c185d..96947e8eea 100644
--- a/tensorflow/contrib/solvers/python/ops/util.py
+++ b/tensorflow/contrib/solvers/python/ops/util.py
@@ -45,6 +45,23 @@ def create_operator(matrix):
apply_adjoint=lambda v: math_ops.matmul(matrix, v, adjoint_a=True))
+def identity_operator(matrix):
+ """Creates a linear operator from a rank-2 identity tensor."""
+
+ linear_operator = collections.namedtuple(
+ "LinearOperator", ["shape", "dtype", "apply", "apply_adjoint"])
+ shape = matrix.get_shape()
+ if shape.is_fully_defined():
+ shape = shape.as_list()
+ else:
+ shape = array_ops.shape(matrix)
+ return linear_operator(
+ shape=shape,
+ dtype=matrix.dtype,
+ apply=lambda v: v,
+ apply_adjoint=lambda v: v)
+
+
# TODO(rmlarsen): Measure if we should just call matmul.
def dot(x, y):
return math_ops.reduce_sum(math_ops.conj(x) * y)
diff --git a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
index 7970c20a26..78d237e6a2 100644
--- a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
+++ b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
@@ -17,6 +17,7 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+from absl import flags
import os
import subprocess
@@ -24,13 +25,21 @@ import sys
import tensorflow as tf
-tf.flags.DEFINE_string('service_addr', '',
- 'Address of TPU profiler service e.g. localhost:8466')
-tf.flags.DEFINE_string('logdir', '',
- 'Path of TensorBoard log directory e.g. /tmp/tb_log')
-tf.flags.DEFINE_integer('duration_ms', 2000, 'Duration of tracing in ms.')
+flags.DEFINE_string(
+ 'service_addr', None, 'Address of TPU profiler service e.g. '
+ 'localhost:8466')
+flags.DEFINE_string(
+ 'logdir', None, 'Path of TensorBoard log directory e.g. /tmp/tb_log, '
+ 'gs://tb_bucket')
+flags.DEFINE_integer('duration_ms', 2000, 'Duration of tracing in ms.')
+flags.DEFINE_integer(
+ 'num_tracing_attempts', 3, 'Automatically retry N times when no trace '
+ 'event is collected.')
+flags.DEFINE_boolean(
+ 'include_dataset_ops', True, 'Set to false to profile longer TPU '
+ 'device traces.')
-FLAGS = tf.flags.FLAGS
+FLAGS = flags.FLAGS
EXECUTABLE = 'data/capture_tpu_profile'
@@ -42,10 +51,13 @@ def main(unused_argv=None):
if not FLAGS.service_addr or not FLAGS.logdir:
sys.exit('service_addr and logdir must be provided.')
executable_path = os.path.join(os.path.dirname(__file__), EXECUTABLE)
+ logdir = os.path.expandvars(os.path.expanduser(FLAGS.logdir))
cmd = [executable_path]
- cmd.append('--logdir='+FLAGS.logdir)
+ cmd.append('--logdir='+logdir)
cmd.append('--service_addr='+FLAGS.service_addr)
cmd.append('--duration_ms='+str(FLAGS.duration_ms))
+ cmd.append('--num_tracing_attempts='+str(FLAGS.num_tracing_attempts))
+ cmd.append('--include_dataset_ops='+str(FLAGS.include_dataset_ops).lower())
subprocess.call(cmd)
diff --git a/tensorflow/contrib/tpu/profiler/pip_package/setup.py b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
index 179d29602b..33ade16003 100644
--- a/tensorflow/contrib/tpu/profiler/pip_package/setup.py
+++ b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
@@ -20,16 +20,12 @@ from __future__ import print_function
from setuptools import setup
-_VERSION = '1.3.0-a1'
+_VERSION = '1.5.0-rc1'
CONSOLE_SCRIPTS = [
'capture_tpu_profile=cloud_tpu_profiler.main:run_main',
]
-REQUIRED_PACKAGES = [
- 'tensorflow >= 1.2.0',
-]
-
setup(
name='cloud_tpu_profiler',
version=_VERSION.replace('-', ''),
@@ -45,27 +41,22 @@ setup(
entry_points={
'console_scripts': CONSOLE_SCRIPTS,
},
- install_requires=REQUIRED_PACKAGES,
classifiers=[
# How mature is this project? Common values are
# 3 - Alpha
# 4 - Beta
# 5 - Production/Stable
- 'Development Status :: 3 - Alpha',
-
+ 'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
'Intended Audience :: Education',
'Intended Audience :: Science/Research',
-
'License :: OSI Approved :: Apache Software License',
-
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
-
'Topic :: Scientific/Engineering',
'Topic :: Scientific/Engineering :: Mathematics',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
@@ -74,4 +65,5 @@ setup(
'Topic :: Software Development :: Libraries :: Python Modules',
],
license='Apache 2.0',
- keywords='tensorflow performance tpu',)
+ keywords='tensorflow performance tpu',
+)
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index c25aac3acf..7fa0b79766 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -454,6 +454,7 @@ tf_cuda_library(
"framework/reader_interface.h",
"framework/reader_op_kernel.h",
"framework/register_types.h",
+ "framework/register_types_traits.h",
"framework/resource_mgr.h",
"framework/resource_op_kernel.h",
"framework/selective_registration.h",
@@ -611,6 +612,7 @@ tf_gen_op_libs(
"list_ops",
"lookup_ops",
"logging_ops",
+ "manip_ops",
"math_ops",
"nn_ops",
"no_op",
@@ -693,6 +695,7 @@ cc_library(
":list_ops_op_lib",
":logging_ops_op_lib",
":lookup_ops_op_lib",
+ ":manip_ops_op_lib",
":math_ops_op_lib",
":nn_ops_op_lib",
":no_op_op_lib",
@@ -831,6 +834,7 @@ cc_library(
"//tensorflow/core/kernels:list_kernels",
"//tensorflow/core/kernels:lookup",
"//tensorflow/core/kernels:logging",
+ "//tensorflow/core/kernels:manip",
"//tensorflow/core/kernels:math",
"//tensorflow/core/kernels:multinomial_op",
"//tensorflow/core/kernels:nn",
@@ -1153,6 +1157,7 @@ cc_library(
deps = [
":protos_all_cc_impl",
"//third_party/eigen3",
+ "@nsync//:nsync_cpp",
"@protobuf_archive//:protobuf",
],
alwayslink = 1,
diff --git a/tensorflow/core/api_def/base_api/api_def_MatchingFiles.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatchingFiles.pbtxt
index 8da76684e5..97fd39f647 100644
--- a/tensorflow/core/api_def/base_api/api_def_MatchingFiles.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_MatchingFiles.pbtxt
@@ -16,5 +16,6 @@ END
description: <<END
Note that this routine only supports wildcard characters in the
basename portion of the pattern, not in the directory portion.
+Note also that the order of filenames returned can be non-deterministic.
END
}
diff --git a/tensorflow/core/api_def/base_api/api_def_Roll.pbtxt b/tensorflow/core/api_def/base_api/api_def_Roll.pbtxt
new file mode 100644
index 0000000000..b308ad1f9d
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_Roll.pbtxt
@@ -0,0 +1,52 @@
+op {
+ graph_op_name: "Roll"
+ in_arg {
+ name: "shift"
+ description: <<END
+Dimension must be 0-D or 1-D. `shift[i]` specifies the number of places by which
+elements are shifted positively (towards larger indices) along the dimension
+specified by `axis[i]`. Negative shifts will roll the elements in the opposite
+direction.
+END
+ }
+ in_arg {
+ name: "axis"
+ description: <<END
+Dimension must be 0-D or 1-D. `axis[i]` specifies the dimension along which the
+shift `shift[i]` should occur. If the same axis is referenced more than once, the
+total shift for that axis will be the sum of all the shifts that belong to that
+axis.
+END
+ }
+ out_arg {
+ name: "output"
+ description: <<END
+Has the same shape and size as the input. The elements are shifted
+positively (towards larger indices) by the offsets of `shift` along the
+dimensions of `axis`.
+END
+ }
+ summary: "Rolls the elements of a tensor along an axis."
+ description: <<END
+The elements are shifted positively (towards larger indices) by the offset of
+`shift` along the dimension of `axis`. Negative `shift` values will shift
+elements in the opposite direction. Elements that roll past the last position
+will wrap around to the first and vice versa. Multiple shifts along multiple
+axes may be specified.
+
+For example:
+
+```
+# 't' is [0, 1, 2, 3, 4]
+roll(t, shift=2, axis=0) ==> [3, 4, 0, 1, 2]
+
+# shifting along multiple dimensions
+# 't' is [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
+roll(t, shift=[1, -2], axis=[0, 1]) ==> [[7, 8, 9, 5, 6], [2, 3, 4, 0, 1]]
+
+# shifting along the same axis multiple times
+# 't' is [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
+roll(t, shift=[2, -3], axis=[1, 1]) ==> [[1, 2, 3, 4, 0], [6, 7, 8, 9, 5]]
+```
+END
+}
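
The Roll op description above gives the semantics in pseudo-notation; below is a short runnable sketch of the same multi-axis example, assuming the op is exposed in Python as `tf.manip.roll` (the Python binding is not part of this hunk):

```python
import tensorflow as tf

t = tf.constant([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]])
# Shift by 1 along axis 0 and by -2 along axis 1, matching the pbtxt example.
rolled = tf.manip.roll(t, shift=[1, -2], axis=[0, 1])
with tf.Session() as sess:
    print(sess.run(rolled))  # [[7 8 9 5 6], [2 3 4 0 1]]
```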
diff --git a/tensorflow/core/api_def/base_api/api_def_UnravelIndex.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnravelIndex.pbtxt
new file mode 100644
index 0000000000..97c380700a
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_UnravelIndex.pbtxt
@@ -0,0 +1,32 @@
+op {
+ graph_op_name: "UnravelIndex"
+ in_arg {
+ name: "indices"
+ description: <<END
+A 0-D or 1-D `int` Tensor whose elements are indices into the
+flattened version of an array of dimensions dims.
+END
+ }
+ in_arg {
+ name: "dims"
+ description: <<END
+A 1-D `int` Tensor. The shape of the array to use for unraveling
+indices.
+END
+ }
+ out_arg {
+ name: "output"
+ description: <<END
+A 2-D (or 1-D if indices is 0-D) tensor where each row has the
+same shape as the indices array.
+END
+ }
+ summary: "Converts a flat index or array of flat indices into a tuple of"
+ description: <<END
+coordinate arrays.
+
+@compatibility(numpy)
+Equivalent to np.unravel_index
+@end_compatibility
+END
+}
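
UnravelIndex mirrors `np.unravel_index`; a brief usage sketch follows, assuming the op is exposed in Python as `tf.unravel_index` (only the op definition appears in this change):

```python
import numpy as np
import tensorflow as tf

coords = tf.unravel_index(indices=[2, 5, 7], dims=[3, 3])
with tf.Session() as sess:
    print(sess.run(coords))                 # [[0 1 2], [2 2 1]]
print(np.unravel_index([2, 5, 7], (3, 3)))  # the same coordinates, as a tuple
```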
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 04b5541863..a9485a835e 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -762,7 +762,8 @@ int64 MinSystemMemory(int64 available_memory) {
// is necessary.
min_system_memory *= 2;
#endif
-#if defined(NVIDIA_TEGRA)
+
+#if defined(ANDROID_TEGRA)
// 1GB system mem for NVIDIA Tegra devices since they use the same mem for RAM
// and Video RAM
min_system_memory = 1 << 30;
diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD
index f4ee841032..9e152aa082 100644
--- a/tensorflow/core/distributed_runtime/BUILD
+++ b/tensorflow/core/distributed_runtime/BUILD
@@ -145,6 +145,7 @@ cc_library(
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:worker_proto_cc",
],
)
diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index dcc25e4426..878a1398c9 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -1448,6 +1448,7 @@ Status MasterSession::DoPartialRun(CallOptions* opts,
const auto count = run_state->count;
pss.collect_timeline =
req.options().trace_level() == RunOptions::FULL_TRACE;
+ pss.collect_rpcs = req.options().trace_level() == RunOptions::FULL_TRACE;
pss.report_tensor_allocations_upon_oom =
req.options().report_tensor_allocations_upon_oom();
@@ -1610,6 +1611,7 @@ Status MasterSession::DoRunWithLocalExecution(
TRACEPRINTF("stepid %llu", step_id);
pss.collect_timeline = req.options().trace_level() == RunOptions::FULL_TRACE;
+ pss.collect_rpcs = req.options().trace_level() == RunOptions::FULL_TRACE;
pss.report_tensor_allocations_upon_oom =
req.options().report_tensor_allocations_upon_oom();
// Build the cost model every 'build_cost_model_every' steps after skipping an
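
Both DoPartialRun and DoRunWithLocalExecution now also collect RPC logs whenever the client requests a full trace. For reference, a minimal client-side sketch of requesting such a trace with the standard TF 1.x API; the graph and fetches here are illustrative:

```python
import tensorflow as tf

x = tf.constant([1.0, 2.0])
y = tf.reduce_sum(x)

# FULL_TRACE sets pss.collect_timeline and, with this change, pss.collect_rpcs.
run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
run_metadata = tf.RunMetadata()
with tf.Session() as sess:
    print(sess.run(y, options=run_options, run_metadata=run_metadata))
```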
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
index 95811476f7..b20e744a97 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
@@ -444,6 +444,24 @@ void GrpcWorker::GrpcRecvTensorAsync(CallOptions* opts,
});
}
+void GrpcWorker::LoggingAsync(const LoggingRequest* request,
+ LoggingResponse* response, StatusCallback done) {
+ auto env = this->env();
+ if (env) {
+ auto session_mgr = (SessionMgr*)env->session_mgr;
+ if (session_mgr) {
+ session_mgr->SetLogging(request->rpc_logging());
+ for (const auto& step_id : request->fetch_step_id()) {
+ session_mgr->RetrieveLogs(step_id, response);
+ }
+ if (request->clear()) {
+ session_mgr->ClearLogs();
+ }
+ }
+ }
+ done(Status::OK());
+}
+
WorkerEnv* GrpcWorker::env() { return env_; }
std::unique_ptr<GrpcWorker> NewGrpcWorker(WorkerEnv* env) {
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.h b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.h
index 78a21fd9f6..fbddbda9e6 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.h
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.h
@@ -40,6 +40,9 @@ class GrpcWorker : public Worker {
::grpc::ByteBuffer* response,
StatusCallback done);
+ virtual void LoggingAsync(const LoggingRequest* request,
+ LoggingResponse* response, StatusCallback done);
+
WorkerEnv* env();
private:
diff --git a/tensorflow/core/distributed_runtime/session_mgr.cc b/tensorflow/core/distributed_runtime/session_mgr.cc
index 8db49e7f15..90664c3612 100644
--- a/tensorflow/core/distributed_runtime/session_mgr.cc
+++ b/tensorflow/core/distributed_runtime/session_mgr.cc
@@ -64,8 +64,13 @@ Status SessionMgr::CreateSession(const string& session,
TF_RETURN_IF_ERROR(worker_cache_factory_(server_def, &worker_cache));
}
+ if (worker_cache != nullptr && default_worker_cache_.get() != nullptr) {
+ worker_cache->SetLogging(this->is_logging_active_);
+ }
+
CHECK(!worker_env_->local_devices.empty())
<< "The WorkerEnv must have at least one device in `local_devices`.";
+
std::vector<Device*> renamed_devices;
for (Device* d : worker_env_->local_devices) {
renamed_devices.push_back(RenamedDevice::NewRenamedDevice(
@@ -113,4 +118,77 @@ std::shared_ptr<WorkerSession> SessionMgr::LegacySession() {
return legacy_session_;
}
+void SessionMgr::SetLogging(bool active) {
+ mutex_lock l(mu_);
+ this->is_logging_active_ = active;
+ // Legacy Session
+ if (legacy_session_) {
+ auto* worker_cache = legacy_session_->worker_cache.get();
+ if (worker_cache) {
+ worker_cache->SetLogging(active);
+ }
+ }
+
+ for (const auto& session_kv : sessions_) {
+ auto session = session_kv.second.get();
+ if (session) {
+ auto* worker_cache = session->worker_cache.get();
+ if (worker_cache) {
+ worker_cache->SetLogging(active);
+ }
+ }
+ }
+}
+
+void SessionMgr::RetrieveLogs(tensorflow::int64 step_id,
+ LoggingResponse* response) {
+ mutex_lock l(mu_);
+ // Legacy Session
+ if (legacy_session_) {
+ auto* worker_cache = legacy_session_->worker_cache.get();
+ if (worker_cache) {
+ auto step_stats = StepStats();
+ if (worker_cache->RetrieveLogs(step_id, &step_stats)) {
+ auto* labeled_step_stats = response->add_step();
+ labeled_step_stats->set_step_id(step_id);
+ labeled_step_stats->mutable_step_stats()->Swap(&step_stats);
+ }
+ }
+ }
+ for (const auto& session_kv : sessions_) {
+ auto session = session_kv.second.get();
+ if (session) {
+ auto* worker_cache = session->worker_cache.get();
+ if (worker_cache) {
+ auto step_stats = StepStats();
+ if (worker_cache->RetrieveLogs(step_id, &step_stats)) {
+ auto* labeled_step_stats = response->add_step();
+ labeled_step_stats->set_step_id(step_id);
+ labeled_step_stats->mutable_step_stats()->Swap(&step_stats);
+ }
+ }
+ }
+ }
+}
+
+void SessionMgr::ClearLogs() {
+ mutex_lock l(mu_);
+ // Legacy Session
+ if (legacy_session_) {
+ auto* worker_cache = legacy_session_->worker_cache.get();
+ if (worker_cache) {
+ worker_cache->ClearLogs();
+ }
+ }
+
+ for (const auto& session_kv : sessions_) {
+ auto session = session_kv.second.get();
+ if (session) {
+ auto* worker_cache = session->worker_cache.get();
+ if (worker_cache) {
+ worker_cache->ClearLogs();
+ }
+ }
+ }
+}
} // namespace tensorflow
diff --git a/tensorflow/core/distributed_runtime/session_mgr.h b/tensorflow/core/distributed_runtime/session_mgr.h
index 3ce260d12e..4c9702d522 100644
--- a/tensorflow/core/distributed_runtime/session_mgr.h
+++ b/tensorflow/core/distributed_runtime/session_mgr.h
@@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/protobuf/tensorflow_server.pb.h"
+#include "tensorflow/core/protobuf/worker.pb.h"
namespace tensorflow {
@@ -56,6 +57,12 @@ class SessionMgr {
static string WorkerNameFromServerDef(const ServerDef& server_def);
+ void SetLogging(bool active);
+
+ void RetrieveLogs(tensorflow::int64 step_id, LoggingResponse* response);
+
+ void ClearLogs();
+
private:
const WorkerEnv* const worker_env_; // Not owned.
@@ -75,6 +82,8 @@ class SessionMgr {
std::unique_ptr<WorkerCacheInterface> default_worker_cache_;
std::shared_ptr<WorkerSession> legacy_session_;
+ bool is_logging_active_ = false;
+
const WorkerCacheFactory worker_cache_factory_;
std::shared_ptr<WorkerSession> WorkerSessionForSessionUnlocked(
diff --git a/tensorflow/core/framework/register_types.h b/tensorflow/core/framework/register_types.h
index e448a60f5e..e90596980f 100644
--- a/tensorflow/core/framework/register_types.h
+++ b/tensorflow/core/framework/register_types.h
@@ -53,7 +53,7 @@ limitations under the License.
*/
#if !defined(IS_MOBILE_PLATFORM) || defined(SUPPORT_SELECTIVE_REGISTRATION) || \
- defined(NVIDIA_TEGRA)
+ defined(ANDROID_TEGRA)
// All types are supported, so all macros are invoked.
//
diff --git a/tensorflow/core/framework/variant_op_registry.cc b/tensorflow/core/framework/variant_op_registry.cc
index 395329da3b..ee07db1aee 100644
--- a/tensorflow/core/framework/variant_op_registry.cc
+++ b/tensorflow/core/framework/variant_op_registry.cc
@@ -182,7 +182,7 @@ Status VariantDeviceCopy(
// Special casing UnaryOpFn per op and per device.
UnaryVariantOpRegistry::VariantUnaryOpFn* UnaryVariantOpRegistry::GetUnaryOpFn(
VariantUnaryOp op, StringPiece device, StringPiece type_name) {
- auto found = unary_op_fns.find(std::make_tuple(op, device, type_name));
+ auto found = unary_op_fns.find({op, device, type_name});
if (found == unary_op_fns.end()) return nullptr;
return &found->second;
}
@@ -195,12 +195,10 @@ void UnaryVariantOpRegistry::RegisterUnaryOpFn(
CHECK_EQ(existing, nullptr)
<< "Unary VariantUnaryOpFn for type_name: " << type_name
<< " already registered for device type: " << device;
- unary_op_fns.insert(
- std::pair<std::tuple<VariantUnaryOp, StringPiece, StringPiece>,
- VariantUnaryOpFn>(
- std::make_tuple(op, GetPersistentStringPiece(device),
- GetPersistentStringPiece(type_name)),
- unary_op_fn));
+ unary_op_fns.insert(std::pair<FuncTuple<VariantUnaryOp>, VariantUnaryOpFn>(
+ {op, GetPersistentStringPiece(device),
+ GetPersistentStringPiece(type_name)},
+ unary_op_fn));
}
namespace {
@@ -229,7 +227,7 @@ REGISTER_VARIANT_ZEROS_LIKE_TYPE(bool);
UnaryVariantOpRegistry::VariantBinaryOpFn*
UnaryVariantOpRegistry::GetBinaryOpFn(VariantBinaryOp op, StringPiece device,
StringPiece type_name) {
- auto found = binary_op_fns.find(std::make_tuple(op, device, type_name));
+ auto found = binary_op_fns.find({op, device, type_name});
if (found == binary_op_fns.end()) return nullptr;
return &found->second;
}
@@ -242,12 +240,10 @@ void UnaryVariantOpRegistry::RegisterBinaryOpFn(
CHECK_EQ(existing, nullptr)
<< "Unary VariantBinaryOpFn for type_name: " << type_name
<< " already registered for device type: " << device;
- binary_op_fns.insert(
- std::pair<std::tuple<VariantBinaryOp, StringPiece, StringPiece>,
- VariantBinaryOpFn>(
- std::make_tuple(op, GetPersistentStringPiece(device),
- GetPersistentStringPiece(type_name)),
- add_fn));
+ binary_op_fns.insert(std::pair<FuncTuple<VariantBinaryOp>, VariantBinaryOpFn>(
+ {op, GetPersistentStringPiece(device),
+ GetPersistentStringPiece(type_name)},
+ add_fn));
}
namespace {
diff --git a/tensorflow/core/framework/variant_op_registry.h b/tensorflow/core/framework/variant_op_registry.h
index 13f6908cae..e94100e994 100644
--- a/tensorflow/core/framework/variant_op_registry.h
+++ b/tensorflow/core/framework/variant_op_registry.h
@@ -166,6 +166,21 @@ class UnaryVariantOpRegistry {
device_copy_fns;
// Map std::tuple<Op, device, type_name> to function.
+
+ // A std::tuple key here falls victim to "too perfect forwarding";
+ // see https://stackoverflow.com/questions/44475317/variadic-template-issue
+ // and the references therein.
+ template <typename Op>
+ struct FuncTuple {
+ FuncTuple(const Op& op, const StringPiece& dev, const StringPiece& tname)
+ : op_type_(op), device_(dev), typename_(tname) {}
+ Op op_type_;
+ StringPiece device_, typename_;
+ };
+ // friend declaration for operator==
+ // needed for clang
+ template <typename Op>
+ friend bool operator==(const FuncTuple<Op>& l, const FuncTuple<Op>& r);
struct TupleHash {
template <typename Op>
std::size_t operator()(
@@ -176,18 +191,25 @@ class UnaryVariantOpRegistry {
ret = Hash64Combine(ret, sp_hasher_(std::get<2>(x)));
return ret;
}
+
+ template <typename Op>
+ std::size_t operator()(const FuncTuple<Op>& x) const {
+ // The hash of an enum is just its value as a std::size_t.
+ std::size_t ret = static_cast<std::size_t>(x.op_type_);
+ ret = Hash64Combine(ret, sp_hasher_(x.device_));
+ ret = Hash64Combine(ret, sp_hasher_(x.typename_));
+ return ret;
+ }
StringPieceHasher sp_hasher_;
};
- std::unordered_map<std::tuple<VariantUnaryOp, StringPiece, StringPiece>,
- VariantUnaryOpFn, TupleHash>
+ std::unordered_map<FuncTuple<VariantUnaryOp>, VariantUnaryOpFn, TupleHash>
unary_op_fns;
- std::unordered_map<std::tuple<VariantBinaryOp, StringPiece, StringPiece>,
- VariantBinaryOpFn, TupleHash>
+ std::unordered_map<FuncTuple<VariantBinaryOp>, VariantBinaryOpFn, TupleHash>
binary_op_fns;
// Find or insert a string into a persistent string storage
- // container; return the StringPiece pointing to the permanent
- // string location.
+ // container; return the StringPiece pointing to the permanent string
+ // location.
static StringPiece GetPersistentStringPiece(const string& str) {
const auto string_storage = PersistentStringStorage();
auto found = string_storage->find(str);
@@ -199,7 +221,12 @@ class UnaryVariantOpRegistry {
}
}
};
-
+template <typename Op>
+inline bool operator==(const UnaryVariantOpRegistry::FuncTuple<Op>& lhs,
+ const UnaryVariantOpRegistry::FuncTuple<Op>& rhs) {
+ return (lhs.op_type_ == rhs.op_type_) && (lhs.device_ == rhs.device_) &&
+ (lhs.typename_ == rhs.typename_);
+}
// Gets a TensorShape from a Tensor containing a scalar Variant.
// Returns an Internal error if the Variant does not have a registered shape
// function, or if it's a serialized Variant that cannot be decoded.
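The registry change above replaces std::tuple keys with a small FuncTuple struct plus an explicit hash functor and operator==, so that brace-initialized lookups avoid the overly greedy forwarding constructors of std::tuple. Below is a minimal standalone sketch of the same pattern; the OpKind, OpKey, and OpKeyHash names are invented stand-ins for the real VariantUnaryOp/StringPiece types and Hash64Combine, not code from this commit.

    #include <cstddef>
    #include <functional>
    #include <string>
    #include <unordered_map>

    // Hypothetical stand-in for VariantUnaryOp / VariantBinaryOp.
    enum class OpKind { kZerosLike, kAdd };

    // Plain aggregate key; brace-init works the same way for insert and find.
    struct OpKey {
      OpKind op;
      std::string device;
      std::string type_name;
    };

    inline bool operator==(const OpKey& l, const OpKey& r) {
      return l.op == r.op && l.device == r.device && l.type_name == r.type_name;
    }

    // Explicit hasher, analogous to TupleHash in the registry.
    struct OpKeyHash {
      std::size_t operator()(const OpKey& k) const {
        std::size_t h = static_cast<std::size_t>(k.op);
        h = h * 31 + std::hash<std::string>()(k.device);
        h = h * 31 + std::hash<std::string>()(k.type_name);
        return h;
      }
    };

    int main() {
      std::unordered_map<OpKey, int, OpKeyHash> registry;
      registry.insert({{OpKind::kAdd, "CPU", "int32"}, 42});
      auto it = registry.find({OpKind::kAdd, "CPU", "int32"});
      return it != registry.end() ? 0 : 1;  // finds the entry inserted above
    }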
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 68c3136019..7d3be15299 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -42,7 +42,7 @@ limitations under the License.
namespace tensorflow {
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
// This pass implements rewriting of graph to support following scenarios:
// (A) Merging nodes in the graph
@@ -2211,7 +2211,7 @@ Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
return Status::OK();
}
-#else // INTEL_MKL_DNN
+#else // INTEL_MKL_ML
// This pass implements rewriting of graph to support following scenarios:
// (A) Merging nodes in the graph
@@ -2452,9 +2452,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
// NOTE: names are alphabetically sorted.
rinfo_.push_back({csinfo_.addn, mkl_op_registry::GetMklOpName(csinfo_.addn),
CopyAttrsAddN, AddNRewrite});
- /* rinfo_.push_back({csinfo_.add,
- mkl_op_registry::GetMklOpName(csinfo_.add),
- CopyAttrsDataType, AlwaysRewrite}); */
+ rinfo_.push_back({csinfo_.add, mkl_op_registry::GetMklOpName(csinfo_.add),
+ CopyAttrsDataType, AlwaysRewrite});
rinfo_.push_back({csinfo_.avg_pool,
mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
CopyAttrsPooling, AlwaysRewrite});
@@ -2502,14 +2501,13 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
rinfo_.push_back({csinfo_.max_pool_grad,
mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad),
CopyAttrsPooling, AlwaysRewrite});
- /*
+
rinfo_.push_back({csinfo_.maximum,
mkl_op_registry::GetMklOpName(csinfo_.maximum),
CopyAttrsDataType, AlwaysRewrite});
rinfo_.push_back({csinfo_.mul,
mkl_op_registry::GetMklOpName(csinfo_.mul),
CopyAttrsDataType, AlwaysRewrite});
- */
rinfo_.push_back({csinfo_.relu, mkl_op_registry::GetMklOpName(csinfo_.relu),
CopyAttrsDataType, AlwaysRewrite});
rinfo_.push_back({csinfo_.relu_grad,
@@ -2529,14 +2527,13 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
rinfo_.push_back({csinfo_.softmax,
mkl_op_registry::GetMklOpName(csinfo_.softmax),
CopyAttrsDataType, AlwaysRewrite});
- /*
+
rinfo_.push_back({csinfo_.squared_difference,
mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
CopyAttrsDataType, AlwaysRewrite});
rinfo_.push_back({csinfo_.sub,
mkl_op_registry::GetMklOpName(csinfo_.sub),
CopyAttrsDataType, AlwaysRewrite});
- */
// Add info about which ops to add workspace edge to and the slots.
wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3});
@@ -4317,7 +4314,7 @@ Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
return Status::OK();
}
-#endif // INTEL_MKL_DNN
+#endif // INTEL_MKL_ML
} // namespace tensorflow
#endif
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index 320d5a48c7..5e2a465e22 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -38,7 +38,7 @@ limitations under the License.
namespace tensorflow {
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
namespace {
@@ -1899,7 +1899,7 @@ BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
} // namespace
-#else // INTEL_MKL_DNN
+#else // INTEL_MKL_ML
namespace {
@@ -3532,7 +3532,7 @@ BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
} // namespace
-#endif // INTEL_MKL_DNN
+#endif // INTEL_MKL_ML
} // namespace tensorflow
diff --git a/tensorflow/core/graph/testlib.cc b/tensorflow/core/graph/testlib.cc
index d5b026eae3..0d88d1ff72 100644
--- a/tensorflow/core/graph/testlib.cc
+++ b/tensorflow/core/graph/testlib.cc
@@ -273,6 +273,16 @@ Node* Reverse(Graph* g, Node* tensor, Node* axis) {
return Binary(g, "ReverseV2", tensor, axis);
}
+Node* Roll(Graph* g, Node* input, Node* shift, Node* axis) {
+ Node* ret;
+ TF_CHECK_OK(NodeBuilder(g->NewName("n"), "Roll", g->op_registry())
+ .Input(input)
+ .Input(shift)
+ .Input(axis)
+ .Finalize(g, &ret));
+ return ret;
+}
+
Node* Error(Graph* g, Node* input, const string& errmsg) {
Node* ret;
TF_CHECK_OK(NodeBuilder(g->NewName("n"), "Error")
diff --git a/tensorflow/core/graph/testlib.h b/tensorflow/core/graph/testlib.h
index 06597778bb..eb9038d619 100644
--- a/tensorflow/core/graph/testlib.h
+++ b/tensorflow/core/graph/testlib.h
@@ -117,6 +117,10 @@ Node* RandomGamma(Graph* g, Node* shape, Node* alpha);
// Output dtype determined by lam.
Node* RandomPoisson(Graph* g, Node* shape, Node* lam);
+// Rolls tensor by an offset of <shift> along the corresponding
+// <axis> dimensions.
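+// For example, rolling [0, 1, 2, 3, 4] by a shift of 2 along axis 0
+// yields [3, 4, 0, 1, 2].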
+Node* Roll(Graph* g, Node* input, Node* shift, Node* axis);
+
// Generates random parameters from the truncated standard normal distribution
// of the input shape
Node* TruncatedNormal(Graph* g, Node* input, DataType dtype);
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index fd99409c9b..e7192ec42f 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -629,6 +629,7 @@ cc_library(
":transpose_op",
":unique_op",
":unpack_op",
+ ":unravel_index_op",
":where_op",
],
)
@@ -884,6 +885,12 @@ tf_kernel_library(
)
tf_kernel_library(
+ name = "unravel_index_op",
+ prefix = "unravel_index_op",
+ deps = ARRAY_DEPS,
+)
+
+tf_kernel_library(
name = "where_op",
srcs = ["where_op.cc"],
hdrs = ["where_op.h"],
@@ -2582,6 +2589,45 @@ tf_cc_tests(
],
)
+cc_library(
+ name = "manip",
+ deps = [
+ ":roll_op",
+ ],
+)
+
+MANIP_DEPS = [
+ "//tensorflow/core:framework",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:manip_ops_op_lib",
+ "//third_party/eigen3",
+]
+
+tf_kernel_library(
+ name = "roll_op",
+ prefix = "roll_op",
+ deps = MANIP_DEPS,
+)
+
+tf_cc_test(
+ name = "roll_op_test",
+ size = "small",
+ srcs = ["roll_op_test.cc"],
+ deps = [
+ ":ops_testutil",
+ ":ops_util",
+ ":roll_op",
+ "//tensorflow/core:core_cpu",
+ "//tensorflow/core:core_cpu_internal",
+ "//tensorflow/core:framework",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:test",
+ "//tensorflow/core:test_main",
+ "//tensorflow/core:testlib",
+ ],
+)
+
MATH_DEPS = [
":bounds_check",
":fill_functor",
diff --git a/tensorflow/core/kernels/compare_and_bitpack_op.cc b/tensorflow/core/kernels/compare_and_bitpack_op.cc
index 9f626a274a..224fe534e3 100644
--- a/tensorflow/core/kernels/compare_and_bitpack_op.cc
+++ b/tensorflow/core/kernels/compare_and_bitpack_op.cc
@@ -110,7 +110,19 @@ struct ComputeShard<T,
typename TTypes<bool>::ConstMatrix input,
typename TTypes<uint8>::Matrix output, bool /*thresh*/, int64 start,
int64 limit) {
- // NOTE(ebrevdo): This assumes memory is little-endian.
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
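+ // On big-endian hosts the first input byte occupies the most significant
+ // byte of the int64 block, so the shifts below differ from the
+ // little-endian case; the mapping is still first byte -> output bit 7,
+ // ..., last byte -> output bit 0.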
+ for (int64 i = start; i < limit; ++i) {
+ uint8* out = output.data() + i;
+ const int64 block = *reinterpret_cast<const int64*>(input.data() + 8 * i);
+ *out = ((((block & (1LL << (7 * 8))) >> (7 * 8 - 7))) |
+ (((block & (1LL << (6 * 8))) >> (6 * 8 - 6))) |
+ (((block & (1LL << (5 * 8))) >> (5 * 8 - 5))) |
+ (((block & (1LL << (4 * 8))) >> (4 * 8 - 4))) |
+ (((block & (1LL << (3 * 8))) >> (3 * 8 - 3))) |
+ (((block & (1LL << (2 * 8))) >> (2 * 8 - 2))) |
+ (((block & (1LL << 8)) >> (1 * 8 - 1))) | (((block & (1LL)))));
+ }
+#else
for (int64 i = start; i < limit; ++i) {
uint8* out = output.data() + i;
const int64 block = *reinterpret_cast<const int64*>(input.data() + 8 * i);
@@ -123,6 +135,7 @@ struct ComputeShard<T,
(((block & (1LL << (2 * 8))) >> (2 * 8 - 5))) |
(((block & (1LL << 8)) >> (1 * 8 - 6))) | (((block & (1LL)) << 7)));
}
+#endif
}
};
diff --git a/tensorflow/core/kernels/decode_bmp_op.cc b/tensorflow/core/kernels/decode_bmp_op.cc
index c778278e8f..b7d120a617 100644
--- a/tensorflow/core/kernels/decode_bmp_op.cc
+++ b/tensorflow/core/kernels/decode_bmp_op.cc
@@ -39,6 +39,13 @@ class DecodeBmpOp : public OpKernel {
errors::InvalidArgument("channels must be 0, 1, 3 or 4, got ",
channels_));
}
+ inline int32 ByteSwapInt32ForBigEndian(int32 x) {
+#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return le32toh(x);
+#else
+ return x;
+#endif
+ }
void Compute(OpKernelContext* context) override {
const Tensor& contents = context->input(0);
@@ -56,14 +63,18 @@ class DecodeBmpOp : public OpKernel {
input.size(), " bytes"));
const uint8* img_bytes = reinterpret_cast<const uint8*>(input.data());
- const int32 header_size = internal::SubtleMustCopy(
+ int32 header_size_ = internal::SubtleMustCopy(
*(reinterpret_cast<const int32*>(img_bytes + 10)));
- const int32 width = internal::SubtleMustCopy(
+ const int32 header_size = ByteSwapInt32ForBigEndian(header_size_);
+ int32 width_ = internal::SubtleMustCopy(
*(reinterpret_cast<const int32*>(img_bytes + 18)));
- const int32 height = internal::SubtleMustCopy(
+ const int32 width = ByteSwapInt32ForBigEndian(width_);
+ int32 height_ = internal::SubtleMustCopy(
*(reinterpret_cast<const int32*>(img_bytes + 22)));
- const int32 bpp = internal::SubtleMustCopy(
+ const int32 height = ByteSwapInt32ForBigEndian(height_);
+ int32 bpp_ = internal::SubtleMustCopy(
*(reinterpret_cast<const int32*>(img_bytes + 28)));
+ const int32 bpp = ByteSwapInt32ForBigEndian(bpp_);
if (channels_) {
OP_REQUIRES(context, (channels_ == bpp / 8),
diff --git a/tensorflow/core/kernels/fractional_pool_common.h b/tensorflow/core/kernels/fractional_pool_common.h
index df0bbbfa06..2d7a230fc0 100644
--- a/tensorflow/core/kernels/fractional_pool_common.h
+++ b/tensorflow/core/kernels/fractional_pool_common.h
@@ -57,7 +57,7 @@ static inline void RandomShuffle(Iter first, Iter last, const Random& uniform) {
// * sum(generated_diff_pooling_sequence) = input_length
// * Let's define floor(input_length / output_length) = K, then
// K <= generated_diff_pooling_sequence[i] <= K+1
-// For example, when input_length = 10, output_length = 6, the followings are
+// For example, when input_length = 10, output_length = 6, the following are
// valid pooling sequences:
// * [1, 2, 2, 1, 2, 2]
// * [1, 1, 2, 2, 2, 2]
diff --git a/tensorflow/core/kernels/mkl_aggregate_ops.cc b/tensorflow/core/kernels/mkl_aggregate_ops.cc
index 89d37d2f87..b539b00009 100644
--- a/tensorflow/core/kernels/mkl_aggregate_ops.cc
+++ b/tensorflow/core/kernels/mkl_aggregate_ops.cc
@@ -28,7 +28,7 @@ limitations under the License.
#include "mkl_dnn_types.h"
#include "tensorflow/core/util/mkl_util.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
using mkldnn::stream;
using mkldnn::sum;
@@ -37,7 +37,7 @@ using mkldnn::sum;
namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
template <typename Device, typename T>
class MklAddNOp : public OpKernel {
@@ -285,7 +285,7 @@ class MklAddNOp : public OpKernel {
} MklAddNOpContext;
};
-#else // INTEL_MKL_DNN
+#else // INTEL_MKL_ML
template <typename Device, typename T>
class MklAddNOp : public OpKernel {
public:
@@ -317,8 +317,11 @@ class MklAddNOp : public OpKernel {
: src2_tensor.dims();
// if the shapes of two tensors are not same raise op error
TensorShape src1_shape, src2_shape;
- src1_shape = src1_tensor.shape();
- src2_shape = src2_tensor.shape();
+ src1_shape = input1_in_mkl_format ? src1_mkl_shape.GetTfShape()
+ : src1_tensor.shape();
+ src2_shape = input2_in_mkl_format ? src2_mkl_shape.GetTfShape()
+ : src2_tensor.shape();
+
if (!src1_shape.IsSameSize(src2_shape)) {
ctx->SetStatus(errors::InvalidArgument(
"Inputs to operation ", this->name(), " of type ",
diff --git a/tensorflow/core/kernels/mkl_avgpooling_op.cc b/tensorflow/core/kernels/mkl_avgpooling_op.cc
index a7c569ee05..d545d34fdf 100644
--- a/tensorflow/core/kernels/mkl_avgpooling_op.cc
+++ b/tensorflow/core/kernels/mkl_avgpooling_op.cc
@@ -24,7 +24,7 @@
#include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
using mkldnn::algorithm;
using mkldnn::engine;
@@ -40,8 +40,7 @@ namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
-// For now, MKL-ML is default. So making MKL-DNN not a default choice.
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
template <typename Device, typename T>
class MklAvgPoolingOp : public OpKernel {
@@ -429,7 +428,7 @@ class MklAvgPoolingGradOp : public OpKernel {
TensorFormat data_format_;
}; // MklAvgPoolingGradOp
-#else // INTEL_MKL_DNN is defined
+#else
template <typename Device, typename T>
class MklAvgPoolingOp : public MklPoolingForwardOpBase<T> {
@@ -466,6 +465,28 @@ class MklAvgPoolingOp : public MklPoolingForwardOpBase<T> {
memory::dims output_dims_mkl_order;
this->GetOutputDims(pool_params, &output_dims_mkl_order);
+ // If input is an empty tensor, allocate an empty output tensor and return
+ if (input_tensor.NumElements() == 0) {
+ MklDnnShape output_mkl_shape;
+ output_mkl_shape.SetMklTensor(false);
+ TensorShape output_tf_shape;
+ if (pool_params.data_format == TensorFormat::FORMAT_NCHW) {
+ output_tf_shape = MklDnnDimsToTFShape(output_dims_mkl_order);
+ } else {
+ memory::dims output_dims_NHWC_order;
+ output_dims_NHWC_order = {pool_params.tensor_in_batch,
+ static_cast<int>(pool_params.out_height),
+ static_cast<int>(pool_params.out_width),
+ pool_params.out_depth};
+ output_tf_shape = MklDnnDimsToTFShape(output_dims_NHWC_order);
+ }
+ const int kOutputIndex = 0;
+ AllocateOutputSetMklShape(context, kOutputIndex, &output_tensor,
+ output_tf_shape, output_mkl_shape);
+ CHECK_NOTNULL(output_tensor);
+ return;
+ }
+
// If input is in Mkl layout, then just get the memory format from it
// directly, instead of using input data_format to AvgPool.
if (dnn_shape_input.IsMklTensor()) {
@@ -678,7 +699,7 @@ class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase<T> {
}
}; // MklAvgPoolingGradOp
-#endif // INTEL_MKL_DNN
+#endif // INTEL_MKL_ML
REGISTER_KERNEL_BUILDER(Name("_MklAvgPool")
.Device(DEVICE_CPU)
diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc
index 7da63604d2..f1f267e849 100644
--- a/tensorflow/core/kernels/mkl_concat_op.cc
+++ b/tensorflow/core/kernels/mkl_concat_op.cc
@@ -30,7 +30,7 @@ limitations under the License.
#include "mkl_dnn_types.h"
#include "tensorflow/core/util/mkl_util.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
using mkldnn::concat;
@@ -62,7 +62,7 @@ class EigenConcatBaseOp : public OpKernel {
// we need to have empty Compute because Compute is pure virtual function.
void Compute(OpKernelContext* c) {}
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
void Compute(OpKernelContext* c, const std::vector<Tensor>& values) {
const Tensor* concat_dim_tensor;
@@ -230,7 +230,7 @@ class EigenConcatBaseOp : public OpKernel {
#endif
};
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
// --------------------------------------------------------------------------
// Mkl Concat Op
diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
index ef3f8cfec1..1401bc65a4 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
@@ -42,7 +42,7 @@ limitations under the License.
#include "mkl_dnn_types.h"
#include "tensorflow/core/util/mkl_util.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
using mkldnn::convolution_backward_weights;
@@ -55,7 +55,7 @@ namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
template <typename Device, class T>
class MklConv2DCustomBackpropFilterOp : public OpKernel {
@@ -655,7 +655,7 @@ class MklConv2DCustomBackpropFilterOp
TF_CALL_float(REGISTER_MKL_FILTER_KERNELS);
#undef REGISTER_MKL_FILTER_KERNELS
-#endif // INTEL_MKL_DNN
+#endif // INTEL_MKL_ML
} // namespace tensorflow
diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
index a6745489f4..eeed009531 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
@@ -44,7 +44,7 @@ limitations under the License.
#include "tensorflow/core/util/use_cudnn.h"
#include "tensorflow/core/util/work_sharder.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
using mkldnn::convolution_backward_data;
@@ -56,7 +56,7 @@ namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
template <typename Device, class T>
class MklConv2DCustomBackpropInputOp : public OpKernel {
@@ -493,7 +493,7 @@ class MklConv2DCustomBackpropInputOp
}
};
-#endif // INTEL_MKL_DNN
+#endif // INTEL_MKL_ML
#define REGISTER_MKL_CPU_KERNELS(T) \
REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropInput") \
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index e44fba754b..2953426d58 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -41,7 +41,8 @@ limitations under the License.
#include "tensorflow/core/util/mkl_util.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
+
#include "mkldnn.hpp"
using mkldnn::prop_kind;
@@ -58,8 +59,8 @@ namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
-// For now, MKL-ML is default. So making MKL-DNN not a default choice.
-#ifndef INTEL_MKL_DNN
+// MKL-DNN is now the default. MKL-ML must be specified explicitly.
+#ifdef INTEL_MKL_ML
template <typename Device, typename T, bool biasEnabled>
class MklConv2DOp : public OpKernel {
diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h
index 8b65eaea0d..9dd88221a8 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_conv_ops.h
@@ -40,7 +40,7 @@ limitations under the License.
#include "tensorflow/core/util/mkl_util.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
using mkldnn::prop_kind;
@@ -52,7 +52,7 @@ using mkldnn::convolution_forward;
namespace tensorflow {
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
class MklDnnConvUtil {
protected:
@@ -553,7 +553,7 @@ class MklConv2DBackpropCommonOp : public OpKernel {
Padding padding_;
TensorFormat data_format_;
};
-#endif // INTEL_MKL_DNN
+#endif // INTEL_MKL_ML
/////////////////////////////////////////////////////////////////////
/// Dummy Mkl op that is just used for operators that are intermediate
diff --git a/tensorflow/core/kernels/mkl_cwise_ops_common.cc b/tensorflow/core/kernels/mkl_cwise_ops_common.cc
index c065724e0d..58f0c30f32 100644
--- a/tensorflow/core/kernels/mkl_cwise_ops_common.cc
+++ b/tensorflow/core/kernels/mkl_cwise_ops_common.cc
@@ -1,4 +1,4 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0(the "License");
you may not use this file except in compliance with the License.
diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
index 0b6d838e09..8313224d7f 100644
--- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
+++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
@@ -25,7 +25,7 @@ limitations under the License.
#include "mkl_dnn_types.h"
#include "tensorflow/core/util/mkl_util.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
using mkldnn::batch_normalization_backward;
@@ -41,7 +41,7 @@ using mkldnn::use_scale_shift;
namespace tensorflow {
using CPUDevice = Eigen::ThreadPoolDevice;
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
template <typename Device, typename T>
class MklFusedBatchNormOp : public OpKernel {
@@ -683,7 +683,7 @@ class MklFusedBatchNormGradOp : public OpKernel {
};
#endif
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
template <typename Device, typename T>
class MklFusedBatchNormOp : public OpKernel {
diff --git a/tensorflow/core/kernels/mkl_identity_op.cc b/tensorflow/core/kernels/mkl_identity_op.cc
index 9ee27ee21c..6c027f8e72 100644
--- a/tensorflow/core/kernels/mkl_identity_op.cc
+++ b/tensorflow/core/kernels/mkl_identity_op.cc
@@ -28,14 +28,14 @@ limitations under the License.
#include "mkl_dnn_types.h"
#include "tensorflow/core/util/mkl_util.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
#endif
namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
template <typename Device, typename T>
class MklIdentityOp : public OpKernel {
diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc
index 73d41efce1..5a8799ae93 100644
--- a/tensorflow/core/kernels/mkl_input_conversion_op.cc
+++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc
@@ -31,7 +31,7 @@ limitations under the License.
#include "tensorflow/core/kernels/mkl_tfconv_op.h"
#include "tensorflow/core/util/mkl_util.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
using mkldnn::stream;
@@ -59,7 +59,7 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
// convert the TF format input to MKL format
///////////////////////////////////////////////////////////
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
template <typename Device, typename T>
class MklInputConversionOp : public OpKernel {
public:
@@ -293,14 +293,58 @@ class MklInputConversionOp : public OpKernel {
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// If both inputs are in MKL format
if (input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) {
- // If both have the same shape, pass them through
if (tf_shapes_are_same) {
- VLOG(1) << "MklInputConversionOp: No conversion needed, "
- << "copying MKL inputs with identical shapes to output";
-
- ForwardMklTensorInToOut(context, 0, 0);
- ForwardMklTensorInToOut(context, 1, 1);
- return;
+ auto input0_md = input_shape_0.GetMklLayout();
+ auto input1_md = input_shape_1.GetMklLayout();
+
+ // If both have the same shape and same format, pass them through
+ if (input0_md.data.format == input1_md.data.format) {
+ VLOG(1) << "MklInputConversionOp: No conversion needed, "
+ << "copying MKL inputs with identical shapes to output";
+
+ ForwardMklTensorInToOut(context, 0, 0);
+ ForwardMklTensorInToOut(context, 1, 1);
+ return;
+ } else {
+ VLOG(1) << "MklInputConversionOp: Shape is same, but format is "
+ "different, "
+ << "need to convert to same format";
+
+ // Convert input0, and keep input1 unchanged
+ // Create MklDnnShape for output mkl tensor based on input0
+ Tensor* tensor_out;
+ MklDnnShape mkl_output_mkl_shape;
+ mkl_output_mkl_shape.SetMklTensor(true);
+ mkl_output_mkl_shape.SetElemType(MklDnnType<T>());
+ mkl_output_mkl_shape.SetTfLayout(input_shape_0.GetDimension(),
+ input_shape_0.GetSizesAsMklDnnDims(),
+ input_shape_0.GetTfDataFormat());
+
+ // Get MKL layout from input1 as destination layout
+ mkl_output_mkl_shape.SetMklLayout(&input1_md);
+
+ // Create output Mkl tensor for index 0
+ AllocateOutputSetMklShape(context, 0, &tensor_out,
+ input_tensor_0.shape(),
+ mkl_output_mkl_shape);
+
+ // Create MklDnnData object for the input0 tensor
+ auto cpu_engine = engine(engine::cpu, 0);
+ MklDnnData<T> input(&cpu_engine);
+ input.SetUsrMem(input0_md, &input_tensor_0);
+
+ // Create reorder from input0's layout to input1's layout
+ std::vector<primitive> net;
+ CHECK_EQ(input.CheckReorderToOpMem(
+ memory::primitive_desc(input1_md, cpu_engine),
+ tensor_out, &net),
+ true);
+ stream(stream::kind::eager).submit(net).wait();
+
+ // Input1 will be passed through
+ ForwardMklTensorInToOut(context, 1, 1);
+ return;
+ }
}
// Sanity check
diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc
index a8b45004b7..5f0a12a1fb 100644
--- a/tensorflow/core/kernels/mkl_lrn_op.cc
+++ b/tensorflow/core/kernels/mkl_lrn_op.cc
@@ -38,7 +38,7 @@ limitations under the License.
#include "tensorflow/core/util/work_sharder.h"
#endif
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
using mkldnn::lrn_across_channels;
using mkldnn::lrn_backward;
@@ -67,7 +67,7 @@ void GetBandMatrix(int depth, int depth_radius,
} // namespace
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
template <typename T>
class MklLRNOp : public OpKernel {
@@ -1343,7 +1343,7 @@ class MklLRNGradOp : public OpKernel {
float beta_;
};
-#endif // INTEL_MKL_DNN
+#endif // INTEL_MKL_ML
#define REGISTER_MKL_LRN_CPU(T) \
REGISTER_KERNEL_BUILDER(Name("_MklLRN") \
diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc
index 0de27ccd60..14607f26e0 100644
--- a/tensorflow/core/kernels/mkl_maxpooling_op.cc
+++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc
@@ -22,7 +22,7 @@ limitations under the License.
#include "tensorflow/core/util/mkl_util.h"
#include "tensorflow/core/util/padding.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include <algorithm>
#include "mkldnn.hpp"
using mkldnn::algorithm;
@@ -39,8 +39,8 @@ namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
-// For now, MKL-ML is default. So making MKL-DNN not a default choice.
-#ifndef INTEL_MKL_DNN
+// MKL-DNN is now the default. MKL-ML must be specified explicitly.
+#ifdef INTEL_MKL_ML
// An implementation of MaxPooling (forward).
template <typename Device, typename T>
@@ -494,7 +494,7 @@ class MklMaxPoolingGradOp : public OpKernel {
bool workspace_enabled_;
}; // MklMaxPoolingGradOp
-#else // INTEL_MKL_DNN is defined
+#else
// An implementation of MaxPooling (forward).
template <typename Device, typename T>
@@ -793,7 +793,7 @@ class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase<T> {
}
}; // MklMaxPoolingGradOp
-#endif // INTEL_MKL_DNN
+#endif // INTEL_MKL_ML
REGISTER_KERNEL_BUILDER(Name("_MklMaxPool")
.Device(DEVICE_CPU)
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl_pooling_ops_common.cc
index ef8597b057..5ef6ce2a57 100644
--- a/tensorflow/core/kernels/mkl_pooling_ops_common.cc
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.cc
@@ -42,7 +42,7 @@ void MklPoolParameters::Init(OpKernelContext* context,
Init(context, ksize, stride, padding, data_format);
}
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
// Initialization for MKL format
void MklPoolParameters::Init(OpKernelContext* context,
const std::vector<int32>& ksize,
@@ -72,7 +72,7 @@ void MklPoolParameters::Init(OpKernelContext* context,
Init(context, ksize, stride, padding, data_format);
}
-#endif // INTEL_MKL_DNN
+#endif // INTEL_MKL_ML
// Common Initialization for TensorFlow and MKL formats
void MklPoolParameters::Init(OpKernelContext* context,
const std::vector<int32>& ksize,
@@ -107,7 +107,7 @@ void MklPoolParameters::Init(OpKernelContext* context,
OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose(
tensor_in_cols, window_cols, col_stride,
padding, &out_width, &pad_left, &pad_right));
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
// TF can work with int64, but mkldnn only supports int32
// Fail if the height or width are greater than MAX_INT
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h
index 880e45ab1e..279167aba2 100644
--- a/tensorflow/core/kernels/mkl_pooling_ops_common.h
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h
@@ -22,7 +22,7 @@ limitations under the License.
#include "tensorflow/core/util/mkl_util.h"
#include "tensorflow/core/util/padding.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
using mkldnn::memory;
using mkldnn::pooling_backward;
@@ -85,7 +85,7 @@ struct MklPoolParameters {
void Init(OpKernelContext* context, const std::vector<int32>& ksize,
const std::vector<int32>& stride, Padding padding,
TensorFormat data_format, const TensorShape& tensor_in_shape);
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
void Init(OpKernelContext* context, const std::vector<int32>& ksize,
const std::vector<int32>& stride, Padding padding,
TensorFormat data_format, const MklShape* mkl_in_shape);
@@ -102,7 +102,7 @@ struct MklPoolParameters {
TensorFormat data_format);
};
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
template <class T>
class MklPoolingOpBase : public OpKernel {
@@ -395,7 +395,7 @@ class MklPoolingBackwardOpBase : public MklPoolingOpBase<T> {
return grad_reorder_needed ? target_diff_dst_md : original_input_grad_md;
}
};
-#endif // INTEL_MKL_DNN
+#endif // INTEL_MKL_ML
//-------------------------------------------------------------------
// Utility functions
diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index 873aca30ca..51db3991e2 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -28,7 +28,7 @@ limitations under the License.
#include "tensorflow/core/platform/default/logging.h"
#include "tensorflow/core/util/mkl_util.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
using mkldnn::algorithm;
@@ -58,7 +58,7 @@ struct MklReluHelpers {
}
};
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
template <typename Device, typename T>
class MklReluOp : public OpKernel {
@@ -368,7 +368,7 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
mkl_context.MklCleanup();
}
-#else // INTEL_MKL_DNN
+#else // INTEL_MKL_ML
template <typename Device, typename T, algorithm alg_kind>
class MklReluOpBase : public OpKernel {
@@ -849,7 +849,7 @@ class MklTanhGradOp : public MklReluGradOpBase<Device, T, eltwise_tanh> {
MklReluGradOp<CPUDevice, type>);
TF_CALL_float(REGISTER_RELU_MKL_SUPPORTED_KERNELS_TYPES);
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
// register dnn kernels for supported operations and supported types
#define REGISTER_ELU_MKL_SUPPORTED_KERNELS_TYPES(type) \
diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl_reshape_op.cc
index 7d471e1e4c..5dbc4a2709 100644
--- a/tensorflow/core/kernels/mkl_reshape_op.cc
+++ b/tensorflow/core/kernels/mkl_reshape_op.cc
@@ -28,7 +28,7 @@ limitations under the License.
#include "mkl_dnn_types.h"
#include "tensorflow/core/util/mkl_util.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
using mkldnn::stream;
#endif
@@ -40,7 +40,7 @@ class MklReshapeOp : public OpKernel {
public:
explicit MklReshapeOp(OpKernelConstruction* context) : OpKernel(context) {}
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
void Compute(OpKernelContext* context) override {
const Tensor& input = MklGetInput(context, 0);
const Tensor& sizes = MklGetInput(context, 1);
@@ -312,7 +312,7 @@ class MklReshapeOp : public OpKernel {
}
}
-#endif // INTEL_MKL_DNN
+#endif // INTEL_MKL_ML
private:
const int kInputSlotIdx = 0;
diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc
index c46eabdde1..aceef1e234 100644
--- a/tensorflow/core/kernels/mkl_softmax_op.cc
+++ b/tensorflow/core/kernels/mkl_softmax_op.cc
@@ -15,7 +15,7 @@ limitations under the License.
// See docs in ../ops/nn_ops.cc.
#ifdef INTEL_MKL
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/numeric_op.h"
@@ -156,5 +156,5 @@ TF_CALL_float(REGISTER_SOFTMAX_MKL_SUPPORTED_KERNELS_TYPES);
} // namespace tensorflow
-#endif // INTEL_MKL_DNN
+#endif // INTEL_MKL_ML
#endif // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h
index c4d5a45d3c..5fafa14b5d 100644
--- a/tensorflow/core/kernels/mkl_tfconv_op.h
+++ b/tensorflow/core/kernels/mkl_tfconv_op.h
@@ -35,7 +35,7 @@ limitations under the License.
#include "mkl_dnn_types.h"
#include "tensorflow/core/util/mkl_util.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
using mkldnn::stream;
#endif
@@ -61,7 +61,7 @@ class MklToTfOp : public OpKernel {
VLOG(1) << "MKLToTFConversion complete successfully.";
}
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context,
string data_format_str, DataType op_data_type,
bool has_avx512f, uint input_number) {
diff --git a/tensorflow/core/kernels/roll_op.cc b/tensorflow/core/kernels/roll_op.cc
new file mode 100644
index 0000000000..bcbdbee058
--- /dev/null
+++ b/tensorflow/core/kernels/roll_op.cc
@@ -0,0 +1,334 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/register_types_traits.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/work_sharder.h"
+
+namespace tensorflow {
+
+#define EIGEN_USE_THREADS
+using CPUDevice = Eigen::ThreadPoolDevice;
+
+// dim_size - the size of each dimension
+// dim_range - the number of indices over in the flattened tensor
+// you need to skip in order to make it over from one side of a dimension
+// to the other. Used to make the shifts wrap around after a threshold.
+// threshold - the index for each dimension that the roll starts to wrap
+// back to the front
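+// For example, rolling a [2, 3] tensor by 1 along both axes gives
+// dim_size = {2, 3}, dim_range = {6, 3} and threshold = {1, 2}: an
+// element's destination wraps back to the front of its row once its
+// column index reaches 2, and back to the first row once its row
+// index reaches 1 (values as computed in RollOp::Compute below).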
+template <typename T>
+void DoRoll(OpKernelContext* context, const int64 num_elements,
+ const int num_dims, const gtl::ArraySlice<int>& dim_size,
+ const T* input, T* output, const gtl::ArraySlice<int>& threshold,
+ const gtl::ArraySlice<int64>& dim_range) {
+ auto work = [input, output, num_dims, &dim_size, &threshold, &dim_range](
+ int64 start, int64 end) {
+ // array of indices for each dimension
+ gtl::InlinedVector<int, 4> indices(num_dims);
+ int offset = 0; // the shift along the flattened tensor for current element
+ // initialize indices and offset
+ for (int i = 0; i < num_dims; i++) {
+ // stride is the number of indices over in the flattened tensor
+ // you need to skip in order to make it over to an adjacent element
+ // along a dimension. dim_size[i] != 0 because we set it to max(dim, 1)
+ const int64 stride = dim_range[i] / dim_size[i];
+ const int shift = dim_size[i] - threshold[i];
+ const int indx = (start / stride) % dim_size[i];
+ indices[i] = indx;
+ // calculate dimension index after the shift
+ const int shifted_indx = (indx + shift) % dim_size[i];
+ offset += (shifted_indx - indx) * stride;
+ }
+
+ for (int64 i = start; i < end; i++) {
+ output[i + offset] = input[i];
+ // create next combination of indices
+ // while at it adjust offset if needed
+ for (int j = num_dims - 1; j >= 0; j--) {
+ const int indx = (indices[j] + 1) % dim_size[j];
+ indices[j] = indx;
+ if (indx != 0) {
+ if (indx == threshold[j]) { // we've reached the threshold
+ // dim_range[j] = threshold[j] + shift[j]
+ // offset = shift[j] + ... other offsets
+ // offset - dim_range[j] = -threshold[j] + ... other offsets
+ // thus we undo our previous offset as well as add a new offset of
+ // -threshold[j] in one operation
+ offset -= dim_range[j]; // now wraps around
+ }
+ break; // indx != 0 don't need to carry
+ } else if (threshold[j] != 0) { // if threshold is 0 shift is 0
+ offset += dim_range[j]; // indx became 0 so reverse wrap around
+ }
+ }
+ }
+ };
+ // Shard
+ auto worker_threads = context->device()->tensorflow_cpu_worker_threads();
+ // 15 - experimentally determined with float and bool types
+ const int cost_per_element = 15 * sizeof(T); // rough estimate
+ Shard(worker_threads->num_threads, worker_threads->workers, num_elements,
+ cost_per_element, std::move(work));
+}
+
+// dim_size - the size of each dimension
+// dim_range - the number of indices over in the flattened tensor
+// you need to skip in order to make it over from one side of a dimension
+// to the other. Used to make the shifts wrap around after a threshold.
+// threshold - the index for each dimension that the roll starts to wrap
+// back to the front
+// isd - inner shift dimension
+template <typename T>
+// Use memcpy to copy memory in groups when the data type supports memcpy
+void DoRollWithMemcpy(OpKernelContext* context, const int64 num_elements,
+ const int num_dims, const gtl::ArraySlice<int>& dim_size,
+ const T* input, T* output,
+ const gtl::ArraySlice<int>& threshold,
+ const gtl::ArraySlice<int64>& dim_range,
+ const int64 isd) {
+ auto work = [input, output, num_dims, &dim_size, &threshold, &dim_range, isd](
+ int64 start, int64 end) {
+ // the number of indices over in the flattened tensor you need to skip in
+ // order to make it over from one side of the isd to the other
+ const int64 isd_range = std::max<int>(dim_range[isd], 1);
+ // the distance along the flattened tensor to the next element in the isd
+ const int64 isd_stride = isd_range / std::max<int>(dim_size[isd], 1);
+
+ // start and end represent the i-th group currently so we will convert
+ // them into numbers representing the i-th elements.
+ // there are 2 groups per isd one for all elements before threshold[isd]
+ // and another for all elements after threshold[isd].
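+ // For example, a 1-D tensor of 5 elements rolled by 2 has threshold 3,
+ // so the two groups cover flat index ranges [0, 3) and [3, 5); the first
+ // group lands at the back of the output and the second at the front.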
+ const int64 start_remainder = (start % 2) * threshold[isd] * isd_stride;
+ const int64 end_remainder = (end % 2) * threshold[isd] * isd_stride;
+ start = (start / 2) * isd_range + start_remainder;
+ end = (end / 2) * isd_range + end_remainder;
+
+ const T* in_ptr = &input[0];
+ T* out_ptr = &output[0];
+ in_ptr += start;
+ out_ptr += start;
+
+ // array of indices for each dimension
+ // indicies = [i, j, k, l, m, n]
+ gtl::InlinedVector<int, 4> indicies(num_dims);
+ // the offset needed to make all inner non-shifting dimensions become 0
+ int64 remainder_offset = 0;
+ // initialize indicies
+ for (int i = 0; i < num_dims; i++) {
+ // stride is the number of indices over in the flattened tensor
+ // you need to skip in order to make it over to an adjacent element
+ // along a dimension. dim_size[i] != 0 because we set it to max(dim, 1)
+ const int64 stride = dim_range[i] / dim_size[i];
+ const int shift = dim_size[i] - threshold[i];
+ const int indx = (start / stride) % dim_size[i];
+ indicies[i] = indx;
+ // calculate dimension index after the shift
+ int out_indx = (indx + shift) % dim_size[i];
+ if (i > isd) {
+ // trailing zeroes for indices after the inner shifted dimension
+ out_indx = 0;
+ remainder_offset += (out_indx - indx) * stride;
+ }
+ out_ptr += (out_indx - indx) * stride;
+ }
+ // set trailing zeroes for indices after the inner shifted dimension
+ for (int i = num_dims - 1; i > isd; i--) indicies[i] = 0;
+
+ // the number of indices in the isd dimension the next group will skip
+ // to make it to the next threshold or end point
+ int isd_indx_skip = 0;
+ // the size of the next group
+ int64 group_size = 0;
+ // initialize isd_indx_skip and group_size
+ if (indicies[isd] < threshold[isd]) {
+ isd_indx_skip = threshold[isd] - indicies[isd];
+ group_size = isd_indx_skip * isd_stride + remainder_offset;
+ } else {
+ isd_indx_skip = dim_size[isd] - indicies[isd];
+ group_size = isd_indx_skip * isd_stride + remainder_offset;
+ }
+
+ int64 i = start;
+ while (i < end) {
+ // copy group of elements
+ memcpy(out_ptr, in_ptr, group_size * sizeof(T));
+
+ // shift i and the pointers over to the next group position
+ i += group_size;
+ out_ptr += group_size;
+ in_ptr += group_size;
+
+ // produce next combination of indices and adjust the out_ptr position
+ // to fix the offset if necessary
+ // the isd (inner shift dim) should skip to next threshold or endpoint
+ // all dimensions to the left increment by 1 when a digit is carried
+ // all dimensions to the right remain set to 0
+ // +1 +1 +1 +isd_indx_skip
+ // indicies = [i, j, k, l, 0, 0]
+ // ^isd
+ for (int j = isd; j >= 0; j--) {
+ int inc = 1;
+ if (j == isd) inc = isd_indx_skip;
+ const int indx = (indicies[j] + inc) % dim_size[j];
+ indicies[j] = indx;
+ if (indx != 0) {
+ if (indx == threshold[j]) {
+ out_ptr -= dim_range[j]; // now wraps around
+ }
+ break; // indx != 0 don't need to carry
+ } else if (threshold[j] != 0) { // if threshold is 0 shift is 0
+ out_ptr += dim_range[j]; // indx became 0 so reverse wrap around
+ }
+ }
+
+ // set isd_indx_skip and group_size for next iteration
+ if (indicies[isd] < threshold[isd]) {
+ isd_indx_skip = threshold[isd] - indicies[isd];
+ group_size = isd_indx_skip * isd_stride;
+ } else {
+ isd_indx_skip = dim_size[isd] - indicies[isd];
+ group_size = isd_indx_skip * isd_stride;
+ }
+ }
+ };
+ // Shard
+ auto worker_threads = context->device()->tensorflow_cpu_worker_threads();
+ const int64 ave_group_size = dim_range[isd] / 2;
+ const int total_work = 2 * num_elements / std::max<int>(dim_range[isd], 1);
+ // 25000 - experimentally determined with float and bool types
+ const int cost_per_group = 25000 * sizeof(T) * ave_group_size;
+ Shard(worker_threads->num_threads, worker_threads->workers, total_work,
+ cost_per_group, std::move(work));
+}
+
+template <typename Device, typename T, typename Tshift, typename Taxis>
+class RollOp : public OpKernel {
+ public:
+ explicit RollOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+ void Compute(OpKernelContext* context) override {
+ // Grab the input tensor
+ const Tensor& input = context->input(0);
+ const Tensor& shift = context->input(1);
+ const Tensor& axis = context->input(2);
+
+ auto shift_flat = shift.flat<Tshift>();
+ auto axis_flat = axis.flat<Taxis>();
+
+ OP_REQUIRES(context, TensorShapeUtils::IsVectorOrHigher(input.shape()),
+ errors::InvalidArgument("input must be 1-D or higher"));
+ OP_REQUIRES(context, shift.shape().dims() <= 1,
+ errors::InvalidArgument(
+ "shift must be a scalar or a 1-D vector. Found: ",
+ shift.shape().DebugString()));
+ OP_REQUIRES(context, axis.shape().dims() <= 1,
+ errors::InvalidArgument(
+ "axis must be a scalar or a 1-D vector. Found: ",
+ axis.shape().DebugString()));
+ OP_REQUIRES(
+ context, shift.shape() == axis.shape(),
+ errors::InvalidArgument("shift and axis must have the same size"));
+ const int64 num_elements = input.NumElements();
+ const int num_shifts = static_cast<int>(shift_flat.size());
+ const int num_dims = input.dims();
+
+ // if there are any duplicate axes, shift_mod_sum will have the
+ // total modulo sum of shifts for each dimension
+ gtl::InlinedVector<int, 4> shift_mod_sum(num_dims, 0);
+ for (int i = 0; i < num_shifts; i++) {
+ const int axis = axis_flat(i);
+ OP_REQUIRES(context, axis < num_dims,
+ errors::InvalidArgument("axis ", axis, " is out of range"));
+ const int ds = std::max<int>(static_cast<int>(input.dim_size(axis)), 1);
+ const int sum = shift_mod_sum[axis] + static_cast<int>(shift_flat(i));
+ // modulo that works with negatives: ((x % y) + y) % y
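+ // e.g. a shift of -1 on a dimension of size 5 becomes ((-1 % 5) + 5) % 5 = 4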
+ shift_mod_sum[axis] = (sum % ds + ds) % ds;
+ }
+ // the size of each dimension
+ gtl::InlinedVector<int, 4> dim_size(num_dims);
+ // threshold[i] is the index that the roll starts to wrap back to the front
+ gtl::InlinedVector<int, 4> threshold(num_dims);
+ // dim_range is the number of indices over in the flattened tensor
+ // you need to skip in order to make it over from one side of a dimension
+ // to the other. Used to make the shifts wrap around after a threshold.
+ gtl::InlinedVector<int64, 4> dim_range(num_dims);
+ int64 dim_size_prod = 1; // dimension size product
+ // inner shift dimension (inner most shifted dimension)
+ int64 isd = 0;
+ for (int i = num_dims - 1; i >= 0; i--) {
+ if (isd == 0 && shift_mod_sum[i] != 0) isd = i;
+ const int ds = std::max<int>(static_cast<int>(input.dim_size(i)), 1);
+ dim_size[i] = ds;
+ threshold[i] = (ds - shift_mod_sum[i]) % ds;
+ dim_size_prod *= static_cast<int64>(input.dim_size(i));
+ dim_range[i] = dim_size_prod;
+ }
+
+ Tensor* output = NULL;
+ OP_REQUIRES_OK(context,
+ context->allocate_output(0, input.shape(), &output));
+ auto input_flat = input.flat<T>().data();
+ auto output_flat = output->flat<T>().data();
+
+ if (std::is_same<Device, CPUDevice>::value) {
+ if (DataTypeCanUseMemcpy(DataTypeToEnum<T>::v())) {
+ // DoRollWithMemcpy copies memory in groups instead of element by element
+ DoRollWithMemcpy<T>(context, num_elements, num_dims, dim_size,
+ input_flat, output_flat, threshold, dim_range, isd);
+ } else {
+ // in case memcpy does not work for the current data type
+ DoRoll<T>(context, num_elements, num_dims, dim_size, input_flat,
+ output_flat, threshold, dim_range);
+ }
+ }
+ }
+};
+
+// Register the CPU kernels.
+#define REGISTER_CPU(type) \
+ REGISTER_KERNEL_BUILDER(Name("Roll") \
+ .Device(DEVICE_CPU) \
+ .TypeConstraint<type>("T") \
+ .TypeConstraint<int32>("Tshift") \
+ .TypeConstraint<int32>("Taxis"), \
+ RollOp<CPUDevice, type, int32, int32>) \
+ REGISTER_KERNEL_BUILDER(Name("Roll") \
+ .Device(DEVICE_CPU) \
+ .TypeConstraint<type>("T") \
+ .TypeConstraint<int64>("Tshift") \
+ .TypeConstraint<int32>("Taxis"), \
+ RollOp<CPUDevice, type, int64, int32>) \
+ REGISTER_KERNEL_BUILDER(Name("Roll") \
+ .Device(DEVICE_CPU) \
+ .TypeConstraint<type>("T") \
+ .TypeConstraint<int32>("Tshift") \
+ .TypeConstraint<int64>("Taxis"), \
+ RollOp<CPUDevice, type, int32, int64>) \
+ REGISTER_KERNEL_BUILDER(Name("Roll") \
+ .Device(DEVICE_CPU) \
+ .TypeConstraint<type>("T") \
+ .TypeConstraint<int64>("Tshift") \
+ .TypeConstraint<int64>("Taxis"), \
+ RollOp<CPUDevice, type, int64, int64>)
+
+TF_CALL_ALL_TYPES(REGISTER_CPU);
+#undef REGISTER_CPU
+} // namespace tensorflow
diff --git a/tensorflow/core/kernels/roll_op_test.cc b/tensorflow/core/kernels/roll_op_test.cc
new file mode 100644
index 0000000000..90b6f8d0f3
--- /dev/null
+++ b/tensorflow/core/kernels/roll_op_test.cc
@@ -0,0 +1,484 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <functional>
+#include <memory>
+
+#include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/common_runtime/device_factory.h"
+#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+
+namespace tensorflow {
+namespace {
+
+class RollOpTest : public OpsTestBase {
+ protected:
+ void MakeOp(DataType data_type, DataType index_type) {
+ TF_ASSERT_OK(NodeDefBuilder("myop", "Roll")
+ .Input(FakeInput(data_type))
+ .Input(FakeInput(index_type))
+ .Input(FakeInput(index_type))
+ .Finalize(node_def()));
+ TF_ASSERT_OK(InitOp());
+ }
+};
+
+TEST_F(RollOpTest, ScalarIndices) {
+ MakeOp(DT_FLOAT, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<float>(TensorShape({5}), {0, 1, 2, 3, 4});
+ AddInputFromArray<int32>(TensorShape({}), {3});
+ AddInputFromArray<int32>(TensorShape({}), {0});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_FLOAT, TensorShape({5}));
+ test::FillValues<float>(&expected, {2, 3, 4, 0, 1});
+ test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, ScalarIndices_NoMemcpy) {
+ MakeOp(DT_STRING, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<string>(TensorShape({5}), {"a", "b", "c", "d", "e"});
+ AddInputFromArray<int32>(TensorShape({}), {3});
+ AddInputFromArray<int32>(TensorShape({}), {0});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_STRING, TensorShape({5}));
+ test::FillValues<string>(&expected, {"c", "d", "e", "a", "b"});
+ test::ExpectTensorEqual<string>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, ScalarIndices_Complex) {
+ MakeOp(DT_COMPLEX64, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<std::complex<float>>(
+ TensorShape({5}), {std::complex<float>(0, 10), std::complex<float>(1, 11),
+ std::complex<float>(2, 12), std::complex<float>(3, 13),
+ std::complex<float>(4, 14)});
+ AddInputFromArray<int32>(TensorShape({}), {3});
+ AddInputFromArray<int32>(TensorShape({}), {0});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_COMPLEX64, TensorShape({5}));
+ test::FillValues<std::complex<float>>(
+ &expected, {std::complex<float>(2, 12), std::complex<float>(3, 13),
+ std::complex<float>(4, 14), std::complex<float>(0, 10),
+ std::complex<float>(1, 11)});
+ test::ExpectTensorEqual<std::complex<float>>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, Simple_TwoD32) {
+ MakeOp(DT_FLOAT, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<float>(TensorShape({3, 5}),
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14});
+ AddInputFromArray<int32>(TensorShape({2}), {2, -1});
+ AddInputFromArray<int32>(TensorShape({2}), {0, 1});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_FLOAT, TensorShape({3, 5}));
+ test::FillValues<float>(&expected,
+ {6, 7, 8, 9, 5, 11, 12, 13, 14, 10, 1, 2, 3, 4, 0});
+ test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, Simple_TwoD32_NoMemcpy) {
+ MakeOp(DT_STRING, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<string>(TensorShape({3, 5}),
+ {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
+ "k", "l", "m", "n", "o"});
+ AddInputFromArray<int32>(TensorShape({2}), {2, -1});
+ AddInputFromArray<int32>(TensorShape({2}), {0, 1});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_STRING, TensorShape({3, 5}));
+ test::FillValues<string>(&expected, {"g", "h", "i", "j", "f", "l", "m", "n",
+ "o", "k", "b", "c", "d", "e", "a"});
+ test::ExpectTensorEqual<string>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, Simple_ThreeD32) {
+ MakeOp(DT_FLOAT, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<float>(TensorShape({2, 2, 3}),
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ AddInputFromArray<int32>(TensorShape({3}), {1, -1, -1});
+ AddInputFromArray<int32>(TensorShape({3}), {0, 1, 2});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 2, 3}));
+ test::FillValues<float>(&expected, {10, 11, 9, 7, 8, 6, 4, 5, 3, 1, 2, 0});
+ test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, Simple_ThreeD32_NoMemcpy) {
+ MakeOp(DT_STRING, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<string>(
+ TensorShape({2, 2, 3}),
+ {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"});
+ AddInputFromArray<int32>(TensorShape({3}), {1, -1, -1});
+ AddInputFromArray<int32>(TensorShape({3}), {0, 1, 2});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_STRING, TensorShape({2, 2, 3}));
+ test::FillValues<string>(
+ &expected, {"k", "l", "j", "h", "i", "g", "e", "f", "d", "b", "c", "a"});
+ test::ExpectTensorEqual<string>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, Simple_TwoD64) {
+ MakeOp(DT_FLOAT, DT_INT64);
+
+ // Feed and run
+ AddInputFromArray<float>(TensorShape({5, 3}),
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14});
+ AddInputFromArray<int64>(TensorShape({2}), {-1, 4});
+ AddInputFromArray<int64>(TensorShape({2}), {0, 1});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_FLOAT, TensorShape({5, 3}));
+ test::FillValues<float>(&expected,
+ {5, 3, 4, 8, 6, 7, 11, 9, 10, 14, 12, 13, 2, 0, 1});
+ test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, Simple_TwoD64_NoMemcpy) {
+ MakeOp(DT_STRING, DT_INT64);
+
+ // Feed and run
+ AddInputFromArray<string>(TensorShape({5, 3}),
+ {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
+ "k", "l", "m", "n", "o"});
+ AddInputFromArray<int64>(TensorShape({2}), {-1, 4});
+ AddInputFromArray<int64>(TensorShape({2}), {0, 1});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_STRING, TensorShape({5, 3}));
+ test::FillValues<string>(&expected, {"f", "d", "e", "i", "g", "h", "l", "j",
+ "k", "o", "m", "n", "c", "a", "b"});
+ test::ExpectTensorEqual<string>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, Simple_ThreeD64) {
+ MakeOp(DT_FLOAT, DT_INT64);
+
+ // Feed and run
+ AddInputFromArray<float>(TensorShape({4, 1, 3}),
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ AddInputFromArray<int64>(TensorShape({3}), {4, 3, 2});
+ AddInputFromArray<int64>(TensorShape({3}), {0, 1, 2});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_FLOAT, TensorShape({4, 1, 3}));
+ test::FillValues<float>(&expected, {1, 2, 0, 4, 5, 3, 7, 8, 6, 10, 11, 9});
+ test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, Simple_ThreeD64_NoMemcpy) {
+ MakeOp(DT_STRING, DT_INT64);
+
+ // Feed and run
+ AddInputFromArray<string>(
+ TensorShape({4, 1, 3}),
+ {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"});
+ AddInputFromArray<int64>(TensorShape({3}), {4, 3, 2});
+ AddInputFromArray<int64>(TensorShape({3}), {0, 1, 2});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_STRING, TensorShape({4, 1, 3}));
+ test::FillValues<string>(
+ &expected, {"b", "c", "a", "e", "f", "d", "h", "i", "g", "k", "l", "j"});
+ test::ExpectTensorEqual<string>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, ZeroShift_ThreeD32) {
+ MakeOp(DT_FLOAT, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<float>(TensorShape({2, 2, 3}),
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ AddInputFromArray<int32>(TensorShape({3}), {0, 0, 0});
+ AddInputFromArray<int32>(TensorShape({3}), {0, 1, 2});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 2, 3}));
+ test::FillValues<float>(&expected, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, ZeroShift_ThreeD32_NoMemcpy) {
+ MakeOp(DT_STRING, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<string>(
+ TensorShape({2, 2, 3}),
+ {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"});
+ AddInputFromArray<int32>(TensorShape({3}), {0, 0, 0});
+ AddInputFromArray<int32>(TensorShape({3}), {0, 1, 2});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_STRING, TensorShape({2, 2, 3}));
+ test::FillValues<string>(
+ &expected, {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"});
+ test::ExpectTensorEqual<string>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, ZeroSize_ThreeD32) {
+ MakeOp(DT_FLOAT, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<float>(TensorShape({5, 0, 0}), {});
+ AddInputFromArray<int32>(TensorShape({}), {1});
+ AddInputFromArray<int32>(TensorShape({}), {0});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_FLOAT, TensorShape({5, 0, 0}));
+ test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, ZeroSize_ThreeD32_NoMemcpy) {
+ MakeOp(DT_STRING, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<string>(TensorShape({5, 0, 0}), {});
+ AddInputFromArray<int32>(TensorShape({}), {1});
+ AddInputFromArray<int32>(TensorShape({}), {0});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_STRING, TensorShape({5, 0, 0}));
+ test::ExpectTensorEqual<string>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, OneSize_ThreeD32) {
+ MakeOp(DT_FLOAT, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<float>(TensorShape({1, 1, 1}), {5});
+ AddInputFromArray<int32>(TensorShape({}), {1});
+ AddInputFromArray<int32>(TensorShape({}), {0});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 1, 1}));
+ test::FillValues<float>(&expected, {5});
+ test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, OneSize_ThreeD32_NoMemcpy) {
+ MakeOp(DT_STRING, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<string>(TensorShape({1, 1, 1}), {"a"});
+ AddInputFromArray<int32>(TensorShape({}), {1});
+ AddInputFromArray<int32>(TensorShape({}), {0});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_STRING, TensorShape({1, 1, 1}));
+ test::FillValues<string>(&expected, {"a"});
+ test::ExpectTensorEqual<string>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, MultiShifts_TwoD32) {
+ MakeOp(DT_FLOAT, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<float>(TensorShape({3, 5}),
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14});
+ AddInputFromArray<int32>(TensorShape({4}), {-2, 2, -1, 1});
+ AddInputFromArray<int32>(TensorShape({4}), {1, 0, 0, 1});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_FLOAT, TensorShape({3, 5}));
+ test::FillValues<float>(&expected,
+ {11, 12, 13, 14, 10, 1, 2, 3, 4, 0, 6, 7, 8, 9, 5});
+ test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, MultiShifts_TwoD32_NoMemcpy) {
+ MakeOp(DT_STRING, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<string>(TensorShape({3, 5}),
+ {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
+ "k", "l", "m", "n", "o"});
+ AddInputFromArray<int32>(TensorShape({4}), {-2, 2, -1, 1});
+ AddInputFromArray<int32>(TensorShape({4}), {1, 0, 0, 1});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // Check the output.
+ Tensor expected(allocator(), DT_STRING, TensorShape({3, 5}));
+ test::FillValues<string>(&expected, {"l", "m", "n", "o", "k", "b", "c", "d",
+ "e", "a", "g", "h", "i", "j", "f"});
+ test::ExpectTensorEqual<string>(expected, *GetOutput(0));
+}
+
+TEST_F(RollOpTest, Error_InputMustBeVectorOrHigher) {
+ MakeOp(DT_FLOAT, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<float>(TensorShape({}), {7});
+ AddInputFromArray<int32>(TensorShape({}), {1});
+ AddInputFromArray<int32>(TensorShape({}), {0});
+ Status s = RunOpKernel();
+ EXPECT_TRUE(StringPiece(s.ToString()).contains("input must be 1-D or higher"))
+ << s;
+}
+
+TEST_F(RollOpTest, Error_AxisMustBeScalarOrVector) {
+ MakeOp(DT_FLOAT, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<float>(TensorShape({2, 2}), {1, 2, 3, 4});
+ AddInputFromArray<int32>(TensorShape({}), {1});
+ AddInputFromArray<int32>(TensorShape({1, 2}), {0, 1});
+ Status s = RunOpKernel();
+ EXPECT_TRUE(StringPiece(s.ToString())
+ .contains("axis must be a scalar or a 1-D vector"))
+ << s;
+}
+
+TEST_F(RollOpTest, Error_ShiftMustBeScalarOrVector) {
+ MakeOp(DT_FLOAT, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<float>(TensorShape({2, 2}), {1, 2, 3, 4});
+ AddInputFromArray<int32>(TensorShape({1, 2}), {0, 1});
+ AddInputFromArray<int32>(TensorShape({}), {1});
+ Status s = RunOpKernel();
+ EXPECT_TRUE(StringPiece(s.ToString())
+ .contains("shift must be a scalar or a 1-D vector"))
+ << s;
+}
+
+TEST_F(RollOpTest, Error_ShiftAndAxisMustBeSameSize) {
+ MakeOp(DT_FLOAT, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<float>(TensorShape({2, 2}), {1, 2, 3, 4});
+ AddInputFromArray<int32>(TensorShape({1}), {1});
+ AddInputFromArray<int32>(TensorShape({2}), {0, 1});
+ Status s = RunOpKernel();
+ EXPECT_TRUE(StringPiece(s.ToString())
+ .contains("shift and axis must have the same size"))
+ << s;
+}
+
+TEST_F(RollOpTest, Error_AxisOutOfRange) {
+ MakeOp(DT_FLOAT, DT_INT32);
+
+ // Feed and run
+ AddInputFromArray<float>(TensorShape({4}), {1, 2, 3, 4});
+ AddInputFromArray<int32>(TensorShape({}), {1});
+ AddInputFromArray<int32>(TensorShape({}), {1});
+ Status s = RunOpKernel();
+ EXPECT_TRUE(StringPiece(s.ToString()).contains("is out of range")) << s;
+}
+
+// isd - (inner shift dimension) The innermost dimension to be shifted.
+// All outer dimensions will also be shifted for testing.
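+// For example, for a 2-D shape, isd = 0 rolls only the outer dimension
+// (shift = {2, 0}), while isd = 1 rolls both dimensions (shift = {2, 2}).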
+static Graph* RollGraph(const TensorShape& shape, int isd) {
+ Graph* g = new Graph(OpRegistry::Global());
+ Tensor input(DT_FLOAT, shape);
+ input.flat<float>().setRandom();
+ const int dims = static_cast<int>(input.dims());
+ Tensor shift(DT_INT32, TensorShape({dims}));
+ for (int i = 0; i < dims; i++) {
+ // shift the inner shift dimension and all outer dimensions
+ shift.flat<int32>()(i) = (i <= isd) ? 2 : 0;
+ }
+ Tensor axis(DT_INT32, TensorShape({dims}));
+ for (int i = 0; i < dims; i++) {
+ axis.flat<int32>()(i) = i;
+ }
+ test::graph::Roll(g, test::graph::Constant(g, input),
+ test::graph::Constant(g, shift),
+ test::graph::Constant(g, axis));
+ return g;
+}
+
+#define BM_ROLL_OUTER(DEVICE) \
+ static void BM_##DEVICE##_roll_outer(int iters, int rows, int columns) { \
+ TensorShape shape{rows, columns}; \
+ const int64 num_items = static_cast<int64>(iters) * shape.num_elements(); \
+ testing::ItemsProcessed(num_items); \
+ testing::BytesProcessed(num_items * sizeof(float)); \
+ testing::UseRealTime(); \
+ test::Benchmark(#DEVICE, RollGraph(shape, 0)).Run(iters); \
+ } \
+ BENCHMARK(BM_##DEVICE##_roll_outer) \
+ ->ArgPair(256, 256) \
+ ->ArgPair(512, 512) \
+ ->ArgPair(1024, 1024) \
+ ->ArgPair(2048, 2048)
+
+#define BM_ROLL_ALL(DEVICE) \
+ static void BM_##DEVICE##_roll_all(int iters, int rows, int columns) { \
+ TensorShape shape{rows, columns}; \
+ const int64 num_items = static_cast<int64>(iters) * shape.num_elements(); \
+ testing::ItemsProcessed(num_items); \
+ testing::BytesProcessed(num_items * sizeof(float)); \
+ testing::UseRealTime(); \
+ test::Benchmark(#DEVICE, RollGraph(shape, 1)).Run(iters); \
+ } \
+ BENCHMARK(BM_##DEVICE##_roll_all) \
+ ->ArgPair(256, 256) \
+ ->ArgPair(512, 512) \
+ ->ArgPair(1024, 1024) \
+ ->ArgPair(2048, 2048)
+
+BM_ROLL_OUTER(cpu);
+BM_ROLL_ALL(cpu);
+} // namespace
+} // namespace tensorflow
diff --git a/tensorflow/core/kernels/unravel_index_op.cc b/tensorflow/core/kernels/unravel_index_op.cc
new file mode 100644
index 0000000000..a61272675b
--- /dev/null
+++ b/tensorflow/core/kernels/unravel_index_op.cc
@@ -0,0 +1,122 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define EIGEN_USE_THREADS
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.h"
+
+namespace tensorflow {
+
+namespace {
+template <typename T>
+struct mod_op {
+ const T operator()(const T& a, const T& b) const { return a % b; }
+};
+} // namespace
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+template <typename Tidx>
+class UnravelIndexOp : public OpKernel {
+ public:
+ explicit UnravelIndexOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+ void Compute(OpKernelContext* ctx) override {
+ const Tensor& indices_tensor = ctx->input(0);
+ OP_REQUIRES(ctx,
+ TensorShapeUtils::IsVector(indices_tensor.shape()) ||
+ TensorShapeUtils::IsScalar(indices_tensor.shape()),
+ errors::InvalidArgument(
+ "The indices can only be scalar or vector, got \"",
+ indices_tensor.shape().DebugString(), "\""));
+
+ const Tensor& dims_tensor = ctx->input(1);
+ OP_REQUIRES(
+ ctx, TensorShapeUtils::IsVector(dims_tensor.shape()),
+        errors::InvalidArgument("The dims can only be 1-D, got \"",
+ dims_tensor.shape().DebugString(), "\""));
+
+ auto dims = dims_tensor.vec<Tidx>();
+
+ Eigen::array<bool, 1> reverse({true});
+
+ Tensor strides_tensor;
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_temp(DataTypeToEnum<Tidx>::value,
+ TensorShape({dims_tensor.NumElements()}),
+ &strides_tensor));
+
+ auto strides = strides_tensor.vec<Tidx>();
+ strides = dims.reverse(reverse)
+ .scan(0, Eigen::internal::ProdReducer<Tidx>(), false)
+ .reverse(reverse);
+
+ Tensor strides_shifted_tensor;
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_temp(DataTypeToEnum<Tidx>::value,
+ TensorShape({dims_tensor.NumElements()}),
+ &strides_shifted_tensor));
+
+ auto strides_shifted = strides_shifted_tensor.vec<Tidx>();
+ strides_shifted = dims.reverse(reverse)
+ .scan(0, Eigen::internal::ProdReducer<Tidx>(), true)
+ .reverse(reverse);
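+    // For example, with dims = [3, 4, 5]:
+    //   strides         = [60, 20, 5]  (inclusive suffix products of dims)
+    //   strides_shifted = [20,  5, 1]  (exclusive suffix products of dims)
+    // so coordinate i of a flat index is (index % strides[i]) / strides_shifted[i];
+    // e.g. index 37 unravels to [1, 3, 2].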
+
+ Tensor* output_tensor = nullptr;
+ if (TensorShapeUtils::IsScalar(indices_tensor.shape())) {
+ OP_REQUIRES_OK(
+ ctx, ctx->allocate_output(0, TensorShape({dims_tensor.NumElements()}),
+ &output_tensor));
+
+ auto output = output_tensor->vec<Tidx>();
+
+ output = output.constant(indices_tensor.scalar<Tidx>()());
+ output = output.binaryExpr(strides, mod_op<Tidx>()) / strides_shifted;
+ } else {
+ OP_REQUIRES_OK(
+ ctx, ctx->allocate_output(0,
+ TensorShape({dims_tensor.NumElements(),
+ indices_tensor.NumElements()}),
+ &output_tensor));
+
+ auto output = output_tensor->matrix<Tidx>();
+
+ Eigen::array<int64, 2> reshape{{dims_tensor.NumElements(), 1}};
+ Eigen::array<int64, 2> bcast({1, indices_tensor.NumElements()});
+ Eigen::array<int64, 2> indices_reshape{{1, indices_tensor.NumElements()}};
+ Eigen::array<int64, 2> indices_bcast({dims_tensor.NumElements(), 1});
+
+ output = indices_tensor.vec<Tidx>()
+ .reshape(indices_reshape)
+ .broadcast(indices_bcast);
+ output = output.binaryExpr(strides.reshape(reshape).broadcast(bcast),
+ mod_op<Tidx>()) /
+ strides_shifted.reshape(reshape).broadcast(bcast);
+ }
+ }
+};
+
+#define REGISTER_KERNEL(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("UnravelIndex").Device(DEVICE_CPU).TypeConstraint<type>("Tidx"), \
+ UnravelIndexOp<type>);
+TF_CALL_int32(REGISTER_KERNEL) TF_CALL_int64(REGISTER_KERNEL)
+#undef REGISTER_KERNEL
+
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/io/random_inputstream.cc b/tensorflow/core/lib/io/random_inputstream.cc
index 8b8c1392a1..09336e79cd 100644
--- a/tensorflow/core/lib/io/random_inputstream.cc
+++ b/tensorflow/core/lib/io/random_inputstream.cc
@@ -57,6 +57,43 @@ Status RandomAccessInputStream::ReadNBytes(int64 bytes_to_read,
return Status::OK();
}
+// To limit memory usage, SkipNBytes() reads at most 8MB at a time.
+static constexpr int64 kMaxSkipSize = 8 * 1024 * 1024;
+
+Status RandomAccessInputStream::SkipNBytes(int64 bytes_to_skip) {
+ if (bytes_to_skip < 0) {
+ return errors::InvalidArgument("Can't skip a negative number of bytes");
+ }
+ std::unique_ptr<char[]> scratch(new char[kMaxSkipSize]);
+  // Try to read 1 byte first; if the read completes, EOF has not been
+  // reached yet and we can return.
+ if (bytes_to_skip > 0) {
+ StringPiece data;
+ Status s = file_->Read(pos_ + bytes_to_skip - 1, 1, &data, scratch.get());
+ if ((s.ok() || errors::IsOutOfRange(s)) && data.size() == 1) {
+ pos_ += bytes_to_skip;
+ return Status::OK();
+ }
+ }
+  // Read up to kMaxSkipSize bytes at a time until bytes_to_skip is exhausted.
+ while (bytes_to_skip > 0) {
+ int64 bytes_to_read = std::min<int64>(kMaxSkipSize, bytes_to_skip);
+ StringPiece data;
+ Status s = file_->Read(pos_, bytes_to_read, &data, scratch.get());
+ if (s.ok() || errors::IsOutOfRange(s)) {
+ pos_ += data.size();
+ } else {
+ return s;
+ }
+ if (data.size() < bytes_to_read) {
+ return errors::OutOfRange("reached end of file");
+ }
+ bytes_to_skip -= bytes_to_read;
+ }
+ return Status::OK();
+}
+
int64 RandomAccessInputStream::Tell() const { return pos_; }
} // namespace io
diff --git a/tensorflow/core/lib/io/random_inputstream.h b/tensorflow/core/lib/io/random_inputstream.h
index 09ebe9ba49..bdbdbd71ff 100644
--- a/tensorflow/core/lib/io/random_inputstream.h
+++ b/tensorflow/core/lib/io/random_inputstream.h
@@ -34,6 +34,8 @@ class RandomAccessInputStream : public InputStreamInterface {
Status ReadNBytes(int64 bytes_to_read, string* result) override;
+ Status SkipNBytes(int64 bytes_to_skip) override;
+
int64 Tell() const override;
Status Seek(int64 position) {
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 87dfa77689..267ce88440 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -335,6 +335,13 @@ REGISTER_OP("Unpack")
return Status::OK();
});
+REGISTER_OP("UnravelIndex")
+ .Input("indices: Tidx")
+ .Input("dims: Tidx")
+ .Output("output: Tidx")
+ .Attr("Tidx: {int32, int64} = DT_INT32")
+ .SetShapeFn([](InferenceContext* c) { return Status::OK(); });
+
// --------------------------------------------------------------------------
// TODO(josh11b): Remove the >= 2 constraint, once we can rewrite the graph
// in the N == 1 case to remove the node.
diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc
index ef2ac267cc..a62e2d782b 100644
--- a/tensorflow/core/ops/image_ops.cc
+++ b/tensorflow/core/ops/image_ops.cc
@@ -586,6 +586,17 @@ REGISTER_OP("NonMaxSuppression")
.Output("selected_indices: int32")
.Attr("iou_threshold: float = 0.5")
.SetShapeFn([](InferenceContext* c) {
+ // Get inputs and validate ranks.
+ ShapeHandle boxes;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &boxes));
+ ShapeHandle scores;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &scores));
+ ShapeHandle max_output_size;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &max_output_size));
+      // The boxes input is a 2-D float Tensor of shape [num_boxes, 4].
+ DimensionHandle unused;
+ TF_RETURN_IF_ERROR(c->WithValue(c->Dim(boxes, 1), 4, &unused));
+
c->set_output(0, c->Vector(c->UnknownDim()));
return Status::OK();
});
@@ -597,6 +608,19 @@ REGISTER_OP("NonMaxSuppressionV2")
.Input("iou_threshold: float")
.Output("selected_indices: int32")
.SetShapeFn([](InferenceContext* c) {
+ // Get inputs and validate ranks.
+ ShapeHandle boxes;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &boxes));
+ ShapeHandle scores;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &scores));
+ ShapeHandle max_output_size;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &max_output_size));
+ ShapeHandle iou_threshold;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &iou_threshold));
+      // The boxes input is a 2-D float Tensor of shape [num_boxes, 4].
+ DimensionHandle unused;
+ TF_RETURN_IF_ERROR(c->WithValue(c->Dim(boxes, 1), 4, &unused));
+
c->set_output(0, c->Vector(c->UnknownDim()));
return Status::OK();
});
diff --git a/tensorflow/core/ops/manip_ops.cc b/tensorflow/core/ops/manip_ops.cc
new file mode 100644
index 0000000000..95b4774fe6
--- /dev/null
+++ b/tensorflow/core/ops/manip_ops.cc
@@ -0,0 +1,33 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+// --------------------------------------------------------------------------
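+// Roll circularly shifts the elements of `input` along the dimensions listed
+// in `axis` by the amounts given in `shift`; negative shifts roll the other
+// way (cf. np.roll).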
+REGISTER_OP("Roll")
+ .Input("input: T")
+ .Input("shift: Tshift")
+ .Input("axis: Taxis")
+ .Output("output: T")
+ .Attr("T: type")
+ .Attr("Tshift: {int32,int64}")
+ .Attr("Taxis: {int32,int64}")
+ .SetShapeFn(shape_inference::UnchangedShape);
+
+} // namespace tensorflow
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 62661fe4bd..67481fd202 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -1818,7 +1818,7 @@ REGISTER_OP("_MklMaxPool")
.Input("input: T")
.Input("mkl_input: uint8")
.Output("output: T")
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
.Output("workspace: T")
#else
.Output("workspace: uint8")
@@ -1844,7 +1844,7 @@ REGISTER_OP("_MklMaxPoolGrad")
.Input("orig_input: T")
.Input("orig_output: T")
.Input("grad: T")
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
.Input("workspace: T")
#else
.Input("workspace: uint8")
@@ -1916,7 +1916,7 @@ REGISTER_OP("_MklLRN")
.Input("input: T")
.Input("mkl_input: uint8")
.Output("output: T")
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
.Output("workspace: T")
#else
.Output("workspace: uint8")
@@ -1944,7 +1944,7 @@ REGISTER_OP("_MklLRNGrad")
.Input("input_grads: T")
.Input("input_image: T")
.Input("output_image: T")
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
.Input("workspace: T")
#else
.Input("workspace: uint8")
diff --git a/tensorflow/core/platform/cpu_feature_guard.cc b/tensorflow/core/platform/cpu_feature_guard.cc
index b0d7b3a67a..b570658158 100644
--- a/tensorflow/core/platform/cpu_feature_guard.cc
+++ b/tensorflow/core/platform/cpu_feature_guard.cc
@@ -97,14 +97,17 @@ std::once_flag g_cpu_feature_guard_warn_once_flag;
void InfoAboutUnusedCPUFeatures() {
std::call_once(g_cpu_feature_guard_warn_once_flag, [] {
string missing_instructions;
-#ifdef PLATFORM_WINDOWS
+#if defined(_MSC_VER) && !defined(__clang__)
+
#ifndef __AVX__
CheckIfFeatureUnused(CPUFeature::AVX, "AVX", missing_instructions);
#endif // __AVX__
#ifndef __AVX2__
CheckIfFeatureUnused(CPUFeature::AVX2, "AVX2", missing_instructions);
#endif // __AVX2__
-#else // ifdef platform windows
+
+#else // if defined(_MSC_VER) && !defined(__clang__)
+
#ifndef __SSE__
CheckIfFeatureUnused(CPUFeature::SSE, "SSE", missing_instructions);
#endif // __SSE__
@@ -132,7 +135,7 @@ void InfoAboutUnusedCPUFeatures() {
#ifndef __FMA__
CheckIfFeatureUnused(CPUFeature::FMA, "FMA", missing_instructions);
#endif // __FMA__
-#endif // else of ifdef platform windows
+#endif // else of if defined(_MSC_VER) && !defined(__clang__)
if (!missing_instructions.empty()) {
LOG(INFO) << "Your CPU supports instructions that this TensorFlow "
<< "binary was not compiled to use:" << missing_instructions;
diff --git a/tensorflow/core/platform/profile_utils/cpu_utils.h b/tensorflow/core/platform/profile_utils/cpu_utils.h
index 2da20bb1b8..7b580c8bf6 100644
--- a/tensorflow/core/platform/profile_utils/cpu_utils.h
+++ b/tensorflow/core/platform/profile_utils/cpu_utils.h
@@ -42,7 +42,7 @@ namespace profile_utils {
class CpuUtils {
public:
// Constant for invalid frequency.
- // This value is returned when the furequency is not obtained somehow.
+  // This value is returned when the frequency cannot be obtained.
static constexpr int64 INVALID_FREQUENCY = -1;
static constexpr uint64 DUMMY_CYCLE_CLOCK = 1;
@@ -105,7 +105,7 @@ class CpuUtils {
static int64 GetCycleCounterFrequency();
#endif
- // Return micro secound per each clock
+  // Return microseconds per clock
// As this method caches the cpu frequency internally,
// the first call will incur overhead, but not subsequent calls.
static double GetMicroSecPerClock();
diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc
index ebda3a2065..52bf0d4694 100644
--- a/tensorflow/core/platform/s3/s3_file_system.cc
+++ b/tensorflow/core/platform/s3/s3_file_system.cc
@@ -22,6 +22,7 @@ limitations under the License.
#include <aws/core/Aws.h>
#include <aws/core/config/AWSProfileConfigLoader.h>
#include <aws/core/utils/FileSystemUtils.h>
+#include <aws/core/utils/StringUtils.h>
#include <aws/core/utils/logging/AWSLogging.h>
#include <aws/core/utils/logging/LogSystemInterface.h>
#include <aws/s3/S3Client.h>
@@ -128,6 +129,15 @@ Aws::Client::ClientConfiguration& GetDefaultClientConfig() {
return cfg;
};
+void ShutdownClient(Aws::S3::S3Client* s3_client) {
+ if (s3_client != nullptr) {
+ delete s3_client;
+ Aws::SDKOptions options;
+ Aws::ShutdownAPI(options);
+ AWSLogSystem::ShutdownAWSLogging();
+ }
+}
+
Status ParseS3Path(const string& fname, bool empty_object_ok, string* bucket,
string* object) {
if (!bucket || !object) {
@@ -155,12 +165,12 @@ Status ParseS3Path(const string& fname, bool empty_object_ok, string* bucket,
class S3RandomAccessFile : public RandomAccessFile {
public:
- S3RandomAccessFile(const string& bucket, const string& object)
- : bucket_(bucket), object_(object) {}
+ S3RandomAccessFile(const string& bucket, const string& object,
+ std::shared_ptr<Aws::S3::S3Client> s3_client)
+ : bucket_(bucket), object_(object), s3_client_(s3_client) {}
Status Read(uint64 offset, size_t n, StringPiece* result,
char* scratch) const override {
- Aws::S3::S3Client s3Client(GetDefaultClientConfig());
Aws::S3::Model::GetObjectRequest getObjectRequest;
getObjectRequest.WithBucket(bucket_.c_str()).WithKey(object_.c_str());
string bytes = strings::StrCat("bytes=", offset, "-", offset + n - 1);
@@ -168,7 +178,7 @@ class S3RandomAccessFile : public RandomAccessFile {
getObjectRequest.SetResponseStreamFactory([]() {
return Aws::New<Aws::StringStream>(kS3FileSystemAllocationTag);
});
- auto getObjectOutcome = s3Client.GetObject(getObjectRequest);
+ auto getObjectOutcome = this->s3_client_->GetObject(getObjectRequest);
if (!getObjectOutcome.IsSuccess()) {
n = 0;
*result = StringPiece(scratch, n);
@@ -186,13 +196,16 @@ class S3RandomAccessFile : public RandomAccessFile {
private:
string bucket_;
string object_;
+ std::shared_ptr<Aws::S3::S3Client> s3_client_;
};
class S3WritableFile : public WritableFile {
public:
- S3WritableFile(const string& bucket, const string& object)
+ S3WritableFile(const string& bucket, const string& object,
+ std::shared_ptr<Aws::S3::S3Client> s3_client)
: bucket_(bucket),
object_(object),
+ s3_client_(s3_client),
sync_needed_(true),
outfile_(Aws::MakeShared<Aws::Utils::TempFile>(
kS3FileSystemAllocationTag, "/tmp/s3_filesystem_XXXXXX",
@@ -231,17 +244,13 @@ class S3WritableFile : public WritableFile {
if (!sync_needed_) {
return Status::OK();
}
- Aws::Client::ClientConfiguration clientConfig = GetDefaultClientConfig();
- clientConfig.connectTimeoutMs = 300000;
- clientConfig.requestTimeoutMs = 600000;
- Aws::S3::S3Client s3Client(clientConfig);
Aws::S3::Model::PutObjectRequest putObjectRequest;
putObjectRequest.WithBucket(bucket_.c_str()).WithKey(object_.c_str());
long offset = outfile_->tellp();
outfile_->seekg(0);
putObjectRequest.SetBody(outfile_);
putObjectRequest.SetContentLength(offset);
- auto putObjectOutcome = s3Client.PutObject(putObjectRequest);
+ auto putObjectOutcome = this->s3_client_->PutObject(putObjectRequest);
outfile_->clear();
outfile_->seekp(offset);
if (!putObjectOutcome.IsSuccess()) {
@@ -256,6 +265,7 @@ class S3WritableFile : public WritableFile {
private:
string bucket_;
string object_;
+ std::shared_ptr<Aws::S3::S3Client> s3_client_;
bool sync_needed_;
std::shared_ptr<Aws::Utils::TempFile> outfile_;
};
@@ -274,31 +284,46 @@ class S3ReadOnlyMemoryRegion : public ReadOnlyMemoryRegion {
} // namespace
-S3FileSystem::S3FileSystem() {
- AWSLogSystem::InitializeAWSLogging();
-
- Aws::SDKOptions options;
- options.cryptoOptions.sha256Factory_create_fn = []() {
- return Aws::MakeShared<S3SHA256Factory>(S3CryptoAllocationTag);
- };
- options.cryptoOptions.sha256HMACFactory_create_fn = []() {
- return Aws::MakeShared<S3SHA256HmacFactory>(S3CryptoAllocationTag);
- };
- Aws::InitAPI(options);
-}
-
-S3FileSystem::~S3FileSystem() {
- Aws::SDKOptions options;
- Aws::ShutdownAPI(options);
+S3FileSystem::S3FileSystem()
+ : s3_client_(nullptr, ShutdownClient), client_lock_() {}
+
+S3FileSystem::~S3FileSystem() {}
+
+// Initializes s3_client_, if needed, and returns it.
+std::shared_ptr<Aws::S3::S3Client> S3FileSystem::GetS3Client() {
+ std::lock_guard<mutex> lock(this->client_lock_);
+
+ if (this->s3_client_.get() == nullptr) {
+ AWSLogSystem::InitializeAWSLogging();
+
+ Aws::SDKOptions options;
+ options.cryptoOptions.sha256Factory_create_fn = []() {
+ return Aws::MakeShared<S3SHA256Factory>(S3CryptoAllocationTag);
+ };
+ options.cryptoOptions.sha256HMACFactory_create_fn = []() {
+ return Aws::MakeShared<S3SHA256HmacFactory>(S3CryptoAllocationTag);
+ };
+ Aws::InitAPI(options);
+
+    // The creation of S3Client disables virtual addressing:
+    //   S3Client(clientConfiguration, signPayloads, useVirtualAddressing = true)
+    // This addresses the issue encountered when there is a `.` in the
+    // bucket name. Due to TLS hostname validation or DNS rules,
+    // the bucket may not be resolved. Disabling virtual addressing
+    // avoids the issue. See GitHub issue 16397 for details.
+ this->s3_client_ = std::shared_ptr<Aws::S3::S3Client>(new Aws::S3::S3Client(
+ GetDefaultClientConfig(),
+ Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, false));
+ }
- AWSLogSystem::ShutdownAWSLogging();
+ return this->s3_client_;
}
Status S3FileSystem::NewRandomAccessFile(
const string& fname, std::unique_ptr<RandomAccessFile>* result) {
string bucket, object;
TF_RETURN_IF_ERROR(ParseS3Path(fname, false, &bucket, &object));
- result->reset(new S3RandomAccessFile(bucket, object));
+ result->reset(new S3RandomAccessFile(bucket, object, this->GetS3Client()));
return Status::OK();
}
@@ -306,7 +331,7 @@ Status S3FileSystem::NewWritableFile(const string& fname,
std::unique_ptr<WritableFile>* result) {
string bucket, object;
TF_RETURN_IF_ERROR(ParseS3Path(fname, false, &bucket, &object));
- result->reset(new S3WritableFile(bucket, object));
+ result->reset(new S3WritableFile(bucket, object, this->GetS3Client()));
return Status::OK();
}
@@ -321,7 +346,7 @@ Status S3FileSystem::NewAppendableFile(const string& fname,
string bucket, object;
TF_RETURN_IF_ERROR(ParseS3Path(fname, false, &bucket, &object));
- result->reset(new S3WritableFile(bucket, object));
+ result->reset(new S3WritableFile(bucket, object, this->GetS3Client()));
while (true) {
status = reader->Read(offset, kS3ReadAppendableFileBufferSize, &read_chunk,
@@ -372,7 +397,6 @@ Status S3FileSystem::GetChildren(const string& dir,
prefix.push_back('/');
}
- Aws::S3::S3Client s3Client(GetDefaultClientConfig());
Aws::S3::Model::ListObjectsRequest listObjectsRequest;
listObjectsRequest.WithBucket(bucket.c_str())
.WithPrefix(prefix.c_str())
@@ -383,7 +407,8 @@ Status S3FileSystem::GetChildren(const string& dir,
Aws::S3::Model::ListObjectsResult listObjectsResult;
do {
- auto listObjectsOutcome = s3Client.ListObjects(listObjectsRequest);
+ auto listObjectsOutcome =
+ this->GetS3Client()->ListObjects(listObjectsRequest);
if (!listObjectsOutcome.IsSuccess()) {
string error = strings::StrCat(
listObjectsOutcome.GetError().GetExceptionName().c_str(), ": ",
@@ -417,11 +442,10 @@ Status S3FileSystem::Stat(const string& fname, FileStatistics* stats) {
string bucket, object;
TF_RETURN_IF_ERROR(ParseS3Path(fname, true, &bucket, &object));
- Aws::S3::S3Client s3Client(GetDefaultClientConfig());
if (object.empty()) {
Aws::S3::Model::HeadBucketRequest headBucketRequest;
headBucketRequest.WithBucket(bucket.c_str());
- auto headBucketOutcome = s3Client.HeadBucket(headBucketRequest);
+ auto headBucketOutcome = this->GetS3Client()->HeadBucket(headBucketRequest);
if (!headBucketOutcome.IsSuccess()) {
string error = strings::StrCat(
headBucketOutcome.GetError().GetExceptionName().c_str(), ": ",
@@ -439,7 +463,7 @@ Status S3FileSystem::Stat(const string& fname, FileStatistics* stats) {
headObjectRequest.WithBucket(bucket.c_str()).WithKey(object.c_str());
headObjectRequest.SetResponseStreamFactory(
[]() { return Aws::New<Aws::StringStream>(kS3FileSystemAllocationTag); });
- auto headObjectOutcome = s3Client.HeadObject(headObjectRequest);
+ auto headObjectOutcome = this->GetS3Client()->HeadObject(headObjectRequest);
if (headObjectOutcome.IsSuccess()) {
stats->length = headObjectOutcome.GetResult().GetContentLength();
stats->is_directory = 0;
@@ -457,7 +481,8 @@ Status S3FileSystem::Stat(const string& fname, FileStatistics* stats) {
.WithMaxKeys(1);
listObjectsRequest.SetResponseStreamFactory(
[]() { return Aws::New<Aws::StringStream>(kS3FileSystemAllocationTag); });
- auto listObjectsOutcome = s3Client.ListObjects(listObjectsRequest);
+ auto listObjectsOutcome =
+ this->GetS3Client()->ListObjects(listObjectsRequest);
if (listObjectsOutcome.IsSuccess()) {
if (listObjectsOutcome.GetResult().GetContents().size() > 0) {
stats->length = 0;
@@ -475,11 +500,11 @@ Status S3FileSystem::DeleteFile(const string& fname) {
string bucket, object;
TF_RETURN_IF_ERROR(ParseS3Path(fname, false, &bucket, &object));
- Aws::S3::S3Client s3Client(GetDefaultClientConfig());
Aws::S3::Model::DeleteObjectRequest deleteObjectRequest;
deleteObjectRequest.WithBucket(bucket.c_str()).WithKey(object.c_str());
- auto deleteObjectOutcome = s3Client.DeleteObject(deleteObjectRequest);
+ auto deleteObjectOutcome =
+ this->GetS3Client()->DeleteObject(deleteObjectRequest);
if (!deleteObjectOutcome.IsSuccess()) {
string error = strings::StrCat(
deleteObjectOutcome.GetError().GetExceptionName().c_str(), ": ",
@@ -494,10 +519,9 @@ Status S3FileSystem::CreateDir(const string& dirname) {
TF_RETURN_IF_ERROR(ParseS3Path(dirname, true, &bucket, &object));
if (object.empty()) {
- Aws::S3::S3Client s3Client(GetDefaultClientConfig());
Aws::S3::Model::HeadBucketRequest headBucketRequest;
headBucketRequest.WithBucket(bucket.c_str());
- auto headBucketOutcome = s3Client.HeadBucket(headBucketRequest);
+ auto headBucketOutcome = this->GetS3Client()->HeadBucket(headBucketRequest);
if (!headBucketOutcome.IsSuccess()) {
return errors::NotFound("The bucket ", bucket, " was not found.");
}
@@ -517,7 +541,6 @@ Status S3FileSystem::DeleteDir(const string& dirname) {
string bucket, object;
TF_RETURN_IF_ERROR(ParseS3Path(dirname, false, &bucket, &object));
- Aws::S3::S3Client s3Client(GetDefaultClientConfig());
string prefix = object;
if (prefix.back() != '/') {
prefix.push_back('/');
@@ -528,7 +551,8 @@ Status S3FileSystem::DeleteDir(const string& dirname) {
.WithMaxKeys(2);
listObjectsRequest.SetResponseStreamFactory(
[]() { return Aws::New<Aws::StringStream>(kS3FileSystemAllocationTag); });
- auto listObjectsOutcome = s3Client.ListObjects(listObjectsRequest);
+ auto listObjectsOutcome =
+ this->GetS3Client()->ListObjects(listObjectsRequest);
if (listObjectsOutcome.IsSuccess()) {
auto contents = listObjectsOutcome.GetResult().GetContents();
if (contents.size() > 1 ||
@@ -568,8 +592,6 @@ Status S3FileSystem::RenameFile(const string& src, const string& target) {
}
}
- Aws::S3::S3Client s3Client(GetDefaultClientConfig());
-
Aws::S3::Model::CopyObjectRequest copyObjectRequest;
Aws::S3::Model::DeleteObjectRequest deleteObjectRequest;
@@ -582,7 +604,8 @@ Status S3FileSystem::RenameFile(const string& src, const string& target) {
Aws::S3::Model::ListObjectsResult listObjectsResult;
do {
- auto listObjectsOutcome = s3Client.ListObjects(listObjectsRequest);
+ auto listObjectsOutcome =
+ this->GetS3Client()->ListObjects(listObjectsRequest);
if (!listObjectsOutcome.IsSuccess()) {
string error = strings::StrCat(
listObjectsOutcome.GetError().GetExceptionName().c_str(), ": ",
@@ -595,13 +618,15 @@ Status S3FileSystem::RenameFile(const string& src, const string& target) {
Aws::String src_key = object.GetKey();
Aws::String target_key = src_key;
target_key.replace(0, src_object.length(), target_object.c_str());
- Aws::String source = Aws::String(src_bucket.c_str()) + "/" + src_key;
+ Aws::String source = Aws::String(src_bucket.c_str()) + "/" +
+ Aws::Utils::StringUtils::URLEncode(src_key.c_str());
copyObjectRequest.SetBucket(target_bucket.c_str());
copyObjectRequest.SetKey(target_key);
copyObjectRequest.SetCopySource(source);
- auto copyObjectOutcome = s3Client.CopyObject(copyObjectRequest);
+ auto copyObjectOutcome =
+ this->GetS3Client()->CopyObject(copyObjectRequest);
if (!copyObjectOutcome.IsSuccess()) {
string error = strings::StrCat(
copyObjectOutcome.GetError().GetExceptionName().c_str(), ": ",
@@ -612,7 +637,8 @@ Status S3FileSystem::RenameFile(const string& src, const string& target) {
deleteObjectRequest.SetBucket(src_bucket.c_str());
deleteObjectRequest.SetKey(src_key.c_str());
- auto deleteObjectOutcome = s3Client.DeleteObject(deleteObjectRequest);
+ auto deleteObjectOutcome =
+ this->GetS3Client()->DeleteObject(deleteObjectRequest);
if (!deleteObjectOutcome.IsSuccess()) {
string error = strings::StrCat(
deleteObjectOutcome.GetError().GetExceptionName().c_str(), ": ",
diff --git a/tensorflow/core/platform/s3/s3_file_system.h b/tensorflow/core/platform/s3/s3_file_system.h
index 31ba3cecc5..31264be621 100644
--- a/tensorflow/core/platform/s3/s3_file_system.h
+++ b/tensorflow/core/platform/s3/s3_file_system.h
@@ -16,7 +16,9 @@ limitations under the License.
#ifndef TENSORFLOW_CONTRIB_S3_S3_FILE_SYSTEM_H_
#define TENSORFLOW_CONTRIB_S3_S3_FILE_SYSTEM_H_
+#include <aws/s3/S3Client.h>
#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/mutex.h"
namespace tensorflow {
@@ -53,6 +55,26 @@ class S3FileSystem : public FileSystem {
Status GetFileSize(const string& fname, uint64* size) override;
Status RenameFile(const string& src, const string& target) override;
+
+ private:
+  // Returns the member S3 client, initializing it as needed.
+  // When the client accesses an object in S3, e.g.,
+  //   s3://bucket-name/path/to/object
+  // its behavior can be controlled by several environment
+  // variables.
+  // By default S3 accesses the regional endpoint, with the region
+  // controlled by `AWS_REGION`. The endpoint can be overridden
+  // explicitly with `S3_ENDPOINT`. S3 uses HTTPS by default;
+  // if S3_USE_HTTPS=0 is specified, HTTP is used instead. Also,
+  // S3_VERIFY_SSL=0 disables SSL verification when
+  // HTTPS is used.
+  // This S3 client does not support virtual hosted-style
+  // addressing for a bucket.
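+  // For example (hypothetical values):
+  //   AWS_REGION=us-east-1 S3_ENDPOINT=localhost:9000 S3_USE_HTTPS=0
+  // would point the client at a local non-TLS S3-compatible endpoint.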
+ std::shared_ptr<Aws::S3::S3Client> GetS3Client();
+
+ std::shared_ptr<Aws::S3::S3Client> s3_client_;
+ // Lock held when checking for s3_client_ initialization.
+ mutex client_lock_;
};
} // namespace tensorflow
diff --git a/tensorflow/core/platform/s3/s3_file_system_test.cc b/tensorflow/core/platform/s3/s3_file_system_test.cc
index 0b42f5fcec..d4411d9865 100644
--- a/tensorflow/core/platform/s3/s3_file_system_test.cc
+++ b/tensorflow/core/platform/s3/s3_file_system_test.cc
@@ -130,6 +130,8 @@ TEST_F(S3FileSystemTest, NewReadOnlyMemoryRegionFromFile) {
TEST_F(S3FileSystemTest, FileExists) {
const string fname = TmpDir("FileExists");
+ // Ensure the file doesn't yet exist.
+ TF_ASSERT_OK(s3fs.DeleteFile(fname));
EXPECT_EQ(error::Code::NOT_FOUND, s3fs.FileExists(fname).code());
TF_ASSERT_OK(WriteString(fname, "test"));
TF_EXPECT_OK(s3fs.FileExists(fname));
diff --git a/tensorflow/core/platform/windows/cpu_info.h b/tensorflow/core/platform/windows/cpu_info.h
index d6e78dbc8f..f20939d3c0 100644
--- a/tensorflow/core/platform/windows/cpu_info.h
+++ b/tensorflow/core/platform/windows/cpu_info.h
@@ -22,8 +22,10 @@ limitations under the License.
// Byte order defines provided by gcc. MSVC doesn't define those so
// we define them here.
// We assume that all windows platform out there are little endian.
+#if defined(_MSC_VER) && !defined(__clang__)
#define __ORDER_LITTLE_ENDIAN__ 0x4d2
#define __ORDER_BIG_ENDIAN__ 0x10e1
#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
+#endif
#endif // TENSORFLOW_PLATFORM_WINDOWS_CPU_INFO_H_
diff --git a/tensorflow/core/profiler/README.md b/tensorflow/core/profiler/README.md
index 460f935e4a..57d76eb4cb 100644
--- a/tensorflow/core/profiler/README.md
+++ b/tensorflow/core/profiler/README.md
@@ -240,8 +240,9 @@ Open a Chrome browser, enter URL chrome://tracing and load the timeline file.
# can also generate memory profile using `-select bytes`
tfprof> code -select accelerator_micros -max_depth 100000 -output pprof:outfile=<filename> -trim_name_regexes .*apply_op.*
-# Use pprof to visualize the generated file.
-pprof -png --nodecount=100 --sample_index=1 <filename>
+# Use google-pprof, from the google-perftools package, to visualize the generated file.
+# On Ubuntu you can install it with `apt-get install google-perftools`.
+google-pprof --pdf --nodecount=100 <filename>
```
![PprofGraph](g3doc/pprof.jpg)
diff --git a/tensorflow/core/profiler/internal/tfprof_stats.h b/tensorflow/core/profiler/internal/tfprof_stats.h
index 0790cb0ca6..db148c936c 100644
--- a/tensorflow/core/profiler/internal/tfprof_stats.h
+++ b/tensorflow/core/profiler/internal/tfprof_stats.h
@@ -83,7 +83,7 @@ class TFStats {
const MultiGraphNodeProto& ShowMultiGraphNode(const string& cmd,
const Options& opts) const;
- // A a (partial) graph to existing graph.
+  // Add a (partial) graph to the existing graph.
void AddGraph(std::unique_ptr<GraphDef> graph);
// Add a step of run time meta data.
@@ -118,7 +118,7 @@ class TFStats {
MultiGraphNodeProto empty_multi_graph_node_;
std::map<int64, string> id_to_string_;
- // Graph nodes covered by RunMetdata, that is traced with run time stats.
+  // Graph nodes covered by RunMetadata, i.e., traced with run time stats.
std::set<int64> covered_nodes_;
};
diff --git a/tensorflow/core/profiler/profiler.cc b/tensorflow/core/profiler/profiler.cc
index 2cc212d589..808e3c853b 100644
--- a/tensorflow/core/profiler/profiler.cc
+++ b/tensorflow/core/profiler/profiler.cc
@@ -206,8 +206,12 @@ int Run(int argc, char** argv) {
"graph_path,op_log_path,run_meta_path\n");
std::unique_ptr<GraphDef> graph(new GraphDef());
if (!FLAGS_graph_path.empty()) {
- TF_CHECK_OK(
- ReadProtoFile(Env::Default(), FLAGS_graph_path, graph.get(), false));
+ s = ReadProtoFile(Env::Default(), FLAGS_graph_path, graph.get(), false);
+ if (!s.ok()) {
+ fprintf(stderr, "Failed to read graph_path: %s\n",
+ s.ToString().c_str());
+ return 1;
+ }
}
std::unique_ptr<OpLogProto> op_log(new OpLogProto());
diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 67da7bf452..b02f899b87 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -24,7 +24,7 @@ limitations under the License.
// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
// "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX "-rc1"
+#define TF_VERSION_SUFFIX ""
#define TF_STR_HELPER(x) #x
#define TF_STR(x) TF_STR_HELPER(x)
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 864e7e39c2..db4c5c35e3 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -35,7 +35,7 @@ limitations under the License.
#include "tensorflow/core/util/padding.h"
#include "tensorflow/core/util/tensor_format.h"
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
using mkldnn::engine;
@@ -325,7 +325,7 @@ class MklShape {
nullptr; // TF dimension corresponding to this MKL dimension
};
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
// Forward decl
TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format);
@@ -660,7 +660,7 @@ class MklDnnShape {
typedef std::vector<MklShape> MklShapeList;
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
typedef std::vector<MklDnnShape> MklDnnShapeList;
#endif
@@ -674,7 +674,7 @@ inline bool AreAllMklTensors(const MklShapeList& shapes) {
return true;
}
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
template <typename T>
inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
const MklShape& mkl_shape) {
@@ -725,7 +725,7 @@ inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) {
sizeof(uint8));
}
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
inline void GetMklShape(OpKernelContext* ctext, int n, MklDnnShape* mklshape) {
mklshape->DeSerializeMklDnnShape(
ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs()))
@@ -749,7 +749,7 @@ inline void GetMklInputList(OpKernelContext* ctext, StringPiece name,
ctext->input_list(name, input_tensors);
}
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
MklShapeList* mkl_shapes) {
@@ -779,7 +779,7 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
#endif
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
/// Get shape of input tensor pointed by 'input_idx' in TensorShape format.
/// If the input tensor is in MKL layout, then obtains TensorShape from
/// MklShape.
@@ -814,7 +814,7 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
second_tensor->flat<uint8>().size() * sizeof(uint8));
}
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
// Allocate the second output tensor that will contain
// the MKL shape serialized
inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
@@ -851,7 +851,7 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
second_tensor->flat<uint8>().size() * sizeof(uint8));
}
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
// Allocate the output tensor, create a second output tensor that will contain
// the MKL shape serialized
inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
@@ -875,7 +875,7 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n,
// Allocates a temp tensor and returns the data buffer for temporary storage.
// Currently
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
template <typename T>
inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
const memory::primitive_desc& pd, void** buf_out) {
@@ -994,7 +994,7 @@ inline void CopyMklTensorInToOut(OpKernelContext* context, int idx_in,
context->set_output(idx_meta_out, meta_output);
}
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
inline void CopyTfTensorInToOutWithShape(OpKernelContext* context, int idx_in,
int idx_out,
const TensorShape& shape) {
@@ -1032,7 +1032,7 @@ inline void CopyTfTensorInToOutWithShape(OpKernelContext* context, int idx_in,
}
#endif
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
inline void ForwardTfTensorInToOut(OpKernelContext* context, int idx_in,
int idx_out) {
@@ -1090,7 +1090,7 @@ inline void ForwardMklTensorInToOut(OpKernelContext* context, int idx_in,
}
}
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
inline void ForwardMklTensorInToOutWithMklShape(OpKernelContext* context,
int idx_in, int idx_out,
const MklDnnShape& mkl_shape) {
@@ -1132,7 +1132,7 @@ inline void SetDummyMklShapeOutput(OpKernelContext* context,
AllocateOutputSetMklShape(context, idx_data_out, mkl_shape_output);
}
-#ifndef INTEL_MKL_DNN
+#ifdef INTEL_MKL_ML
// We don't need these functions in MKLDNN. We have defined equality operator
// on MklDnnShape class directly.
@@ -1242,7 +1242,7 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) {
// -------------------------------------------------------------------
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
/// Return MKL-DNN data type (memory::data_type) for input type T
///
@@ -1753,7 +1753,7 @@ class MklDnnData {
}
};
-#endif // INTEL_MKL_DNN
+#endif // INTEL_MKL_ML
} // namespace tensorflow
#endif // INTEL_MKL
diff --git a/tensorflow/core/util/mkl_util_test.cc b/tensorflow/core/util/mkl_util_test.cc
index 8b73eadb40..cd1d0713ad 100644
--- a/tensorflow/core/util/mkl_util_test.cc
+++ b/tensorflow/core/util/mkl_util_test.cc
@@ -22,7 +22,7 @@ limitations under the License.
namespace tensorflow {
namespace {
-#ifdef INTEL_MKL_DNN
+#ifndef INTEL_MKL_ML
TEST(MklUtilTest, MklDnnTfShape) {
auto cpu_engine = engine(engine::cpu, 0);
@@ -84,7 +84,7 @@ TEST(MklUtilTest, MklDnnBlockedFormatTest) {
EXPECT_EQ(b_md2.data.format, mkldnn_blocked);
}
-#endif // INTEL_MKL_DNN
+#endif // INTEL_MKL_ML
} // namespace
} // namespace tensorflow
diff --git a/tensorflow/docs_src/about/bib.md b/tensorflow/docs_src/about/bib.md
index c9f0c532c6..5593a3d95c 100644
--- a/tensorflow/docs_src/about/bib.md
+++ b/tensorflow/docs_src/about/bib.md
@@ -60,7 +60,7 @@ author={
Lukasz~Kaiser and
Manjunath~Kudlur and
Josh~Levenberg and
- Dan~Man\'{e} and
+ Dandelion~Man\'{e} and
Rajat~Monga and
Sherry~Moore and
Derek~Murray and
diff --git a/tensorflow/docs_src/api_guides/python/contrib.signal.md b/tensorflow/docs_src/api_guides/python/contrib.signal.md
index 85ef3ad134..0f7690f80a 100644
--- a/tensorflow/docs_src/api_guides/python/contrib.signal.md
+++ b/tensorflow/docs_src/api_guides/python/contrib.signal.md
@@ -28,14 +28,14 @@ The `axis` parameter to @{tf.contrib.signal.frame} allows you to frame tensors
with inner structure (e.g. a spectrogram):
```python
-# `magnitude_spectrograms` is a [batch_size, ?, 127] tensor of spectrograms. We
+# `magnitude_spectrograms` is a [batch_size, ?, 129] tensor of spectrograms. We
# would like to produce overlapping fixed-size spectrogram patches; for example,
# for use in a situation where a fixed size input is needed.
magnitude_spectrograms = tf.abs(tf.contrib.signal.stft(
signals, frame_length=256, frame_step=64, fft_length=256))
-# `spectrogram_patches` is a [batch_size, ?, 64, 127] tensor containing a
-# variable number of [64, 127] spectrogram patches per batch item.
+# `spectrogram_patches` is a [batch_size, ?, 64, 129] tensor containing a
+# variable number of [64, 129] spectrogram patches per batch item.
spectrogram_patches = tf.contrib.signal.frame(
magnitude_spectrograms, frame_length=64, frame_step=16, axis=1)
```
diff --git a/tensorflow/docs_src/api_guides/python/regression_examples.md b/tensorflow/docs_src/api_guides/python/regression_examples.md
index 45cb9d829c..dae50a8f03 100644
--- a/tensorflow/docs_src/api_guides/python/regression_examples.md
+++ b/tensorflow/docs_src/api_guides/python/regression_examples.md
@@ -229,4 +229,4 @@ passed through to the `model_fn` when the `model_fn` is called.
The `model_fn` returns an
@{tf.estimator.EstimatorSpec$`EstimatorSpec`} which is a simple structure
indicating to the `Estimator` which operations should be run to accomplish
-varions tasks.
+various tasks.
diff --git a/tensorflow/docs_src/get_started/custom_estimators.md b/tensorflow/docs_src/get_started/custom_estimators.md
index 6343cc4ee4..42a246678a 100644
--- a/tensorflow/docs_src/get_started/custom_estimators.md
+++ b/tensorflow/docs_src/get_started/custom_estimators.md
@@ -15,7 +15,7 @@ git clone https://github.com/tensorflow/models/
cd models/samples/core/get_started
```
-In this document we wil be looking at
+In this document we will be looking at
[`custom_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py).
You can run it with the following command:
@@ -161,7 +161,7 @@ classifier = tf.estimator.Estimator(
To implement a typical model function, you must do the following:
-* (Define the model)[#define_the_model].
+* [Define the model](#define_the_model).
* Specify additional calculations for each of
the [three different modes](#modes):
* [Predict](#predict)
diff --git a/tensorflow/docs_src/get_started/datasets_quickstart.md b/tensorflow/docs_src/get_started/datasets_quickstart.md
index ecfbf160f0..a8a2ab6e56 100644
--- a/tensorflow/docs_src/get_started/datasets_quickstart.md
+++ b/tensorflow/docs_src/get_started/datasets_quickstart.md
@@ -169,7 +169,7 @@ the number of examples in the `Dataset` ensures that the data is completely
shuffled. The Iris data set only contains 150 examples.
The @{tf.data.Dataset.repeat$`repeat`} method has the `Dataset` restart when
-it reaches the end. To limit the number of epochss, set the `count` argument.
+it reaches the end. To limit the number of epochs, set the `count` argument.
The @{tf.data.Dataset.repeat$`batch`} method collects a number of examples and
stacks them, to create batches. This adds a dimension to their shape. The new
@@ -282,7 +282,7 @@ produce the necessary `(features, label)` pairs.
We will start by building a function to parse a single line.
-The following `iris_data.parse_line` function acomplishes this taks using the
+The following `iris_data.parse_line` function accomplishes this task using the
@{tf.decode_csv} function, and some simple python code:
We must parse each of the lines in the dataset in order to generate the
diff --git a/tensorflow/docs_src/get_started/feature_columns.md b/tensorflow/docs_src/get_started/feature_columns.md
index e3308ed716..ad3e1fe3e3 100644
--- a/tensorflow/docs_src/get_started/feature_columns.md
+++ b/tensorflow/docs_src/get_started/feature_columns.md
@@ -461,8 +461,8 @@ permitting a richer palette of numbers for every cell, an embedding column
contains far fewer cells than an indicator column.
Let's look at an example comparing indicator and embedding columns. Suppose our
-input examples consists of different words from a limited palette of only 81
-words. Further suppose that the data set provides provides the following input
+input examples consist of different words from a limited palette of only 81
+words. Further suppose that the data set provides the following input
words in 4 separate examples:
* `"dog"`
diff --git a/tensorflow/docs_src/get_started/premade_estimators.md b/tensorflow/docs_src/get_started/premade_estimators.md
index 45850a8996..4f01f997c3 100644
--- a/tensorflow/docs_src/get_started/premade_estimators.md
+++ b/tensorflow/docs_src/get_started/premade_estimators.md
@@ -372,7 +372,7 @@ Test set accuracy: 0.967
We now have a trained model that produces good evaluation results.
We can now use the trained model to predict the species of an Iris flower
-based on some unlabeled measurments. As with training and evaluation, we make
+based on some unlabeled measurements. As with training and evaluation, we make
predictions using a single function call:
```python
diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md
index ba1a4118ae..14add7c77e 100644
--- a/tensorflow/docs_src/install/install_c.md
+++ b/tensorflow/docs_src/install/install_c.md
@@ -38,7 +38,7 @@ enable TensorFlow for C:
OS="linux" # Change to "darwin" for macOS
TARGET_DIRECTORY="/usr/local"
curl -L \
- "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.5.0-rc1.tar.gz" |
+ "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.5.0.tar.gz" |
sudo tar -C $TARGET_DIRECTORY -xz
The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md
index 87cc647317..d2af9d9843 100644
--- a/tensorflow/docs_src/install/install_go.md
+++ b/tensorflow/docs_src/install/install_go.md
@@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go:
TF_TYPE="cpu" # Change to "gpu" for GPU support
TARGET_DIRECTORY='/usr/local'
curl -L \
- "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.5.0-rc1.tar.gz" |
+ "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.5.0.tar.gz" |
sudo tar -C $TARGET_DIRECTORY -xz
The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md
index 37e109a6e4..e5388c4b1e 100644
--- a/tensorflow/docs_src/install/install_java.md
+++ b/tensorflow/docs_src/install/install_java.md
@@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs:
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>tensorflow</artifactId>
- <version>1.5.0-rc1</version>
+ <version>1.5.0</version>
</dependency>
```
@@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow:
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>tensorflow</artifactId>
- <version>1.5.0-rc1</version>
+ <version>1.5.0</version>
</dependency>
</dependencies>
</project>
@@ -123,12 +123,12 @@ instead:
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>libtensorflow</artifactId>
- <version>1.5.0-rc1</version>
+ <version>1.5.0</version>
</dependency>
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>libtensorflow_jni_gpu</artifactId>
- <version>1.5.0-rc1</version>
+ <version>1.5.0</version>
</dependency>
```
@@ -147,7 +147,7 @@ refer to the simpler instructions above instead.
Take the following steps to install TensorFlow for Java on Linux or macOS:
1. Download
- [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.5.0-rc1.jar),
+ [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.5.0.jar),
which is the TensorFlow Java Archive (JAR).
2. Decide whether you will run TensorFlow for Java on CPU(s) only or with
@@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
mkdir -p ./jni
curl -L \
- "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.5.0-rc1.tar.gz" |
+ "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.5.0.tar.gz" |
tar -xz -C ./jni
### Install on Windows
@@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
Take the following steps to install TensorFlow for Java on Windows:
1. Download
- [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.5.0-rc1.jar),
+ [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.5.0.jar),
which is the TensorFlow Java Archive (JAR).
2. Download the following Java Native Interface (JNI) file appropriate for
- [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.5.0-rc1.zip).
+ [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.5.0.zip).
3. Extract this .zip file.
@@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the
downloaded `.jar` in your `classpath` by using the `-cp` compilation flag
as follows:
-<pre><b>javac -cp libtensorflow-1.5.0-rc1.jar HelloTF.java</b></pre>
+<pre><b>javac -cp libtensorflow-1.5.0.jar HelloTF.java</b></pre>
### Running
@@ -239,11 +239,11 @@ two files are available to the JVM:
For example, the following command line executes the `HelloTF` program on Linux
and macOS X:
-<pre><b>java -cp libtensorflow-1.5.0-rc1.jar:. -Djava.library.path=./jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.5.0.jar:. -Djava.library.path=./jni HelloTF</b></pre>
And the following command line executes the `HelloTF` program on Windows:
-<pre><b>java -cp libtensorflow-1.5.0-rc1.jar;. -Djava.library.path=jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.5.0.jar;. -Djava.library.path=jni HelloTF</b></pre>
If the program prints <tt>Hello from <i>version</i></tt>, you've successfully
installed TensorFlow for Java and are ready to use the API. If the program
diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md
index 03f12dff08..cd8c14599f 100644
--- a/tensorflow/docs_src/install/install_linux.md
+++ b/tensorflow/docs_src/install/install_linux.md
@@ -31,13 +31,13 @@ If you are installing TensorFlow with GPU support using one of the
mechanisms described in this guide, then the following NVIDIA software
must be installed on your system:
- * CUDA® Toolkit 8.0. For details, see
+ * CUDA® Toolkit 9.0. For details, see
[NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/#axzz4VZnqTJ2A).
Ensure that you append the relevant Cuda pathnames to the
`LD_LIBRARY_PATH` environment variable as described in the
NVIDIA documentation.
- * The NVIDIA drivers associated with CUDA Toolkit 8.0.
- * cuDNN v6.0. For details, see
+ * The NVIDIA drivers associated with CUDA Toolkit 9.0.
+ * cuDNN v7.0. For details, see
[NVIDIA's documentation](https://developer.nvidia.com/cudnn).
Ensure that you create the `CUDA_HOME` environment variable as
described in the NVIDIA documentation.
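Not part of the patch: once CUDA 9.0 and cuDNN 7.0 are installed, one hedged way to confirm that a GPU-enabled TF 1.x build actually sees the device (`device_lib` is an internal but commonly used helper):

```python
import tensorflow as tf
from tensorflow.python.client import device_lib

# Lists local devices; a working CUDA 9.0 / cuDNN 7.0 setup should show a GPU entry.
print(device_lib.list_local_devices())

# Alternatively, run a trivial op and log where it was placed.
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    print(sess.run(tf.constant(1.0) + tf.constant(2.0)))
```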
@@ -188,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv:
Virtualenv environment:
<pre>(tensorflow)$ <b>pip3 install --upgrade \
- https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc1-cp34-cp34m-linux_x86_64.whl</b></pre>
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp34-cp34m-linux_x86_64.whl</b></pre>
If you encounter installation problems, see
[Common Installation Problems](#common_installation_problems).
@@ -293,7 +293,7 @@ take the following steps:
<pre>
$ <b>sudo pip3 install --upgrade \
- https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc1-cp34-cp34m-linux_x86_64.whl</b>
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp34-cp34m-linux_x86_64.whl</b>
</pre>
If this step fails, see
@@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
<pre>
(tensorflow)$ <b>pip install --ignore-installed --upgrade \
- https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc1-cp34-cp34m-linux_x86_64.whl</b></pre>
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp34-cp34m-linux_x86_64.whl</b></pre>
<a name="ValidateYourInstallation"></a>
@@ -648,14 +648,14 @@ This section documents the relevant values for Linux installations.
CPU only:
<pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl
</pre>
GPU support:
<pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0-cp27-none-linux_x86_64.whl
</pre>
Note that GPU support requires the NVIDIA hardware and software described in
@@ -667,14 +667,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
CPU only:
<pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp34-cp34m-linux_x86_64.whl
</pre>
GPU support:
<pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0-cp34-cp34m-linux_x86_64.whl
</pre>
Note that GPU support requires the NVIDIA hardware and software described in
@@ -686,14 +686,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
CPU only:
<pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp35-cp35m-linux_x86_64.whl
</pre>
GPU support:
<pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0-cp35-cp35m-linux_x86_64.whl
</pre>
@@ -705,14 +705,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
CPU only:
<pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp36-cp36m-linux_x86_64.whl
</pre>
GPU support:
<pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0-cp36-cp36m-linux_x86_64.whl
</pre>
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md
index e13ddadab7..f49d3a2f08 100644
--- a/tensorflow/docs_src/install/install_mac.md
+++ b/tensorflow/docs_src/install/install_mac.md
@@ -115,7 +115,7 @@ Take the following steps to install TensorFlow with Virtualenv:
TensorFlow in the active Virtualenv is as follows:
<pre> $ <b>pip3 install --upgrade \
- https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc1-py2-none-any.whl</b></pre>
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py3-none-any.whl</b></pre>
If you encounter installation problems, see
[Common Installation Problems](#common-installation-problems).
@@ -238,7 +238,7 @@ take the following steps:
issue the following command:
<pre> $ <b>sudo pip3 install --upgrade \
- https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc1-py2-none-any.whl</b> </pre>
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py3-none-any.whl</b> </pre>
If the preceding command fails, see
[installation problems](#common-installation-problems).
@@ -347,7 +347,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
TensorFlow for Python 2.7:
<pre> (<i>targetDirectory</i>)$ <b>pip install --ignore-installed --upgrade \
- https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc1-py2-none-any.whl</b></pre>
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py2-none-any.whl</b></pre>
<a name="ValidateYourInstallation"></a>
@@ -520,7 +520,7 @@ This section documents the relevant values for Mac OS installations.
<pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc1-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py2-none-any.whl
</pre>
@@ -528,5 +528,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc1-py2-none-a
<pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc1-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py3-none-any.whl
</pre>
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md
index 485863bf2e..bc7d2080dc 100644
--- a/tensorflow/docs_src/install/install_sources.md
+++ b/tensorflow/docs_src/install/install_sources.md
@@ -133,7 +133,7 @@ The following NVIDIA <i>hardware</i> must be installed on your system:
The following NVIDIA <i>software</i> must be installed on your system:
- * NVIDIA's Cuda Toolkit (>= 7.0). We recommend version 8.0.
+ * NVIDIA's Cuda Toolkit (>= 7.0). We recommend version 9.0.
For details, see
[NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/#axzz4VZnqTJ2A).
Ensure that you append the relevant Cuda pathnames to the
@@ -289,11 +289,11 @@ Do you wish to build TensorFlow with CUDA support? [y/N] <b>Y</b>
CUDA support will be enabled for TensorFlow
Do you want to use clang as CUDA compiler? [y/N]
nvcc will be used as CUDA compiler
-Please specify the Cuda SDK version you want to use, e.g. 7.0. [Leave empty to default to CUDA 8.0]: <b>8.0</b>
-Please specify the location where CUDA 8.0 toolkit is installed. Refer to README.md for more details. [Default is /usr/local/cuda]:
+Please specify the Cuda SDK version you want to use, e.g. 7.0. [Leave empty to default to CUDA 9.0]: <b>9.0</b>
+Please specify the location where CUDA 9.0 toolkit is installed. Refer to README.md for more details. [Default is /usr/local/cuda]:
Please specify which gcc should be used by nvcc as the host compiler. [Default is /usr/bin/gcc]:
-Please specify the cuDNN version you want to use. [Leave empty to default to cuDNN 6.0]: <b>6</b>
-Please specify the location where cuDNN 6 library is installed. Refer to README.md for more details. [Default is /usr/local/cuda]:
+Please specify the cuDNN version you want to use. [Leave empty to default to cuDNN 7.0]: <b>7</b>
+Please specify the location where cuDNN 7 library is installed. Refer to README.md for more details. [Default is /usr/local/cuda]:
Please specify a list of comma-separated Cuda compute capabilities you want to build with.
You can find the compute capability of your device at: https://developer.nvidia.com/cuda-gpus.
Please note that each additional compute capability significantly increases your build time and binary size.
@@ -359,10 +359,10 @@ Invoke `pip install` to install that pip package.
The filename of the `.whl` file depends on your platform.
For example, the following command will install the pip package
-for TensorFlow 1.5.0rc1 on Linux:
+for TensorFlow 1.5.0 on Linux:
<pre>
-$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.5.0rc1-py2-none-any.whl</b>
+$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.5.0-py2-none-any.whl</b>
</pre>
## Validate your installation
@@ -461,8 +461,8 @@ Stack Overflow and specify the `tensorflow` tag.
<table>
<tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
-<tr><td>tensorflow-1.5.0-rc1</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.8.0</td><td>N/A</td><td>N/A</td></tr>
-<tr><td>tensorflow_gpu-1.5.0-rc1</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.8.0</td><td>7</td><td>9</td></tr>
+<tr><td>tensorflow-1.5.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.8.0</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.5.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.8.0</td><td>7</td><td>9</td></tr>
<tr><td>tensorflow-1.4.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.5.4</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow_gpu-1.4.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.5.4</td><td>6</td><td>8</td></tr>
<tr><td>tensorflow-1.3.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
@@ -478,7 +478,7 @@ Stack Overflow and specify the `tensorflow` tag.
**Mac**
<table>
<tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
-<tr><td>tensorflow-1.5.0-rc1</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.8.1</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow-1.5.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.8.1</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow-1.4.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.5.4</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow-1.3.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow-1.2.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
@@ -491,8 +491,8 @@ Stack Overflow and specify the `tensorflow` tag.
**Windows**
<table>
<tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
-<tr><td>tensorflow-1.5.0-rc1</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
-<tr><td>tensorflow_gpu-1.5.0-rc1</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>7</td><td>9</td></tr>
+<tr><td>tensorflow-1.5.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.5.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>7</td><td>9</td></tr>
<tr><td>tensorflow-1.4.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow_gpu-1.4.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>6</td><td>8</td></tr>
<tr><td>tensorflow-1.3.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md
index 8d0eb7966f..86a111c2ec 100644
--- a/tensorflow/docs_src/install/install_windows.md
+++ b/tensorflow/docs_src/install/install_windows.md
@@ -30,13 +30,13 @@ If you are installing TensorFlow with GPU support using one of the mechanisms
described in this guide, then the following NVIDIA software must be
installed on your system:
- * CUDA® Toolkit 8.0. For details, see
+ * CUDA® Toolkit 9.0. For details, see
[NVIDIA's
documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/)
Ensure that you append the relevant Cuda pathnames to the `%PATH%`
environment variable as described in the NVIDIA documentation.
- * The NVIDIA drivers associated with CUDA Toolkit 8.0.
- * cuDNN v6.0. For details, see
+ * The NVIDIA drivers associated with CUDA Toolkit 9.0.
+ * cuDNN v7.0. For details, see
[NVIDIA's documentation](https://developer.nvidia.com/cudnn).
Note that cuDNN is typically installed in a different location from the
other CUDA DLLs. Ensure that you add the directory where you installed
diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md
index 2b4896c381..9049a5a9f3 100644
--- a/tensorflow/docs_src/programmers_guide/graphs.md
+++ b/tensorflow/docs_src/programmers_guide/graphs.md
@@ -125,14 +125,14 @@ an operation:
@{tf.Tensor} accepts an optional `name` argument. For example,
`tf.constant(42.0, name="answer")` creates a new @{tf.Operation} named
`"answer"` and returns a @{tf.Tensor} named `"answer:0"`. If the default graph
- already contained an operation named `"answer"`, the TensorFlow would append
+ already contains an operation named `"answer"`, then TensorFlow would append
`"_1"`, `"_2"`, and so on to the name, in order to make it unique.
* The @{tf.name_scope} function makes it possible to add a **name scope** prefix
to all operations created in a particular context. The current name scope
prefix is a `"/"`-delimited list of the names of all active @{tf.name_scope}
context managers. If a name scope has already been used in the current
- context, TensorFlow appens `"_1"`, `"_2"`, and so on. For example:
+ context, TensorFlow appends `"_1"`, `"_2"`, and so on. For example:
```python
c_0 = tf.constant(0, name="c") # => operation named "c"
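# Illustrative continuation (not from the patch): reusing a name or a name scope
# makes TensorFlow append "_1", "_2", and so on, as described above.
c_1 = tf.constant(1, name="c")        # => operation named "c_1"

with tf.name_scope("outer"):
  c_2 = tf.constant(2, name="c")      # => operation named "outer/c"
with tf.name_scope("outer"):
  c_3 = tf.constant(3, name="c")      # => operation named "outer_1/c"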
diff --git a/tensorflow/examples/android/BUILD b/tensorflow/examples/android/BUILD
index 46df5973e8..1214647797 100644
--- a/tensorflow/examples/android/BUILD
+++ b/tensorflow/examples/android/BUILD
@@ -92,7 +92,7 @@ android_binary(
filegroup(
name = "external_assets",
srcs = [
- "@inception5h//:model_files",
+ "@inception_v1//:model_files",
"@mobile_ssd//:model_files",
"@speech_commands//:model_files",
"@stylize//:model_files",
diff --git a/tensorflow/examples/android/build.gradle b/tensorflow/examples/android/build.gradle
index f7bdf8b816..0767726aa9 100644
--- a/tensorflow/examples/android/build.gradle
+++ b/tensorflow/examples/android/build.gradle
@@ -56,10 +56,12 @@ def nativeOutDir = 'libs/' + cpuType
def nativeBuildRule = 'buildNativeBazel'
def demoLibPath = '../../../bazel-bin/tensorflow/examples/android/libtensorflow_demo.so'
def inferenceLibPath = '../../../bazel-bin/tensorflow/contrib/android/libtensorflow_inference.so'
+
+// Override for Makefile builds.
if (nativeBuildSystem == 'makefile') {
nativeBuildRule = 'buildNativeMake'
- demoLibPath = '../../../tensorflow/contrib/makefile/gen/lib/libtensorflow_demo.so'
- inferenceLibPath = '../../../tensorflow/contrib/makefile/gen/lib/libtensorflow_inference.so'
+ demoLibPath = '../../../tensorflow/contrib/makefile/gen/lib/android_' + cpuType + '/libtensorflow_demo.so'
+ inferenceLibPath = '../../../tensorflow/contrib/makefile/gen/lib/android_' + cpuType + '/libtensorflow_inference.so'
}
// If building with Bazel, this is the location of the bazel binary.
@@ -154,7 +156,8 @@ task buildNativeMake(type: Exec) {
'-s', \
'tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in', \
'-t', \
- 'libtensorflow_inference.so libtensorflow_demo.so' \
+ 'libtensorflow_inference.so libtensorflow_demo.so all' \
+ , '-a', cpuType \
//, '-T' // Uncomment to skip protobuf and speed up subsequent builds.
}
diff --git a/tensorflow/examples/android/download-models.gradle b/tensorflow/examples/android/download-models.gradle
index 0e2cf65f53..d3b67eab52 100644
--- a/tensorflow/examples/android/download-models.gradle
+++ b/tensorflow/examples/android/download-models.gradle
@@ -9,7 +9,7 @@
*/
// hard coded model files
// LINT.IfChange
-def models = ['inception5h.zip',
+def models = ['inception_v1.zip',
'object_detection/ssd_mobilenet_v1_android_export.zip',
'stylize_v1.zip',
'speech_commands_conv_actions.zip']
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/LegacyCameraConnectionFragment.java b/tensorflow/examples/android/src/org/tensorflow/demo/LegacyCameraConnectionFragment.java
index a317273acd..068c7b0d94 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/LegacyCameraConnectionFragment.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/LegacyCameraConnectionFragment.java
@@ -81,8 +81,11 @@ public class LegacyCameraConnectionFragment extends Fragment {
try {
Camera.Parameters parameters = camera.getParameters();
- parameters.setFocusMode(Camera.Parameters.FOCUS_MODE_CONTINUOUS_PICTURE);
-
+ List<String> focusModes = parameters.getSupportedFocusModes();
+ if (focusModes != null
+ && focusModes.contains(Camera.Parameters.FOCUS_MODE_CONTINUOUS_PICTURE)) {
+ parameters.setFocusMode(Camera.Parameters.FOCUS_MODE_CONTINUOUS_PICTURE);
+ }
List<Camera.Size> cameraSizes = parameters.getSupportedPreviewSizes();
Size[] sizes = new Size[cameraSizes.size()];
int i = 0;
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/tracking/MultiBoxTracker.java b/tensorflow/examples/android/src/org/tensorflow/demo/tracking/MultiBoxTracker.java
index 2fe2ba539e..af6af2bc8f 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/tracking/MultiBoxTracker.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/tracking/MultiBoxTracker.java
@@ -199,7 +199,7 @@ public class MultiBoxTracker {
final int w,
final int h,
final int rowStride,
- final int sensorOrienation,
+ final int sensorOrientation,
final byte[] frame,
final long timestamp) {
if (objectTracker == null && !initialized) {
@@ -209,7 +209,7 @@ public class MultiBoxTracker {
objectTracker = ObjectTracker.getInstance(w, h, rowStride, true);
frameWidth = w;
frameHeight = h;
- this.sensorOrientation = sensorOrienation;
+ this.sensorOrientation = sensorOrientation;
initialized = true;
if (objectTracker == null) {
diff --git a/tensorflow/examples/udacity/Dockerfile b/tensorflow/examples/udacity/Dockerfile
index 3ca58566c1..00eb853e52 100644
--- a/tensorflow/examples/udacity/Dockerfile
+++ b/tensorflow/examples/udacity/Dockerfile
@@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
-RUN pip install scikit-learn pyreadline Pillow
+RUN pip install scikit-learn pyreadline Pillow imageio
RUN rm -rf /notebooks/*
ADD *.ipynb /notebooks/
WORKDIR /notebooks
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 2b4d5b8e0f..f5cd7885e7 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -76,6 +76,7 @@ py_library(
":layers",
":lib",
":list_ops",
+ ":manip_ops",
":math_ops",
":metrics",
":nn",
@@ -1424,6 +1425,14 @@ tf_gen_op_wrapper_private_py(
)
tf_gen_op_wrapper_private_py(
+ name = "manip_ops_gen",
+ visibility = [
+ "//learning/brain/python/ops:__pkg__",
+ "//tensorflow/python/kernel_tests:__pkg__",
+ ],
+)
+
+tf_gen_op_wrapper_private_py(
name = "math_ops_gen",
visibility = [
"//learning/brain/google/python/ops:__pkg__",
@@ -1755,6 +1764,8 @@ py_library(
":linalg_grad",
":linalg_ops",
":logging_ops",
+ ":manip_grad",
+ ":manip_ops",
":math_grad",
":math_ops",
":platform",
@@ -1878,6 +1889,29 @@ py_library(
)
py_library(
+ name = "manip_grad",
+ srcs = ["ops/manip_grad.py"],
+ srcs_version = "PY2AND3",
+ deps = [
+ ":control_flow_ops",
+ ":framework_for_generated_wrappers",
+ ":manip_ops",
+ ],
+)
+
+py_library(
+ name = "manip_ops",
+ srcs = ["ops/manip_ops.py"],
+ srcs_version = "PY2AND3",
+ deps = [
+ ":dtypes",
+ ":framework_ops",
+ ":manip_ops_gen",
+ "//third_party/py/numpy",
+ ],
+)
+
+py_library(
name = "logging_ops",
srcs = ["ops/logging_ops.py"],
srcs_version = "PY2AND3",
@@ -2339,6 +2373,8 @@ py_library(
":linalg_ops",
":logging_ops",
":lookup_ops",
+ ":manip_grad",
+ ":manip_ops",
":math_grad",
":math_ops",
":numerics",
diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py
index bc9ddec2a5..ea7604d30f 100644
--- a/tensorflow/python/__init__.py
+++ b/tensorflow/python/__init__.py
@@ -84,6 +84,7 @@ from tensorflow.python.feature_column import feature_column_lib as feature_colum
from tensorflow.python.layers import layers
from tensorflow.python.ops import bitwise_ops as bitwise
from tensorflow.python.ops import image_ops as image
+from tensorflow.python.ops import manip_ops as manip
from tensorflow.python.ops import metrics
from tensorflow.python.ops import nn
from tensorflow.python.ops import sets
@@ -241,6 +242,7 @@ _allowed_symbols.extend([
'linalg',
'logging',
'losses',
+ 'manip',
'metrics',
'newaxis',
'nn',
diff --git a/tensorflow/python/client/session_benchmark.py b/tensorflow/python/client/session_benchmark.py
index 721bca91b7..da74855193 100644
--- a/tensorflow/python/client/session_benchmark.py
+++ b/tensorflow/python/client/session_benchmark.py
@@ -22,6 +22,7 @@ import time
import numpy as np
+from six.moves import xrange # pylint: disable=redefined-builtin
from tensorflow.python.client import session
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 5d318531d5..c4b7e4919b 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -203,7 +203,7 @@ class Dataset(object):
tensors: A nested structure of tensors.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
return TensorDataset(tensors)
@@ -216,7 +216,7 @@ class Dataset(object):
0th dimension.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
return TensorSliceDataset(tensors)
@@ -229,7 +229,7 @@ class Dataset(object):
sparse_tensor: A `tf.SparseTensor`.
Returns:
- A `Dataset` of rank-(N-1) sparse tensors.
+ Dataset: A `Dataset` of rank-(N-1) sparse tensors.
"""
return SparseTensorSliceDataset(sparse_tensor)
@@ -315,7 +315,7 @@ class Dataset(object):
`generator`.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
if not callable(generator):
raise TypeError("`generator` must be callable.")
@@ -458,7 +458,7 @@ class Dataset(object):
len(args) == 3 -> start = args[0], stop = args[1], step = args[2]
Returns:
- A `RangeDataset`.
+ Dataset: A `RangeDataset`.
Raises:
ValueError: if len(args) == 0.
@@ -502,7 +502,7 @@ class Dataset(object):
datasets: A nested structure of datasets.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
return ZipDataset(datasets)
@@ -528,7 +528,7 @@ class Dataset(object):
dataset: `Dataset` to be concatenated.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
return ConcatenateDataset(self, dataset)
@@ -540,7 +540,7 @@ class Dataset(object):
maximum number elements that will be buffered when prefetching.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
return PrefetchDataset(self, buffer_size)
@@ -558,12 +558,14 @@ class Dataset(object):
- /path/to/dir/b.py
- /path/to/dir/c.py
+ NOTE: The order of the file names returned can be non-deterministic.
+
Args:
file_pattern: A string or scalar string `tf.Tensor`, representing
the filename pattern that will be matched.
Returns:
- A `Dataset` of strings corresponding to file names.
+ Dataset: A `Dataset` of strings corresponding to file names.
"""
return Dataset.from_tensor_slices(gen_io_ops.matching_files(file_pattern))
@@ -580,7 +582,7 @@ class Dataset(object):
indefinitely.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
return RepeatDataset(self, count)
@@ -604,7 +606,7 @@ class Dataset(object):
iterated over. (Defaults to `True`.)
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
return ShuffleDataset(self, buffer_size, seed, reshuffle_each_iteration)
@@ -617,7 +619,7 @@ class Dataset(object):
If a filename is not provided, the dataset will be cached in memory.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
return CacheDataset(self, filename)
@@ -631,7 +633,7 @@ class Dataset(object):
dataset, the new dataset will contain all elements of this dataset.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
return TakeDataset(self, count)
@@ -646,7 +648,7 @@ class Dataset(object):
is -1, skips the entire dataset.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
return SkipDataset(self, count)
@@ -693,7 +695,7 @@ class Dataset(object):
index: A `tf.int64` scalar `tf.Tensor`, representing the worker index.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
Raises:
ValueError: if `num_shards` or `index` are illegal values. Note: error
@@ -737,7 +739,7 @@ class Dataset(object):
consecutive elements of this dataset to combine in a single batch.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
return BatchDataset(self, batch_size)
@@ -766,7 +768,7 @@ class Dataset(object):
the empty string for string types.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
return PaddedBatchDataset(self, batch_size, padded_shapes, padding_values)
@@ -782,7 +784,7 @@ class Dataset(object):
specified, elements will be processed sequentially.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
if num_parallel_calls is None:
return MapDataset(self, map_func)
@@ -798,7 +800,7 @@ class Dataset(object):
`Dataset`.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
return FlatMapDataset(self, map_func)
@@ -867,7 +869,7 @@ class Dataset(object):
input element before cycling to another input element.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
return InterleaveDataset(self, map_func, cycle_length, block_length)
@@ -880,7 +882,7 @@ class Dataset(object):
scalar `tf.bool` tensor.
Returns:
- A `Dataset`.
+ Dataset: A `Dataset`.
"""
return FilterDataset(self, predicate)
@@ -901,10 +903,11 @@ class Dataset(object):
Args:
transformation_func: A function that takes one `Dataset` argument and
- returns a `Dataset`.
+ returns a `Dataset`.
Returns:
- The `Dataset` returned by applying `transformation_func` to this dataset.
+ Dataset: The `Dataset` returned by applying `transformation_func` to this
+ dataset.
"""
dataset = transformation_func(self)
if not isinstance(dataset, Dataset):
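The `apply` contract documented in the last hunk, a `transformation_func` that both takes and returns a `Dataset`, can be sketched as follows (hedged, TF 1.x `tf.data` API):

```python
import tensorflow as tf

def add_one(dataset):
    # A transformation_func: takes a Dataset and returns a Dataset.
    return dataset.map(lambda x: x + 1)

dataset = tf.data.Dataset.range(5).apply(add_one)  # yields 1, 2, 3, 4, 5
```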
diff --git a/tensorflow/python/data/util/nest.py b/tensorflow/python/data/util/nest.py
index e387e35740..e90ce3fb40 100644
--- a/tensorflow/python/data/util/nest.py
+++ b/tensorflow/python/data/util/nest.py
@@ -266,7 +266,7 @@ def map_structure(func, *structure, **check_types_dict):
and the return value will contain the results in the same structure.
Args:
- func: A callable that acceps as many arguments are there are structures.
+ func: A callable that accepts as many arguments as there are structures.
*structure: scalar, or tuple or list of constructed scalars and/or other
tuples/lists, or scalars. Note: numpy arrays are considered scalars.
**check_types_dict: only valid keyword argument is `check_types`. If set to
@@ -479,8 +479,8 @@ def map_structure_up_to(shallow_tree, func, *inputs):
The `inputs`, can be thought of as having the same structure as
`shallow_tree`, but with leaf nodes that are themselves tree structures.
- This function therefore will return something with the same base structure as
- `shallow_tree`.
+ This function, therefore, will return something with the same base structure
+ as `shallow_tree`.
Examples:
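Not from the patch, a hedged sketch of the behaviour these docstrings describe, using the internal `nest` helpers defined in this file (values illustrative):

```python
from tensorflow.python.data.util import nest

# map_structure applies `func` to the corresponding leaves of each structure.
doubled = nest.map_structure(lambda x: x * 2, (1, [2, 3]))   # => (2, [4, 6])

# map_structure_up_to only descends as deep as the shallow structure, so `func`
# receives whole subtrees of the deeper input.
shallow = ("a", "b")
deep = ((1, 2), (3, 4))
summed = nest.map_structure_up_to(shallow, lambda t: sum(t), deep)  # => (3, 7)
```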
diff --git a/tensorflow/python/data/util/sparse.py b/tensorflow/python/data/util/sparse.py
index 5ebcb4ea81..5e6d224709 100644
--- a/tensorflow/python/data/util/sparse.py
+++ b/tensorflow/python/data/util/sparse.py
@@ -141,7 +141,7 @@ def serialize_sparse_tensors(tensors):
tensors: a tensor structure to serialize.
Returns:
- `tensors` with any sparse tensors replaced by the their serialized version.
+ `tensors` with any sparse tensors replaced by their serialized version.
"""
ret = nest.pack_sequence_as(tensors, [
diff --git a/tensorflow/python/debug/cli/tensor_format.py b/tensorflow/python/debug/cli/tensor_format.py
index d4aea76d65..e0759a8bc1 100644
--- a/tensorflow/python/debug/cli/tensor_format.py
+++ b/tensorflow/python/debug/cli/tensor_format.py
@@ -535,7 +535,7 @@ def numeric_summary(tensor):
if not isinstance(tensor, np.ndarray) or not np.size(tensor):
return debugger_cli_common.RichTextLines([
"No numeric summary available due to empty tensor."])
- elif (np.issubdtype(tensor.dtype, np.float) or
+ elif (np.issubdtype(tensor.dtype, np.floating) or
np.issubdtype(tensor.dtype, np.complex) or
np.issubdtype(tensor.dtype, np.integer)):
counts = [
diff --git a/tensorflow/python/debug/lib/debug_data.py b/tensorflow/python/debug/lib/debug_data.py
index c4b13a1045..8d355aa27f 100644
--- a/tensorflow/python/debug/lib/debug_data.py
+++ b/tensorflow/python/debug/lib/debug_data.py
@@ -222,7 +222,7 @@ def has_inf_or_nan(datum, tensor):
# Also return False for data types that cannot be represented as numpy
# arrays.
return False
- elif (np.issubdtype(tensor.dtype, np.float) or
+ elif (np.issubdtype(tensor.dtype, np.floating) or
np.issubdtype(tensor.dtype, np.complex) or
np.issubdtype(tensor.dtype, np.integer)):
return np.any(np.isnan(tensor)) or np.any(np.isinf(tensor))
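All of the `np.float` to `np.floating` hunks make the same substitution; a brief NumPy illustration of the distinction (plain NumPy, runnable on its own):

```python
import numpy as np

# np.float is just an alias for the Python builtin float, so subtype checks
# against it are ambiguous; np.floating is the abstract floating-point type.
print(np.issubdtype(np.float32, np.floating))   # True
print(np.issubdtype(np.float64, np.floating))   # True
print(np.issubdtype(np.int32, np.floating))     # False
```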
diff --git a/tensorflow/python/eager/execution_callbacks.py b/tensorflow/python/eager/execution_callbacks.py
index 2f1654dda4..988442c971 100644
--- a/tensorflow/python/eager/execution_callbacks.py
+++ b/tensorflow/python/eager/execution_callbacks.py
@@ -153,7 +153,7 @@ def inf_nan_callback(op_type,
continue
numpy_dtype = output.dtype.as_numpy_dtype
- if (np.issubdtype(numpy_dtype, np.float) or
+ if (np.issubdtype(numpy_dtype, np.floating) or
np.issubdtype(numpy_dtype, np.complex) or
np.issubdtype(numpy_dtype, np.integer)):
try:
diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py
index 2bdec69303..706575985f 100644
--- a/tensorflow/python/estimator/canned/dnn_testing_utils.py
+++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py
@@ -877,7 +877,7 @@ class BaseDNNWarmStartingTest(object):
# Create a second DNNClassifier, warm-started from the first. Use a
# learning_rate = 0.0 optimizer to check values (use SGD so we don't have
- # accumulator values that change). Use a a new FeatureColumn with a
+ # accumulator values that change). Use a new FeatureColumn with a
# different vocabulary for occupation.
new_vocab_list = ['doctor', 'consultant', 'engineer']
new_vocab_file = os.path.join(self._ckpt_and_vocab_dir,
diff --git a/tensorflow/python/estimator/canned/linear_testing_utils.py b/tensorflow/python/estimator/canned/linear_testing_utils.py
index cccb9af4b2..3e9183cf1b 100644
--- a/tensorflow/python/estimator/canned/linear_testing_utils.py
+++ b/tensorflow/python/estimator/canned/linear_testing_utils.py
@@ -2003,7 +2003,7 @@ class BaseLinearWarmStartingTest(object):
# Create a second LinearClassifier, warm-started from the first. Use a
# learning_rate = 0.0 optimizer to check values (use SGD so we don't have
- # accumulator values that change). Use a a new FeatureColumn with a
+ # accumulator values that change). Use a new FeatureColumn with a
# different vocabulary for occupation.
new_vocab_list = ['doctor', 'consultant', 'engineer']
new_vocab_file = os.path.join(self._ckpt_and_vocab_dir,
diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index 6da890cd22..17fab3df4d 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -55,6 +55,7 @@ from tensorflow.python.training import saver
from tensorflow.python.training import training
from tensorflow.python.training import training_util
from tensorflow.python.util import compat
+from tensorflow.python.util import compat_internal
from tensorflow.python.util import nest
@@ -179,7 +180,7 @@ class Estimator(object):
self._config = config
# Model directory.
- model_dir = compat.path_to_str(model_dir)
+ model_dir = compat_internal.path_to_str(model_dir)
if (model_dir is not None) and (self._config.model_dir is not None):
if model_dir != self._config.model_dir:
# TODO(alanyee): remove this suppression after it is no longer needed
diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py
index 61a537022b..0c636a8da1 100644
--- a/tensorflow/python/estimator/run_config.py
+++ b/tensorflow/python/estimator/run_config.py
@@ -27,7 +27,7 @@ import six
from tensorflow.core.protobuf import config_pb2
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.training import server_lib
-from tensorflow.python.util import compat
+from tensorflow.python.util import compat_internal
_USE_DEFAULT = object()
@@ -444,7 +444,8 @@ class RunConfig(object):
if tf_config:
logging.info('TF_CONFIG environment variable: %s', tf_config)
- model_dir = _get_model_dir(tf_config, compat.path_to_str(model_dir))
+ model_dir = _get_model_dir(tf_config,
+ compat_internal.path_to_str(model_dir))
RunConfig._replace(
self,
diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional.py b/tensorflow/python/keras/_impl/keras/layers/convolutional.py
index b2ad4c4b65..2ee0732775 100644
--- a/tensorflow/python/keras/_impl/keras/layers/convolutional.py
+++ b/tensorflow/python/keras/_impl/keras/layers/convolutional.py
@@ -563,7 +563,7 @@ class Conv2DTranspose(tf_convolutional_layers.Conv2DTranspose, Layer):
return dict(list(base_config.items()) + list(config.items()))
-class Conv3DTranspose(tf_convolutional_layers.Conv3D, Layer):
+class Conv3DTranspose(tf_convolutional_layers.Conv3DTranspose, Layer):
"""Transposed convolution layer (sometimes called Deconvolution).
The need for transposed convolutions generally arises
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index c87b7652ad..3a6058054b 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1602,6 +1602,19 @@ cuda_py_test(
)
cuda_py_test(
+ name = "manip_ops_test",
+ size = "small",
+ srcs = ["manip_ops_test.py"],
+ additional_deps = [
+ "//third_party/py/numpy",
+ "//tensorflow/python:manip_ops",
+ "//tensorflow/python:client_testlib",
+ "//tensorflow/python:framework_for_generated_wrappers",
+ ],
+ tags = ["no_windows_gpu"],
+)
+
+cuda_py_test(
name = "matmul_op_test",
size = "small",
srcs = ["matmul_op_test.py"],
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index aae6d0a36e..7ec4624310 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -1162,6 +1162,27 @@ class InvertPermutationTest(test_util.TensorFlowTestCase):
self.assertAllEqual(y.eval(), [2, 4, 3, 0, 1])
+class UnravelIndexTest(test_util.TensorFlowTestCase):
+
+ def testUnravelIndex(self):
+ with self.test_session():
+ for dtype in [dtypes.int32, dtypes.int64]:
+ indices_1 = constant_op.constant(1621, dtype=dtype)
+ dims_1 = constant_op.constant([6, 7, 8, 9], dtype=dtype)
+ out_1 = array_ops.unravel_index(indices_1, dims_1)
+ self.assertAllEqual(out_1.eval(), [3, 1, 4, 1])
+
+ indices_2 = constant_op.constant([1621], dtype=dtype)
+ dims_2 = constant_op.constant([6, 7, 8, 9], dtype=dtype)
+ out_2 = array_ops.unravel_index(indices_2, dims_2)
+ self.assertAllEqual(out_2.eval(), [[3], [1], [4], [1]])
+
+ indices_3 = constant_op.constant([22, 41, 37], dtype=dtype)
+ dims_3 = constant_op.constant([7, 6], dtype=dtype)
+ out_3 = array_ops.unravel_index(indices_3, dims_3)
+ self.assertAllEqual(out_3.eval(), [[3, 6, 6], [4, 5, 1]])
+
+
class GuaranteeConstOpTest(test_util.TensorFlowTestCase):
def testSimple(self):
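The new `UnravelIndexTest` mirrors NumPy's `unravel_index`; the expected values above can be reproduced directly (plain NumPy):

```python
import numpy as np

# Flat index 1621 in a (6, 7, 8, 9) array maps to coordinates (3, 1, 4, 1):
# 1621 = 3*504 + 1*72 + 4*9 + 1, with row-major strides (504, 72, 9, 1).
print(np.unravel_index(1621, (6, 7, 8, 9)))    # (3, 1, 4, 1)

# Vector input returns one coordinate array per dimension.
print(np.unravel_index([22, 41, 37], (7, 6)))  # (array([3, 6, 6]), array([4, 5, 1]))
```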
diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py
index 030c690167..16e56349c4 100644
--- a/tensorflow/python/kernel_tests/constant_op_test.py
+++ b/tensorflow/python/kernel_tests/constant_op_test.py
@@ -454,18 +454,19 @@ class ZerosLikeTest(test.TestCase):
def testZerosLikeCPU(self):
for dtype in [
- dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8,
- dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32,
- dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64,
- dtypes_lib.complex128, dtypes_lib.string
+ dtypes_lib.half, dtypes_lib.float32, dtypes_lib.float64,
+ dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16,
+ dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool,
+ dtypes_lib.complex64, dtypes_lib.complex128, dtypes_lib.string
]:
self._compareZeros(dtype, fully_defined_shape=False, use_gpu=False)
self._compareZeros(dtype, fully_defined_shape=True, use_gpu=False)
def testZerosLikeGPU(self):
for dtype in [
- dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32,
- dtypes_lib.bool, dtypes_lib.int64, dtypes_lib.string
+ dtypes_lib.half, dtypes_lib.float32, dtypes_lib.float64,
+ dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.complex64,
+ dtypes_lib.complex128, dtypes_lib.bool
]:
self._compareZeros(dtype, fully_defined_shape=False, use_gpu=True)
self._compareZeros(dtype, fully_defined_shape=True, use_gpu=True)
diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py
index 3e9bd3dade..edfb20d6a2 100644
--- a/tensorflow/python/kernel_tests/conv_ops_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_test.py
@@ -24,6 +24,7 @@ import time
import numpy as np
+from six.moves import xrange # pylint: disable=redefined-builtin
from tensorflow.contrib import layers
from tensorflow.python.client import session as session_lib
from tensorflow.python.framework import constant_op
@@ -519,7 +520,7 @@ class Conv2DTest(test.TestCase):
dilations=[2, 2],
padding="VALID")
- # TODO this currently fails.
+ # TODO(yzhwang): this currently fails.
# self._VerifyValues(tensor_in_sizes=[1, 8, 8, 1],
# filter_in_sizes=[2, 2, 1, 1],
# strides=[4, 4], padding="SAME",
diff --git a/tensorflow/python/kernel_tests/decode_jpeg_op_test.py b/tensorflow/python/kernel_tests/decode_jpeg_op_test.py
index ead55cd03b..89fd26c544 100644
--- a/tensorflow/python/kernel_tests/decode_jpeg_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_jpeg_op_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
import os
import time
+from six.moves import xrange
from tensorflow.python.client import session
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
diff --git a/tensorflow/python/kernel_tests/io_ops_test.py b/tensorflow/python/kernel_tests/io_ops_test.py
index f91875c6f0..61944f7e31 100644
--- a/tensorflow/python/kernel_tests/io_ops_test.py
+++ b/tensorflow/python/kernel_tests/io_ops_test.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py
index 00c6706593..197dbf44af 100644
--- a/tensorflow/python/kernel_tests/losses_test.py
+++ b/tensorflow/python/kernel_tests/losses_test.py
@@ -953,14 +953,14 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
# Compute the expected loss 'manually'.
total = np.zeros((batch_size,))
for b in range(batch_size):
- for i in range(dims):
- for j in range(dims):
+ for i in range(dims - 1):
+ for j in range(i + 1, dims):
x = self._predictions[b, i].item() - self._predictions[b, j].item()
y = self._labels[b, i].item() - self._labels[b, j].item()
diff = (x - y)
total[b] += (diff * diff)
- self._expected_losses = np.divide(total, 9.0)
+ self._expected_losses = np.divide(total, 3.0)
def testValueErrorThrownWhenWeightIsNone(self):
with self.test_session():
@@ -1059,8 +1059,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
[[4, 8, 12], [1, 2, 3], [4, 5, 6]],
[[8, 1, 3], [7, 8, 9], [10, 11, 12]],
])
- self._test_valid_weights(
- labels, predictions, expected_loss=122.22222)
+ self._test_valid_weights(labels, predictions, expected_loss=137.5)
def test3dWeightedScalar(self):
labels = np.array([
@@ -1073,8 +1072,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
])
weight = 3.0
self._test_valid_weights(
- labels, predictions, expected_loss=weight * 122.22222,
- weights=weight)
+ labels, predictions, expected_loss=weight * 137.5, weights=weight)
def _test_invalid_weights(
self, labels, predictions, weights=1.0):
@@ -1124,7 +1122,9 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
])
self._test_valid_weights(
# TODO(ptucker): This doesn't look right.
- labels, predictions, expected_loss=9 * 122.22222,
+ labels,
+ predictions,
+ expected_loss=9 * 137.5,
weights=np.ones((2, 3, 3)))
def testLossWithAllZeroBatchSpecificWeights(self):
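The corrected expectations above average over the unique index pairs `i < j` (3 pairs when `dims == 3`) instead of all 9 ordered pairs; a hedged NumPy sketch of that arithmetic (example values are illustrative, not from the test):

```python
import numpy as np

def pairwise_loss(labels, predictions):
    # Average ((p_i - p_j) - (l_i - l_j))**2 over the unique pairs i < j.
    d = labels.shape[-1]
    total, pairs = 0.0, 0
    for i in range(d - 1):
        for j in range(i + 1, d):
            diff = (predictions[i] - predictions[j]) - (labels[i] - labels[j])
            total += diff * diff
            pairs += 1
    return total / pairs  # pairs == d * (d - 1) / 2, i.e. 3 when d == 3

print(pairwise_loss(np.array([0., 1., 2.]), np.array([0.5, 1.0, 1.5])))  # 0.5
```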
diff --git a/tensorflow/python/kernel_tests/manip_ops_test.py b/tensorflow/python/kernel_tests/manip_ops_test.py
new file mode 100644
index 0000000000..b8200ac0cb
--- /dev/null
+++ b/tensorflow/python/kernel_tests/manip_ops_test.py
@@ -0,0 +1,138 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for manip_ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import errors_impl
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import gradient_checker
+from tensorflow.python.ops import manip_ops
+from tensorflow.python.platform import test as test_lib
+
+# pylint: disable=g-import-not-at-top
+try:
+ from distutils.version import StrictVersion as Version
+ # numpy.roll for multiple shifts was introduced in numpy version 1.12.0
+ NP_ROLL_CAN_MULTISHIFT = Version(np.version.version) >= Version("1.12.0")
+except ImportError:
+ NP_ROLL_CAN_MULTISHIFT = False
+# pylint: enable=g-import-not-at-top
+
+
+class RollTest(test_util.TensorFlowTestCase):
+
+ def _testRoll(self, np_input, shift, axis):
+ expected_roll = np.roll(np_input, shift, axis)
+ with self.test_session():
+ roll = manip_ops.roll(np_input, shift, axis)
+ self.assertAllEqual(roll.eval(), expected_roll)
+
+ def _testGradient(self, np_input, shift, axis):
+ with self.test_session():
+ inx = constant_op.constant(np_input.tolist())
+ xs = list(np_input.shape)
+ y = manip_ops.roll(inx, shift, axis)
+ # Expected y's shape to be the same
+ ys = xs
+ jacob_t, jacob_n = gradient_checker.compute_gradient(
+ inx, xs, y, ys, x_init_value=np_input)
+ self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)
+
+ def _testAll(self, np_input, shift, axis):
+ self._testRoll(np_input, shift, axis)
+ if np_input.dtype == np.float32:
+ self._testGradient(np_input, shift, axis)
+
+ def testIntTypes(self):
+ for t in [np.int32, np.int64]:
+ self._testAll(np.random.randint(-100, 100, (5)).astype(t), 3, 0)
+ if NP_ROLL_CAN_MULTISHIFT:
+ self._testAll(
+ np.random.randint(-100, 100, (4, 4, 3)).astype(t), [1, -2, 3],
+ [0, 1, 2])
+ self._testAll(
+ np.random.randint(-100, 100, (4, 2, 1, 3)).astype(t), [0, 1, -2],
+ [1, 2, 3])
+
+ def testFloatTypes(self):
+ for t in [np.float32, np.float64]:
+ self._testAll(np.random.rand(5).astype(t), 2, 0)
+ if NP_ROLL_CAN_MULTISHIFT:
+ self._testAll(np.random.rand(3, 4).astype(t), [1, 2], [1, 0])
+ self._testAll(np.random.rand(1, 3, 4).astype(t), [1, 0, -3], [0, 1, 2])
+
+ def testComplexTypes(self):
+ for t in [np.complex64, np.complex128]:
+ x = np.random.rand(4, 4).astype(t)
+ self._testAll(x + 1j * x, 2, 0)
+ if NP_ROLL_CAN_MULTISHIFT:
+ x = np.random.rand(2, 5).astype(t)
+ self._testAll(x + 1j * x, [1, 2], [1, 0])
+ x = np.random.rand(3, 2, 1, 1).astype(t)
+ self._testAll(x + 1j * x, [2, 1, 1, 0], [0, 3, 1, 2])
+
+ def testRollInputMustVectorHigherRaises(self):
+ tensor = 7
+ shift = 1
+ axis = 0
+ with self.test_session():
+ with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+ "input must be 1-D or higher"):
+ manip_ops.roll(tensor, shift, axis).eval()
+
+ def testRollAxisMustBeScalarOrVectorRaises(self):
+ tensor = [[1, 2], [3, 4]]
+ shift = 1
+ axis = [[0, 1]]
+ with self.test_session():
+ with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+ "axis must be a scalar or a 1-D vector"):
+ manip_ops.roll(tensor, shift, axis).eval()
+
+ def testRollShiftMustBeScalarOrVectorRaises(self):
+ tensor = [[1, 2], [3, 4]]
+ shift = [[0, 1]]
+ axis = 1
+ with self.test_session():
+ with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+ "shift must be a scalar or a 1-D vector"):
+ manip_ops.roll(tensor, shift, axis).eval()
+
+ def testRollShiftAndAxisMustBeSameSizeRaises(self):
+ tensor = [[1, 2], [3, 4]]
+ shift = [1]
+ axis = [0, 1]
+ with self.test_session():
+ with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+ "shift and axis must have the same size"):
+ manip_ops.roll(tensor, shift, axis).eval()
+
+ def testRollAxisOutOfRangeRaises(self):
+ tensor = [1, 2]
+ shift = 1
+ axis = 1
+ with self.test_session():
+ with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+ "is out of range"):
+ manip_ops.roll(tensor, shift, axis).eval()
+
+
+if __name__ == "__main__":
+ test_lib.main()
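The new kernel tests check the op against `np.roll`; a short NumPy illustration of the semantics being tested (the multi-shift form needs NumPy >= 1.12, matching the `NP_ROLL_CAN_MULTISHIFT` guard):

```python
import numpy as np

x = np.arange(5)                                # [0 1 2 3 4]
print(np.roll(x, 2, axis=0))                    # [3 4 0 1 2]

m = np.arange(6).reshape(2, 3)                  # [[0 1 2] [3 4 5]]
# Multiple shifts over multiple axes in one call.
print(np.roll(m, shift=[1, -1], axis=[0, 1]))   # [[4 5 3] [1 2 0]]
```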
diff --git a/tensorflow/python/kernel_tests/rnn_test.py b/tensorflow/python/kernel_tests/rnn_test.py
index 0c77d1db92..daa42938e6 100644
--- a/tensorflow/python/kernel_tests/rnn_test.py
+++ b/tensorflow/python/kernel_tests/rnn_test.py
@@ -23,6 +23,7 @@ import timeit
import numpy as np
+from six.moves import xrange # pylint: disable=redefined-builtin
from tensorflow.contrib import rnn as contrib_rnn
from tensorflow.core.protobuf import config_pb2
from tensorflow.python.client import session
diff --git a/tensorflow/python/kernel_tests/tensordot_op_test.py b/tensorflow/python/kernel_tests/tensordot_op_test.py
index f1670a47f5..8ad29afd0a 100644
--- a/tensorflow/python/kernel_tests/tensordot_op_test.py
+++ b/tensorflow/python/kernel_tests/tensordot_op_test.py
@@ -66,7 +66,7 @@ class TensordotTest(test_lib.TestCase):
a = [[1, 2], [3, 4]]
b = [[1, 2], [3, 4]]
# Invalid static axes.
- for axes_value in -1, 0, [1], [[1]], [[1], [0, 1]]:
+ for axes_value in -1, 3, [1], [[1]], [[1], [0, 1]]:
with self.assertRaises(ValueError):
math_ops.tensordot(a, b, axes_value)
@@ -91,7 +91,7 @@ class TensordotTest(test_lib.TestCase):
# Test case for 11950
def test_valid_axis(self):
- for axes_value in [1, 2], [[1], [2]]:
+ for axes_value in [1, 2], [[1], [2]], [[], []], 0:
with self.test_session() as sess:
np_a = np.ones((3, 3))
np_b = np.array([2, 3, 1])[None, None]
@@ -105,29 +105,29 @@ class TensordotTest(test_lib.TestCase):
self.assertAllEqual(tf_ans, np_ans)
def test_partial_shape_inference(self):
- a = array_ops.placeholder(dtypes.float32)
- b = array_ops.placeholder(dtypes.float32)
- axes = ([1], [0])
- output = math_ops.tensordot(a, b, axes)
- self.assertEqual(output.get_shape().ndims, None)
- a.set_shape([None, 2])
- b.set_shape([2, 3])
- output = math_ops.tensordot(a, b, axes)
- output_shape = output.get_shape()
- self.assertEqual(output_shape.ndims, 2)
- output_shape = output_shape.as_list()
- self.assertEqual(output_shape[0], None)
- self.assertEqual(output_shape[1], 3)
- a = array_ops.placeholder(dtypes.float32)
- b = array_ops.placeholder(dtypes.float32)
- a.set_shape([2, 2])
- b.set_shape([2, None])
- output = math_ops.tensordot(a, b, axes)
- output_shape = output.get_shape()
- self.assertEqual(output_shape.ndims, 2)
- output_shape = output_shape.as_list()
- self.assertEqual(output_shape[0], 2)
- self.assertEqual(output_shape[1], None)
+ for axes in ([1], [0]), 1:
+ a = array_ops.placeholder(dtypes.float32)
+ b = array_ops.placeholder(dtypes.float32)
+ output = math_ops.tensordot(a, b, axes)
+ self.assertEqual(output.get_shape().ndims, None)
+ a.set_shape([None, 2])
+ b.set_shape([2, 3])
+ output = math_ops.tensordot(a, b, axes)
+ output_shape = output.get_shape()
+ self.assertEqual(output_shape.ndims, 2)
+ output_shape = output_shape.as_list()
+ self.assertEqual(output_shape[0], None)
+ self.assertEqual(output_shape[1], 3)
+ a = array_ops.placeholder(dtypes.float32)
+ b = array_ops.placeholder(dtypes.float32)
+ a.set_shape([2, 2])
+ b.set_shape([2, None])
+ output = math_ops.tensordot(a, b, axes)
+ output_shape = output.get_shape()
+ self.assertEqual(output_shape.ndims, 2)
+ output_shape = output_shape.as_list()
+ self.assertEqual(output_shape[0], 2)
+ self.assertEqual(output_shape[1], None)
def _get_tensordot_tests(dtype_, rank_a_, rank_b_, num_dims_, dynamic_shape_):
@@ -196,8 +196,8 @@ def _get_tensordot_tests(dtype_, rank_a_, rank_b_, num_dims_, dynamic_shape_):
low=-1.0, high=1.0, size=np.prod(shape)).reshape(shape).astype(dtype_)
b_np = np.random.uniform(
low=-1.0, high=1.0, size=np.prod(shape)).reshape(shape).astype(dtype_)
- all_axes = [1]
- if a_np.ndim > 1:
+ all_axes = [0, 1]
+ if a_np.ndim > 2:
all_axes.append(a_np.ndim - 1)
for axes in all_axes:
np_ans = np.tensordot(a_np, b_np, axes=axes)
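The widened `axes` cases exercised above (scalar `0`, empty lists) reduce to an outer product; a hedged NumPy sketch of the forms involved:

```python
import numpy as np

a = np.ones((3, 3))
b = np.arange(3.0)

print(np.tensordot(a, b, axes=1).shape)           # (3,)  contract last axis of a with b
print(np.tensordot(a, b, axes=[[1], [0]]).shape)  # (3,)  same contraction, explicit axes
print(np.tensordot(a, b, axes=0).shape)           # (3, 3, 3)  outer product, no contraction
print(np.tensordot(a, b, axes=[[], []]).shape)    # (3, 3, 3)  empty axes == outer product
```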
diff --git a/tensorflow/python/kernel_tests/topk_op_test.py b/tensorflow/python/kernel_tests/topk_op_test.py
index efb5b9f364..6ab931fdb9 100644
--- a/tensorflow/python/kernel_tests/topk_op_test.py
+++ b/tensorflow/python/kernel_tests/topk_op_test.py
@@ -58,7 +58,7 @@ class TopKTest(test.TestCase):
# Do some special casing of equality of indices: if indices
# are not the same, but values are floating type, ensure that
# the values are within epsilon of each other.
- if not np.issubdtype(np_expected_values.dtype, np.float):
+ if not np.issubdtype(np_expected_values.dtype, np.floating):
# Values are not floating point type; check indices exactly
self.assertAllEqual(np_expected_indices, indices)
else:
diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py
index 79c421f4c9..e8dba3cea3 100644
--- a/tensorflow/python/layers/convolutional.py
+++ b/tensorflow/python/layers/convolutional.py
@@ -1094,7 +1094,7 @@ class SeparableConv1D(_SeparableConv):
strides = (1, 1, 1) + self.strides
spatial_start_dim = 2
- # Explictly broadcast inputs and kernels to 4D.
+ # Explicitly broadcast inputs and kernels to 4D.
# TODO(fchollet): refactor when a native separable_conv1d op is available.
inputs = array_ops.expand_dims(inputs, spatial_start_dim)
depthwise_kernel = array_ops.expand_dims(self.depthwise_kernel, 0)
@@ -1904,6 +1904,7 @@ class Conv3DTranspose(Conv3D):
dtype=self.dtype)
else:
self.bias = None
+ self.built = True
def call(self, inputs):
inputs_shape = array_ops.shape(inputs)
@@ -1974,6 +1975,8 @@ class Conv3DTranspose(Conv3D):
if self.use_bias:
outputs_shape = outputs.shape.as_list()
+ if outputs_shape[0] is None:
+ outputs_shape[0] = -1
if self.data_format == 'channels_first':
outputs_4d = array_ops.reshape(outputs, [
outputs_shape[0], outputs_shape[1],
@@ -2007,11 +2010,11 @@ class Conv3DTranspose(Conv3D):
output_shape[c_axis] = self.filters
output_shape[d_axis] = utils.deconv_output_length(
- output_shape[d_axis], stride_d, kernel_d, self.padding)
+ output_shape[d_axis], kernel_d, self.padding, stride_d)
output_shape[h_axis] = utils.deconv_output_length(
- output_shape[h_axis], stride_h, kernel_h, self.padding)
+ output_shape[h_axis], kernel_h, self.padding, stride_h)
output_shape[w_axis] = utils.deconv_output_length(
- output_shape[w_axis], stride_w, kernel_w, self.padding)
+ output_shape[w_axis], kernel_w, self.padding, stride_w)
return tensor_shape.TensorShape(output_shape)
diff --git a/tensorflow/python/layers/utils.py b/tensorflow/python/layers/utils.py
index e8be347799..7407d9a7b3 100644
--- a/tensorflow/python/layers/utils.py
+++ b/tensorflow/python/layers/utils.py
@@ -81,7 +81,7 @@ def normalize_tuple(value, n, name):
for single_value in value_tuple:
try:
int(single_value)
- except ValueError:
+ except (ValueError, TypeError):
raise ValueError('The `' + name + '` argument must be a tuple of ' +
str(n) + ' integers. Received: ' + str(value) + ' '
'including element ' + str(single_value) + ' of type' +
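The extra TypeError matters because int(None), or int() on any other non-numeric object, raises TypeError rather than ValueError. A standalone sketch of the behaviour being fixed, not TensorFlow's actual helper:

def check_int(single_value, name='kernel_size'):
    # int('a') raises ValueError, int(None) raises TypeError; both should
    # surface as the same friendly error about the layer argument.
    try:
        int(single_value)
    except (ValueError, TypeError):
        raise ValueError('The `%s` argument must contain integers; got %r'
                         % (name, single_value))

check_int(3)        # fine
# check_int(None)   # would now raise the friendly ValueError, not a TypeError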
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index e3902f5a8a..ad409ad7e5 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -35,6 +35,7 @@ See the @{$python/array_ops} guide.
@@reshape
@@squeeze
@@expand_dims
+@@unravel_index
@@meshgrid
@@slice
@@strided_slice
@@ -1589,9 +1590,9 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True):
Args:
tensor: A `Tensor`.
- dtype: A type for the returned `Tensor`. Must be `float32`, `float64`,
- `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`,
- `complex64`, `complex128` or `bool`.
+ dtype: A type for the returned `Tensor`. Must be `float16`, `float32`,
+ `float64`, `int8`, `uint8`, `int16`, `uint16`, `int32`, `int64`,
+ `complex64`, `complex128`, `bool` or `string`.
name: A name for the operation (optional).
optimize: if true, attempt to statically determine the shape of 'tensor'
and encode it as a constant.
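A short usage sketch of the dtypes the corrected docstring now lists, assuming a TF 1.x graph context with tensorflow imported as tf:

import tensorflow as tf

x = tf.constant([[1.0, 2.0], [3.0, 4.0]])
z_f16 = tf.zeros_like(x, dtype=tf.float16)   # zeros cast to float16
z_str = tf.zeros_like(x, dtype=tf.string)    # empty strings, per the docstring above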
diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py
index 7dbccf1caf..ac03d30fcd 100644
--- a/tensorflow/python/ops/functional_ops.py
+++ b/tensorflow/python/ops/functional_ops.py
@@ -458,7 +458,7 @@ def scan(fn, elems, initializer=None, parallel_iterations=10, back_prop=True,
For example, if `elems` is `(t1, [t2, t3])` and `initializer` is
`[i1, i2]` then an appropriate signature for `fn` in `python2` is:
- `fn = lambda (acc_p1, acc_p2), (t1 [t2, t3]):` and `fn` must return a list,
+ `fn = lambda (acc_p1, acc_p2), (t1, [t2, t3]):` and `fn` must return a list,
`[acc_n1, acc_n2]`. An alternative correct signature for `fn`, and the
one that works in `python3`, is:
`fn = lambda a, t:`, where `a` and `t` correspond to the input tuples.
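A minimal sketch of the corrected structured signature, using the python3-friendly two-argument form the docstring also mentions; shapes and values are illustrative only:

import tensorflow as tf

elems = (tf.constant([1., 2., 3.]),
         [tf.constant([10., 20., 30.]), tf.constant([.1, .2, .3])])
initializer = [tf.constant(0.), tf.constant(0.)]

def fn(a, t):
    # a mirrors `initializer` ([acc_1, acc_2]); t mirrors `elems` ((t1, [t2, t3])).
    t1, (t2, t3) = t
    return [a[0] + t1 + t2, a[1] + t3]

accumulated = tf.scan(fn, elems, initializer=initializer)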
diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py
index 28b26a09a5..9f06c0ee1f 100644
--- a/tensorflow/python/ops/gradients_impl.py
+++ b/tensorflow/python/ops/gradients_impl.py
@@ -44,6 +44,7 @@ from tensorflow.python.ops import image_grad # pylint: disable=unused-import
from tensorflow.python.ops import linalg_grad # pylint: disable=unused-import
from tensorflow.python.ops import linalg_ops # pylint: disable=unused-import
from tensorflow.python.ops import logging_ops # pylint: disable=unused-import
+from tensorflow.python.ops import manip_grad # pylint: disable=unused-import
from tensorflow.python.ops import math_grad # pylint: disable=unused-import
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import resource_variable_ops
diff --git a/tensorflow/python/ops/image_ops.py b/tensorflow/python/ops/image_ops.py
index 3b0b5a978c..de12c5f63f 100644
--- a/tensorflow/python/ops/image_ops.py
+++ b/tensorflow/python/ops/image_ops.py
@@ -49,6 +49,10 @@ See the @{$python/image} guide.
@@grayscale_to_rgb
@@hsv_to_rgb
@@rgb_to_hsv
+@@rgb_to_yiq
+@@yiq_to_rgb
+@@rgb_to_yuv
+@@yuv_to_rgb
@@convert_image_dtype
@@adjust_brightness
@@random_brightness
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 2c231ef56c..14a38f25d1 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1508,7 +1508,7 @@ def sample_distorted_bounding_box(image_size,
bounding_boxes,
seed=None,
seed2=None,
- min_object_covered=None,
+ min_object_covered=0.1,
aspect_ratio_range=None,
area_range=None,
max_attempts=None,
@@ -1669,3 +1669,107 @@ def non_max_suppression(boxes,
return gen_image_ops._non_max_suppression_v2(boxes, scores, max_output_size,
iou_threshold)
# pylint: enable=protected-access
+
+
+_rgb_to_yiq_kernel = [[0.299, 0.59590059,
+ 0.2115], [0.587, -0.27455667, -0.52273617],
+ [0.114, -0.32134392, 0.31119955]]
+
+
+def rgb_to_yiq(images):
+ """Converts one or more images from RGB to YIQ.
+
+ Outputs a tensor of the same shape as the `images` tensor, containing the YIQ
+ value of the pixels.
+  The output is only well defined if the values in `images` are in [0, 1].
+
+ Args:
+ images: 2-D or higher rank. Image data to convert. Last dimension must be
+ size 3.
+
+ Returns:
+ images: tensor with the same shape as `images`.
+ """
+ images = ops.convert_to_tensor(images, name='images')
+ kernel = ops.convert_to_tensor(
+ _rgb_to_yiq_kernel, dtype=images.dtype, name='kernel')
+ ndims = images.get_shape().ndims
+ return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
+
+
+_yiq_to_rgb_kernel = [[1, 1, 1], [0.95598634, -0.27201283, -1.10674021],
+ [0.6208248, -0.64720424, 1.70423049]]
+
+
+def yiq_to_rgb(images):
+ """Converts one or more images from YIQ to RGB.
+
+ Outputs a tensor of the same shape as the `images` tensor, containing the RGB
+ value of the pixels.
+  The output is only well defined if the Y values in `images` are in [0, 1],
+  the I values are in [-0.5957, 0.5957], and the Q values are in [-0.5226, 0.5226].
+
+ Args:
+ images: 2-D or higher rank. Image data to convert. Last dimension must be
+ size 3.
+
+ Returns:
+ images: tensor with the same shape as `images`.
+ """
+ images = ops.convert_to_tensor(images, name='images')
+ kernel = ops.convert_to_tensor(
+ _yiq_to_rgb_kernel, dtype=images.dtype, name='kernel')
+ ndims = images.get_shape().ndims
+ return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
+
+
+_rgb_to_yuv_kernel = [[0.299, -0.14714119,
+ 0.61497538], [0.587, -0.28886916, -0.51496512],
+ [0.114, 0.43601035, -0.10001026]]
+
+
+def rgb_to_yuv(images):
+ """Converts one or more images from RGB to YUV.
+
+ Outputs a tensor of the same shape as the `images` tensor, containing the YUV
+ value of the pixels.
+  The output is only well defined if the values in `images` are in [0, 1].
+
+ Args:
+ images: 2-D or higher rank. Image data to convert. Last dimension must be
+ size 3.
+
+ Returns:
+ images: tensor with the same shape as `images`.
+ """
+ images = ops.convert_to_tensor(images, name='images')
+ kernel = ops.convert_to_tensor(
+ _rgb_to_yuv_kernel, dtype=images.dtype, name='kernel')
+ ndims = images.get_shape().ndims
+ return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
+
+
+_yuv_to_rgb_kernel = [[1, 1, 1], [0, -0.394642334, 2.03206185],
+ [1.13988303, -0.58062185, 0]]
+
+
+def yuv_to_rgb(images):
+ """Converts one or more images from YUV to RGB.
+
+ Outputs a tensor of the same shape as the `images` tensor, containing the RGB
+ value of the pixels.
+  The output is only well defined if the Y values in `images` are in [0, 1]
+  and the U and V values are in [-0.5, 0.5].
+
+ Args:
+ images: 2-D or higher rank. Image data to convert. Last dimension must be
+ size 3.
+
+ Returns:
+ images: tensor with the same shape as `images`.
+ """
+ images = ops.convert_to_tensor(images, name='images')
+ kernel = ops.convert_to_tensor(
+ _yuv_to_rgb_kernel, dtype=images.dtype, name='kernel')
+ ndims = images.get_shape().ndims
+ return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
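A quick NumPy sanity check, independent of TensorFlow, that the forward and inverse kernels above compose to (approximately) the identity, which is what the batch round-trip tests below depend on:

import numpy as np

rgb_to_yiq = np.array([[0.299, 0.59590059, 0.2115],
                       [0.587, -0.27455667, -0.52273617],
                       [0.114, -0.32134392, 0.31119955]])
yiq_to_rgb = np.array([[1, 1, 1],
                       [0.95598634, -0.27201283, -1.10674021],
                       [0.6208248, -0.64720424, 1.70423049]])

# tensordot contracts the channel axis with the first kernel axis, so a pixel
# row-vector p maps to p @ K; the round trip p @ rgb_to_yiq @ yiq_to_rgb
# returns p when the product below is (approximately) the identity matrix.
print(np.round(rgb_to_yiq @ yiq_to_rgb, 3))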
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index 47dd8231c0..b12bd3d5b0 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -85,6 +85,64 @@ class RGBToHSVTest(test_util.TensorFlowTestCase):
self.assertAllClose(rgb_tf, rgb_np)
+class RGBToYIQTest(test_util.TensorFlowTestCase):
+
+ def testBatch(self):
+ # Build an arbitrary RGB image
+ np.random.seed(7)
+ batch_size = 5
+ shape = (batch_size, 2, 7, 3)
+
+ for nptype in [np.float32, np.float64]:
+ inp = np.random.rand(*shape).astype(nptype)
+
+ # Convert to YIQ and back, as a batch and individually
+ with self.test_session(use_gpu=True) as sess:
+ batch0 = constant_op.constant(inp)
+ batch1 = image_ops.rgb_to_yiq(batch0)
+ batch2 = image_ops.yiq_to_rgb(batch1)
+ split0 = array_ops.unstack(batch0)
+ split1 = list(map(image_ops.rgb_to_yiq, split0))
+ split2 = list(map(image_ops.yiq_to_rgb, split1))
+ join1 = array_ops.stack(split1)
+ join2 = array_ops.stack(split2)
+ batch1, batch2, join1, join2 = sess.run([batch1, batch2, join1, join2])
+
+ # Verify that processing batch elements together is the same as separate
+ self.assertAllClose(batch1, join1, rtol=1e-4, atol=1e-4)
+ self.assertAllClose(batch2, join2, rtol=1e-4, atol=1e-4)
+ self.assertAllClose(batch2, inp, rtol=1e-4, atol=1e-4)
+
+
+class RGBToYUVTest(test_util.TensorFlowTestCase):
+
+ def testBatch(self):
+ # Build an arbitrary RGB image
+ np.random.seed(7)
+ batch_size = 5
+ shape = (batch_size, 2, 7, 3)
+
+ for nptype in [np.float32, np.float64]:
+ inp = np.random.rand(*shape).astype(nptype)
+
+ # Convert to YUV and back, as a batch and individually
+ with self.test_session(use_gpu=True) as sess:
+ batch0 = constant_op.constant(inp)
+ batch1 = image_ops.rgb_to_yuv(batch0)
+ batch2 = image_ops.yuv_to_rgb(batch1)
+ split0 = array_ops.unstack(batch0)
+ split1 = list(map(image_ops.rgb_to_yuv, split0))
+ split2 = list(map(image_ops.yuv_to_rgb, split1))
+ join1 = array_ops.stack(split1)
+ join2 = array_ops.stack(split2)
+ batch1, batch2, join1, join2 = sess.run([batch1, batch2, join1, join2])
+
+ # Verify that processing batch elements together is the same as separate
+ self.assertAllClose(batch1, join1, rtol=1e-4, atol=1e-4)
+ self.assertAllClose(batch2, join2, rtol=1e-4, atol=1e-4)
+ self.assertAllClose(batch2, inp, rtol=1e-4, atol=1e-4)
+
+
class GrayscaleToRGBTest(test_util.TensorFlowTestCase):
def _RGBToGrayscale(self, images):
@@ -1839,6 +1897,26 @@ class SelectDistortedCropBoxTest(test_util.TensorFlowTestCase):
self.assertAllEqual([3], end.get_shape().as_list())
self.assertAllEqual([1, 1, 4], bbox_for_drawing.get_shape().as_list())
+ def testDefaultMinObjectCovered(self):
+ # By default min_object_covered=0.1 if not provided
+ with self.test_session(use_gpu=True):
+ image_size = constant_op.constant(
+ [40, 50, 1], shape=[3], dtype=dtypes.int32)
+ bounding_box = constant_op.constant(
+ [0.0, 0.0, 1.0, 1.0],
+ shape=[4],
+ dtype=dtypes.float32,
+ )
+ begin, end, bbox_for_drawing = image_ops.sample_distorted_bounding_box(
+ image_size=image_size,
+ bounding_boxes=bounding_box,
+ aspect_ratio_range=(0.75, 1.33),
+ area_range=(0.05, 1.0))
+
+ self.assertAllEqual([3], begin.get_shape().as_list())
+ self.assertAllEqual([3], end.get_shape().as_list())
+ self.assertAllEqual([1, 1, 4], bbox_for_drawing.get_shape().as_list())
+
class ResizeImagesTest(test_util.TensorFlowTestCase):
@@ -3092,6 +3170,40 @@ class NonMaxSuppressionTest(test_util.TensorFlowTestCase):
boxes, scores, max_output_size, iou_threshold).eval()
self.assertAllClose(selected_indices, [3, 0, 5])
+ def testInvalidShape(self):
+ # The boxes should be 2D of shape [num_boxes, 4].
+ with self.assertRaisesRegexp(ValueError,
+ "Shape must be rank 2 but is rank 1"):
+ boxes = constant_op.constant([0.0, 0.0, 1.0, 1.0])
+ scores = constant_op.constant([0.9])
+ image_ops.non_max_suppression(boxes, scores, 3, 0.5)
+
+ with self.assertRaisesRegexp(ValueError, "Dimension must be 4 but is 3"):
+ boxes = constant_op.constant([[0.0, 0.0, 1.0]])
+ scores = constant_op.constant([0.9])
+ image_ops.non_max_suppression(boxes, scores, 3, 0.5)
+
+ # The scores should be 1D of shape [num_boxes].
+ with self.assertRaisesRegexp(ValueError,
+ "Shape must be rank 1 but is rank 2"):
+ boxes = constant_op.constant([[0.0, 0.0, 1.0, 1.0]])
+ scores = constant_op.constant([[0.9]])
+ image_ops.non_max_suppression(boxes, scores, 3, 0.5)
+
+    # The max_output_size should be a scalar (0-D).
+ with self.assertRaisesRegexp(ValueError,
+ "Shape must be rank 0 but is rank 1"):
+ boxes = constant_op.constant([[0.0, 0.0, 1.0, 1.0]])
+ scores = constant_op.constant([0.9])
+ image_ops.non_max_suppression(boxes, scores, [3], 0.5)
+
+    # The iou_threshold should be a scalar (0-D).
+ with self.assertRaisesRegexp(ValueError,
+ "Shape must be rank 0 but is rank 2"):
+ boxes = constant_op.constant([[0.0, 0.0, 1.0, 1.0]])
+ scores = constant_op.constant([0.9])
+ image_ops.non_max_suppression(boxes, scores, 3, [[0.5]])
+
if __name__ == "__main__":
googletest.main()
diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py
index 13a32c83d9..3cbbf3412a 100644
--- a/tensorflow/python/ops/linalg_grad.py
+++ b/tensorflow/python/ops/linalg_grad.py
@@ -277,20 +277,28 @@ def _SvdGrad(op, grad_s, grad_u, grad_v):
# https://j-towns.github.io/papers/svd-derivative.pdf
a = op.inputs[0]
a_shape = a.get_shape().with_rank_at_least(2)
+ grad_s_mat = array_ops.matrix_diag(grad_s)
- if op.get_attr("compute_uv"):
- # TODO(rmlarsen): Make this work with complex types.
- if a.dtype.is_complex:
- raise NotImplementedError(
- "SVD gradient is not implemented for complex types and "
- "compute_uv=True.")
- grad_u_shape = grad_u.get_shape().with_rank_at_least(2)
- grad_v_shape = grad_v.get_shape().with_rank_at_least(2)
- m = a_shape[-2].merge_with(grad_u_shape[-2])
- n = a_shape[-1].merge_with(grad_v_shape[-2])
- batch_shape = a_shape[:-2].merge_with(grad_u_shape[:-2]).merge_with(
- grad_v_shape[:-2])
- a_shape = batch_shape.concatenate([m, n])
+ if not op.get_attr("compute_uv"):
+ s, u, v = linalg_ops.svd(a, compute_uv=True)
+ grad_a = math_ops.matmul(u, math_ops.matmul(grad_s_mat, v, adjoint_b=True))
+ grad_a.set_shape(a_shape)
+ return grad_a
+
+ full_matrices = op.get_attr("full_matrices")
+
+ # TODO(rmlarsen): Make this work with complex types.
+ if a.dtype.is_complex:
+ raise NotImplementedError(
+ "SVD gradient is not implemented for complex types and "
+ "compute_uv=True.")
+ grad_u_shape = grad_u.get_shape().with_rank_at_least(2)
+ grad_v_shape = grad_v.get_shape().with_rank_at_least(2)
+ m = a_shape[-2].merge_with(grad_u_shape[-2])
+ n = a_shape[-1].merge_with(grad_v_shape[-2])
+ batch_shape = a_shape[:-2].merge_with(grad_u_shape[:-2]).merge_with(
+ grad_v_shape[:-2])
+ a_shape = batch_shape.concatenate([m, n])
m = a_shape[-2].value
n = a_shape[-1].value
@@ -300,12 +308,9 @@ def _SvdGrad(op, grad_s, grad_u, grad_v):
"SVD gradient has not been implemented for input with unknown "
"inner matrix shape.")
- if not op.get_attr("compute_uv"):
- s, u, v = linalg_ops.svd(a, compute_uv=True, full_matrices=True)
- else:
- s = op.outputs[0]
- u = op.outputs[1]
- v = op.outputs[2]
+ s = op.outputs[0]
+ u = op.outputs[1]
+ v = op.outputs[2]
use_adjoint = False
if m > n:
@@ -317,19 +322,7 @@ def _SvdGrad(op, grad_s, grad_u, grad_v):
grad_u, grad_v = grad_v, grad_u
with ops.control_dependencies([grad_s, grad_u, grad_v]):
- grad_s_mat = array_ops.matrix_diag(grad_s)
- if not op.get_attr("compute_uv"):
- if use_adjoint:
- grad_a = math_ops.matmul(
- v[..., :, :m], math_ops.matmul(u, grad_s_mat), adjoint_b=True)
- else:
- grad_a = math_ops.matmul(u,
- math_ops.matmul(
- grad_s_mat, v[..., :, :m], adjoint_b=True))
- grad_a.set_shape(a_shape)
- return grad_a
-
- if op.get_attr("full_matrices") and abs(m - n) > 1:
+ if full_matrices and abs(m - n) > 1:
raise NotImplementedError(
"svd gradient is not implemented for abs(m - n) > 1 "
"when full_matrices is True")
@@ -371,7 +364,7 @@ def _SvdGrad(op, grad_s, grad_u, grad_v):
gv1t_v1 = math_ops.matmul(gv1t, v1)
term2_nous = gv1t - math_ops.matmul(gv1t_v1, v1, adjoint_b=True)
- if op.get_attr("full_matrices"):
+ if full_matrices:
v2 = v[..., :, m:n]
grad_v2 = grad_v[..., :, m:n]
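The early-exit branch added above computes dA = U diag(grad_s) V^T when only the singular values were requested. A minimal NumPy finite-difference check of that formula, assuming distinct singular values:

import numpy as np

np.random.seed(0)
a = np.random.randn(4, 3)
u, s, vt = np.linalg.svd(a, full_matrices=False)
grad_s = np.random.randn(3)                 # upstream gradient w.r.t. s
grad_a = u @ np.diag(grad_s) @ vt           # claimed gradient w.r.t. a

eps = 1e-6
e = np.zeros_like(a)
e[0, 0] = eps
f = lambda m: np.sum(grad_s * np.linalg.svd(m, compute_uv=False))
fd = (f(a + e) - f(a - e)) / (2 * eps)      # finite difference for a[0, 0]
assert np.isclose(fd, grad_a[0, 0], atol=1e-4)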
diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index e75a9b22e4..84afbf0627 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -547,12 +547,13 @@ def mean_pairwise_squared_error(
num_present_per_batch = _num_present(diffs, weights, per_batch=True)
term1 = 2.0 * _safe_div(sum_squares_diff_per_batch,
- num_present_per_batch)
+ num_present_per_batch - 1)
sum_diff = math_ops.reduce_sum(
diffs, reduction_indices=reduction_indices, keep_dims=True)
- term2 = 2.0 * _safe_div(math_ops.square(sum_diff),
- math_ops.square(num_present_per_batch))
+ term2 = 2.0 * _safe_div(
+ math_ops.square(sum_diff),
+ math_ops.multiply(num_present_per_batch, num_present_per_batch - 1))
weighted_losses = math_ops.multiply(term1 - term2, weights)
loss = math_ops.reduce_sum(weighted_losses)
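With the n - 1 corrections above, term1 - term2 equals the mean of (d_i - d_j)^2 over all ordered pairs i != j rather than a biased variant. A small NumPy check of that identity on made-up per-element diffs:

import numpy as np

d = np.array([0.5, -1.0, 2.0, 0.0])   # per-element diffs for one sample
n = d.size

term1 = 2.0 * np.sum(d ** 2) / (n - 1)
term2 = 2.0 * np.sum(d) ** 2 / (n * (n - 1))

pairwise = np.mean([(d[i] - d[j]) ** 2
                    for i in range(n) for j in range(n) if i != j])
assert np.isclose(term1 - term2, pairwise)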
diff --git a/tensorflow/python/ops/manip_grad.py b/tensorflow/python/ops/manip_grad.py
new file mode 100644
index 0000000000..bb2069359d
--- /dev/null
+++ b/tensorflow/python/ops/manip_grad.py
@@ -0,0 +1,31 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Gradients for operators defined in manip_ops.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import manip_ops
+
+
+@ops.RegisterGradient("Roll")
+def _RollGrad(op, grad):
+ # The gradient is just the roll reversed
+ shift = op.inputs[1]
+ axis = op.inputs[2]
+ roll_grad = manip_ops.roll(grad, -shift, axis)
+ return roll_grad, None, None
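The gradient registered above relies on a roll being undone by rolling the same amount in the opposite direction; a one-line NumPy check of that identity (np.roll has the same semantics for this purpose):

import numpy as np

x = np.arange(10.0)
shift, axis = 3, 0
assert np.array_equal(np.roll(np.roll(x, shift, axis), -shift, axis), x)
# Hence d(loss)/d(input) is simply the upstream gradient rolled by -shift.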
diff --git a/tensorflow/python/ops/manip_ops.py b/tensorflow/python/ops/manip_ops.py
new file mode 100644
index 0000000000..91e15b47b9
--- /dev/null
+++ b/tensorflow/python/ops/manip_ops.py
@@ -0,0 +1,38 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Operators for manipulating tensors.
+
+@@roll
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.ops import gen_manip_ops as _gen_manip_ops
+from tensorflow.python.util.all_util import remove_undocumented
+
+
+# pylint: disable=protected-access
+def roll(input, shift, axis): # pylint: disable=redefined-builtin
+ return _gen_manip_ops.roll(input, shift, axis)
+
+
+roll.__doc__ = _gen_manip_ops.roll.__doc__
+# pylint: enable=protected-access
+
+_allowed_symbols = ['roll']
+
+remove_undocumented(__name__, allowed_exception_list=_allowed_symbols)
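A usage sketch of the new wrapper, assuming the generated Roll kernel matches np.roll semantics:

import tensorflow as tf

x = tf.constant([1, 2, 3, 4, 5])
rolled = tf.manip.roll(x, shift=2, axis=0)   # evaluates to [4, 5, 1, 2, 3]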
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index 827e3caa36..9a8ac93de9 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -2826,10 +2826,14 @@ def tensordot(a, b, axes, name=None):
"""Generates two sets of contraction axes for the two tensor arguments."""
a_shape = a.get_shape()
if isinstance(axes, compat.integral_types):
- if axes < 1:
- raise ValueError("'axes' must be at least 1.")
+ if axes < 0:
+ raise ValueError("'axes' must be at least 0.")
if a_shape.ndims is not None:
- return range(a_shape.ndims - axes, a_shape.ndims), range(axes)
+ if axes > a_shape.ndims:
+ raise ValueError("'axes' must not be larger than the number of "
+ "dimensions of tensor %s." % a)
+ return (list(xrange(a_shape.ndims - axes, a_shape.ndims)),
+ list(xrange(axes)))
else:
rank = array_ops.rank(a)
return (range(rank - axes, rank, dtype=dtypes.int32),
diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py
index 24c6f64f0a..da80e72071 100644
--- a/tensorflow/python/ops/rnn.py
+++ b/tensorflow/python/ops/rnn.py
@@ -1127,6 +1127,12 @@ def raw_rnn(cell, loop_fn,
def _copy_some_through(current, candidate):
"""Copy some tensors through via array_ops.where."""
def copy_fn(cur_i, cand_i):
+ # TensorArray and scalar get passed through.
+ if isinstance(cur_i, tensor_array_ops.TensorArray):
+ return cand_i
+ if cur_i.shape.ndims == 0:
+ return cand_i
+ # Otherwise propagate the old or the new value.
with ops.colocate_with(cand_i):
return array_ops.where(elements_finished, cur_i, cand_i)
return nest.map_structure(copy_fn, current, candidate)
diff --git a/tensorflow/python/ops/standard_ops.py b/tensorflow/python/ops/standard_ops.py
index 30bf4e4ef1..009d1dc3b9 100644
--- a/tensorflow/python/ops/standard_ops.py
+++ b/tensorflow/python/ops/standard_ops.py
@@ -25,6 +25,7 @@ import sys as _sys
# Imports the following modules so that @RegisterGradient get executed.
from tensorflow.python.ops import array_grad
from tensorflow.python.ops import data_flow_grad
+from tensorflow.python.ops import manip_grad
from tensorflow.python.ops import math_grad
from tensorflow.python.ops import sparse_grad
from tensorflow.python.ops import spectral_grad
@@ -42,11 +43,13 @@ from tensorflow.python.ops.special_math_ops import *
# TODO(vrv): Switch to import * once we're okay with exposing the module.
from tensorflow.python.ops.confusion_matrix import confusion_matrix
from tensorflow.python.ops.control_flow_ops import Assert
+from tensorflow.python.ops.control_flow_ops import case
+from tensorflow.python.ops.control_flow_ops import cond
from tensorflow.python.ops.control_flow_ops import group
from tensorflow.python.ops.control_flow_ops import no_op
+# pylint: disable=redefined-builtin
from tensorflow.python.ops.control_flow_ops import tuple
-from tensorflow.python.ops.control_flow_ops import cond
-from tensorflow.python.ops.control_flow_ops import case
+# pylint: enable=redefined-builtin
from tensorflow.python.ops.control_flow_ops import while_loop
from tensorflow.python.ops.data_flow_ops import *
from tensorflow.python.ops.functional_ops import *
@@ -59,6 +62,7 @@ from tensorflow.python.ops.logging_ops import Print
from tensorflow.python.ops.logging_ops import get_summary_op
from tensorflow.python.ops.lookup_ops import initialize_all_tables
from tensorflow.python.ops.lookup_ops import tables_initializer
+from tensorflow.python.ops.manip_ops import *
from tensorflow.python.ops.math_ops import *
from tensorflow.python.ops.numerics import *
from tensorflow.python.ops.parsing_ops import *
@@ -105,6 +109,7 @@ from tensorflow.python.ops import init_ops as _init_ops
from tensorflow.python.ops import io_ops as _io_ops
from tensorflow.python.ops import linalg_ops as _linalg_ops
from tensorflow.python.ops import logging_ops as _logging_ops
+from tensorflow.python.ops import manip_ops as _manip_ops
from tensorflow.python.ops import math_ops as _math_ops
from tensorflow.python.ops import numerics as _numerics
from tensorflow.python.ops import parsing_ops as _parsing_ops
@@ -264,34 +269,36 @@ _allowed_symbols = (_allowed_symbols_array_ops +
_allowed_symbols_misc +
_allowed_symbols_partitioned_variables)
-remove_undocumented(__name__, _allowed_symbols,
- [_sys.modules[__name__],
- _array_ops,
- _check_ops,
- _clip_ops,
- _confusion_matrix,
- _control_flow_ops,
- _constant_op,
- _data_flow_ops,
- _functional_ops,
- _gradients,
- _histogram_ops,
- _init_ops,
- _io_ops,
- _linalg_ops,
- _logging_ops,
- _math_ops,
- _numerics,
- _parsing_ops,
- _partitioned_variables,
- _random_ops,
- _script_ops,
- _session_ops,
- _sparse_ops,
- _special_math_ops,
- _state_ops,
- _string_ops,
- _template,
- _tensor_array_ops,
- _variable_scope,
- _variables,])
+remove_undocumented(__name__, _allowed_symbols, [
+ _sys.modules[__name__],
+ _array_ops,
+ _check_ops,
+ _clip_ops,
+ _confusion_matrix,
+ _control_flow_ops,
+ _constant_op,
+ _data_flow_ops,
+ _functional_ops,
+ _gradients,
+ _histogram_ops,
+ _init_ops,
+ _io_ops,
+ _linalg_ops,
+ _logging_ops,
+ _manip_ops,
+ _math_ops,
+ _numerics,
+ _parsing_ops,
+ _partitioned_variables,
+ _random_ops,
+ _script_ops,
+ _session_ops,
+ _sparse_ops,
+ _special_math_ops,
+ _state_ops,
+ _string_ops,
+ _template,
+ _tensor_array_ops,
+ _variable_scope,
+ _variables,
+])
diff --git a/tensorflow/python/saved_model/loader_impl.py b/tensorflow/python/saved_model/loader_impl.py
index ddfd6be6da..bebf1d5e0d 100644
--- a/tensorflow/python/saved_model/loader_impl.py
+++ b/tensorflow/python/saved_model/loader_impl.py
@@ -235,13 +235,10 @@ def load(sess, tags, export_dir, **saver_kwargs):
asset_tensors_dictionary = _get_asset_tensors(export_dir,
meta_graph_def_to_load)
- main_op_tensor = _get_main_op_tensor(meta_graph_def_to_load)
+ main_op_tensor = (
+ _get_main_op_tensor(meta_graph_def_to_load) or
+ (_get_legacy_init_op_tensor(meta_graph_def_to_load)))
if main_op_tensor is not None:
sess.run(fetches=[main_op_tensor], feed_dict=asset_tensors_dictionary)
- else:
- legacy_init_op_tensor = _get_legacy_init_op_tensor(meta_graph_def_to_load)
- if legacy_init_op_tensor is not None:
- sess.run(
- fetches=[legacy_init_op_tensor], feed_dict=asset_tensors_dictionary)
return meta_graph_def_to_load
diff --git a/tensorflow/python/tools/freeze_graph.py b/tensorflow/python/tools/freeze_graph.py
index 0ddf09260b..affa97062a 100644
--- a/tensorflow/python/tools/freeze_graph.py
+++ b/tensorflow/python/tools/freeze_graph.py
@@ -72,7 +72,8 @@ def freeze_graph_with_def_protos(input_graph_def,
variable_names_blacklist="",
input_meta_graph_def=None,
input_saved_model_dir=None,
- saved_model_tags=None):
+ saved_model_tags=None,
+ checkpoint_version=saver_pb2.SaverDef.V2):
"""Converts all variables in a graph and checkpoint into constants."""
del restore_op_name, filename_tensor_name # Unused by updated loading code.
@@ -100,7 +101,8 @@ def freeze_graph_with_def_protos(input_graph_def,
_ = importer.import_graph_def(input_graph_def, name="")
with session.Session() as sess:
if input_saver_def:
- saver = saver_lib.Saver(saver_def=input_saver_def)
+ saver = saver_lib.Saver(
+ saver_def=input_saver_def, write_version=checkpoint_version)
saver.restore(sess, input_checkpoint)
elif input_meta_graph_def:
restorer = saver_lib.import_meta_graph(
@@ -124,7 +126,8 @@ def freeze_graph_with_def_protos(input_graph_def,
# 'global_step' or a similar housekeeping element) so skip it.
continue
var_list[key] = tensor
- saver = saver_lib.Saver(var_list=var_list)
+ saver = saver_lib.Saver(
+ var_list=var_list, write_version=checkpoint_version)
saver.restore(sess, input_checkpoint)
if initializer_nodes:
sess.run(initializer_nodes.split(","))
@@ -217,7 +220,8 @@ def freeze_graph(input_graph,
variable_names_blacklist="",
input_meta_graph=None,
input_saved_model_dir=None,
- saved_model_tags=tag_constants.SERVING):
+ saved_model_tags=tag_constants.SERVING,
+ checkpoint_version=saver_pb2.SaverDef.V2):
"""Converts all variables in a graph and checkpoint into constants."""
input_graph_def = None
if input_saved_model_dir:
@@ -233,10 +237,21 @@ def freeze_graph(input_graph,
if input_saver:
input_saver_def = _parse_input_saver_proto(input_saver, input_binary)
freeze_graph_with_def_protos(
- input_graph_def, input_saver_def, input_checkpoint, output_node_names,
- restore_op_name, filename_tensor_name, output_graph, clear_devices,
- initializer_nodes, variable_names_whitelist, variable_names_blacklist,
- input_meta_graph_def, input_saved_model_dir, saved_model_tags.split(","))
+ input_graph_def,
+ input_saver_def,
+ input_checkpoint,
+ output_node_names,
+ restore_op_name,
+ filename_tensor_name,
+ output_graph,
+ clear_devices,
+ initializer_nodes,
+ variable_names_whitelist,
+ variable_names_blacklist,
+ input_meta_graph_def,
+ input_saved_model_dir,
+ saved_model_tags.split(","),
+ checkpoint_version=checkpoint_version)
def main(unused_args):
@@ -246,7 +261,7 @@ def main(unused_args):
FLAGS.output_graph, FLAGS.clear_devices, FLAGS.initializer_nodes,
FLAGS.variable_names_whitelist, FLAGS.variable_names_blacklist,
FLAGS.input_meta_graph, FLAGS.input_saved_model_dir,
- FLAGS.saved_model_tags)
+ FLAGS.saved_model_tags, FLAGS.checkpoint_version)
if __name__ == "__main__":
@@ -268,6 +283,11 @@ if __name__ == "__main__":
default="",
help="TensorFlow variables file to load.")
parser.add_argument(
+ "--checkpoint_version",
+ type=int,
+ default=saver_pb2.SaverDef.V2,
+ help="Tensorflow variable file format")
+ parser.add_argument(
"--output_graph",
type=str,
default="",
diff --git a/tensorflow/python/tools/freeze_graph_test.py b/tensorflow/python/tools/freeze_graph_test.py
index feeed7102c..91f0061ebc 100644
--- a/tensorflow/python/tools/freeze_graph_test.py
+++ b/tensorflow/python/tools/freeze_graph_test.py
@@ -84,9 +84,19 @@ class FreezeGraphTest(test_util.TensorFlowTestCase):
input_meta_graph = checkpoint_meta_graph_file
freeze_graph.freeze_graph(
- input_graph_path, input_saver_def_path, input_binary, checkpoint_path,
- output_node_names, restore_op_name, filename_tensor_name,
- output_graph_path, clear_devices, "", "", input_meta_graph)
+ input_graph_path,
+ input_saver_def_path,
+ input_binary,
+ checkpoint_path,
+ output_node_names,
+ restore_op_name,
+ filename_tensor_name,
+ output_graph_path,
+ clear_devices,
+ "",
+ "",
+ input_meta_graph,
+ checkpoint_version=saver_write_version)
# Now we make sure the variable is now a constant, and that the graph still
# produces the expected result.
diff --git a/tensorflow/python/tools/optimize_for_inference_lib.py b/tensorflow/python/tools/optimize_for_inference_lib.py
index c2687bf557..9c19271222 100644
--- a/tensorflow/python/tools/optimize_for_inference_lib.py
+++ b/tensorflow/python/tools/optimize_for_inference_lib.py
@@ -349,6 +349,7 @@ def fold_batch_norms(input_graph_def):
bias_add_op.op = "BiasAdd"
bias_add_op.name = node.name
bias_add_op.attr["T"].CopyFrom(conv_op.attr["T"])
+ bias_add_op.attr["data_format"].CopyFrom(conv_op.attr["data_format"])
bias_add_op.input.extend([new_conv_op.name, offset_op.name])
new_ops.extend([scaled_weights_op, new_conv_op, offset_op, bias_add_op])
diff --git a/tensorflow/python/tools/optimize_for_inference_test.py b/tensorflow/python/tools/optimize_for_inference_test.py
index 7686bb0f14..084a4500f8 100644
--- a/tensorflow/python/tools/optimize_for_inference_test.py
+++ b/tensorflow/python/tools/optimize_for_inference_test.py
@@ -173,48 +173,56 @@ class OptimizeForInferenceTest(test.TestCase):
self.assertNotEqual("BatchNormWithGlobalNormalization", node.op)
def testFoldFusedBatchNorms(self):
- with self.test_session() as sess:
- inputs = [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6]
- input_op = constant_op.constant(
- np.array(inputs), shape=[1, 1, 6, 2], dtype=dtypes.float32)
- weights = [1, 2, 3, 4, 0.1, 0.2, 0.3, 0.4]
- weights_op = constant_op.constant(
- np.array(weights), shape=[1, 2, 2, 2], dtype=dtypes.float32)
- conv_op = nn_ops.conv2d(
- input_op, weights_op, [1, 1, 1, 1], padding="SAME", name="conv_op")
- mean_op = constant_op.constant(
- np.array([10, 20]), shape=[2], dtype=dtypes.float32)
- variance_op = constant_op.constant(
- np.array([0.25, 0.5]), shape=[2], dtype=dtypes.float32)
- beta_op = constant_op.constant(
- np.array([0.1, 0.6]), shape=[2], dtype=dtypes.float32)
- gamma_op = constant_op.constant(
- np.array([1.0, 2.0]), shape=[2], dtype=dtypes.float32)
- ops.get_default_graph().graph_def_versions.producer = 9
- gen_nn_ops._fused_batch_norm(
- conv_op,
- gamma_op,
- beta_op,
- mean_op,
- variance_op,
- 0.00001,
- is_training=False,
- name="output")
- original_graph_def = sess.graph_def
- original_result = sess.run(["output:0"])
- optimized_graph_def = optimize_for_inference_lib.fold_batch_norms(
- original_graph_def)
-
- with self.test_session() as sess:
- _ = importer.import_graph_def(
- optimized_graph_def, input_map={}, name="optimized")
- optimized_result = sess.run(["optimized/output:0"])
-
- self.assertAllClose(
- original_result, optimized_result, rtol=1e-04, atol=1e-06)
-
- for node in optimized_graph_def.node:
- self.assertNotEqual("FusedBatchNorm", node.op)
+ for data_format, use_gpu in [("NHWC", False), ("NCHW", True)]:
+ with self.test_session(use_gpu=use_gpu) as sess:
+ inputs = [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6]
+ input_op = constant_op.constant(
+ np.array(inputs),
+ shape=[1, 1, 6, 2] if data_format == "NHWC" else [1, 2, 1, 6],
+ dtype=dtypes.float32)
+ weights = [1, 2, 3, 4, 0.1, 0.2, 0.3, 0.4]
+ weights_op = constant_op.constant(
+ np.array(weights), shape=[1, 2, 2, 2], dtype=dtypes.float32)
+ conv_op = nn_ops.conv2d(
+ input_op,
+ weights_op, [1, 1, 1, 1],
+ padding="SAME",
+ data_format=data_format,
+ name="conv_op")
+ mean_op = constant_op.constant(
+ np.array([10, 20]), shape=[2], dtype=dtypes.float32)
+ variance_op = constant_op.constant(
+ np.array([0.25, 0.5]), shape=[2], dtype=dtypes.float32)
+ beta_op = constant_op.constant(
+ np.array([0.1, 0.6]), shape=[2], dtype=dtypes.float32)
+ gamma_op = constant_op.constant(
+ np.array([1.0, 2.0]), shape=[2], dtype=dtypes.float32)
+ ops.get_default_graph().graph_def_versions.producer = 9
+ gen_nn_ops._fused_batch_norm(
+ conv_op,
+ gamma_op,
+ beta_op,
+ mean_op,
+ variance_op,
+ 0.00001,
+ is_training=False,
+ data_format=data_format,
+ name="output")
+ original_graph_def = sess.graph_def
+ original_result = sess.run(["output:0"])
+ optimized_graph_def = optimize_for_inference_lib.fold_batch_norms(
+ original_graph_def)
+
+ with self.test_session(use_gpu=use_gpu) as sess:
+ _ = importer.import_graph_def(
+ optimized_graph_def, input_map={}, name="optimized")
+ optimized_result = sess.run(["optimized/output:0"])
+
+ self.assertAllClose(
+ original_result, optimized_result, rtol=1e-04, atol=1e-06)
+
+ for node in optimized_graph_def.node:
+ self.assertNotEqual("FusedBatchNorm", node.op)
def testFuseResizePadAndConv(self):
with self.test_session() as sess:
diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py
index 667a4b1db8..33f6debbcb 100644
--- a/tensorflow/python/tools/saved_model_cli.py
+++ b/tensorflow/python/tools/saved_model_cli.py
@@ -31,6 +31,7 @@ import warnings
import numpy as np
+from six import integer_types
from tensorflow.contrib.saved_model.python.saved_model import reader
from tensorflow.contrib.saved_model.python.saved_model import signature_def_utils
from tensorflow.core.example import example_pb2
@@ -440,7 +441,7 @@ def _create_example_string(example_dict):
elif isinstance(feature_list[0], str):
example.features.feature[feature_name].bytes_list.value.extend(
feature_list)
- elif isinstance(feature_list[0], (int, long)):
+ elif isinstance(feature_list[0], integer_types):
example.features.feature[feature_name].int64_list.value.extend(
feature_list)
else:
diff --git a/tensorflow/python/training/basic_session_run_hooks.py b/tensorflow/python/training/basic_session_run_hooks.py
index 17e07e171a..aae757b99a 100644
--- a/tensorflow/python/training/basic_session_run_hooks.py
+++ b/tensorflow/python/training/basic_session_run_hooks.py
@@ -336,7 +336,7 @@ class CheckpointSaverListener(object):
`CheckpointSaverHook`, as in this example:
```python
- class ExampleCheckpointSaverListerner(CheckpointSaverListener):
+ class ExampleCheckpointSaverListener(CheckpointSaverListener):
def begin(self):
# You can add ops to the graph here.
print('Starting the session.')
@@ -352,7 +352,7 @@ class CheckpointSaverListener(object):
print('Done with the session.')
...
- listener = ExampleCheckpointSaverListerner()
+ listener = ExampleCheckpointSaverListener()
saver_hook = tf.train.CheckpointSaverHook(
checkpoint_dir, listeners=[listener])
with tf.train.MonitoredTrainingSession(chief_only_hooks=[saver_hook]):
diff --git a/tensorflow/python/training/input.py b/tensorflow/python/training/input.py
index 992184ec9e..bd9985a7c5 100644
--- a/tensorflow/python/training/input.py
+++ b/tensorflow/python/training/input.py
@@ -58,6 +58,8 @@ _restore_sparse = sparse_ops._take_many_sparse_from_tensors_map
def match_filenames_once(pattern, name=None):
"""Save the list of files matching pattern, so it is only computed once.
+ NOTE: The order of the files returned can be non-deterministic.
+
Args:
pattern: A file pattern (glob), or 1D tensor of file patterns.
name: A name for the operations (optional).
diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py
index 3888e9bba4..0c1c8e664b 100644
--- a/tensorflow/python/training/saver.py
+++ b/tensorflow/python/training/saver.py
@@ -1597,9 +1597,9 @@ class Saver(object):
[Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
Returns:
- A string: path prefix used for the checkpoint files. If the saver is
- sharded, this string ends with: '-?????-of-nnnnn' where 'nnnnn'
- is the number of shards created.
+ A string: path prefix used for the checkpoint files. If checkpoint
+ format is V1 and the saver is sharded, this string ends with:
+ '-?????-of-nnnnn' where 'nnnnn' is the number of shards created.
If the saver is empty, returns None.
Raises:
@@ -1749,6 +1749,12 @@ class Saver(object):
return
if save_path is None:
raise ValueError("Can't load save_path when it is None.")
+ if (os.path.isfile(save_path) and
+ self._write_version not in (
+ saver_pb2.SaverDef.V1, saver_pb2.SaverDef.LEGACY)):
+ raise ValueError("The specified path: %s is a file."
+ " Please specify only the path prefix"
+ " to the checkpoint files." % save_path)
logging.info("Restoring parameters from %s", save_path)
if context.in_graph_mode():
sess.run(self.saver_def.restore_op_name,
diff --git a/tensorflow/python/util/compat_internal.py b/tensorflow/python/util/compat_internal.py
new file mode 100644
index 0000000000..fee1d6fab7
--- /dev/null
+++ b/tensorflow/python/util/compat_internal.py
@@ -0,0 +1,34 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functions for Python 2 vs. 3 compatibility that are private to TensorFlow."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+def path_to_str(path):
+ """Returns the file system path representation of a `PathLike` object,
+ else as it is.
+
+ Args:
+ path: An object that can be converted to path representation.
+
+ Returns:
+ A `str` object.
+ """
+ if hasattr(path, "__fspath__"):
+ path = as_str_any(path.__fspath__())
+ return path
diff --git a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
index f35542e18f..933c103f52 100644
--- a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
+++ b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
@@ -232,7 +232,7 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
result = StringToDriverVersion(version);
}
#else
-#if !defined(PLATFORM_WINDOWS) && !defined(NVIDIA_TEGRA)
+#if !defined(PLATFORM_WINDOWS) && !defined(ANDROID_TEGRA)
// Callback used when iterating through DSOs. Looks for the driver-interfacing
// DSO and yields its version number into the callback data, when found.
auto iterate_phdr =
diff --git a/tensorflow/stream_executor/dso_loader.cc b/tensorflow/stream_executor/dso_loader.cc
index 5210a81092..0c642912b1 100644
--- a/tensorflow/stream_executor/dso_loader.cc
+++ b/tensorflow/stream_executor/dso_loader.cc
@@ -96,10 +96,19 @@ string GetCudnnVersion() { return TF_CUDNN_VERSION; }
}
/* static */ port::Status DsoLoader::GetLibcuptiDsoHandle(void** dso_handle) {
+#if defined(ANDROID_TEGRA)
+ // On Android devices the CUDA version number is not added to the library
+ // name.
+ return GetDsoHandle(
+ FindDsoPath(port::Env::Default()->FormatLibraryFileName("cupti", ""),
+ GetCudaCuptiLibraryPath()),
+ dso_handle);
+#else
return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
"cupti", GetCudaVersion()),
GetCudaCuptiLibraryPath()),
dso_handle);
+#endif
}
static mutex& GetRpathMutex() {
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index bf4a9fe6ce..411b393b0a 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -270,6 +270,8 @@ def _rpath_linkopts(name):
clean_dep("//tensorflow:darwin"): [
"-Wl,%s" % (_make_search_paths("@loader_path", levels_to_root),),
],
+ clean_dep("//tensorflow:windows"): [],
+ clean_dep("//tensorflow:windows_msvc"): [],
"//conditions:default": [
"-Wl,%s" % (_make_search_paths("$$ORIGIN", levels_to_root),),
],
@@ -301,6 +303,7 @@ def tf_cc_shared_object(
"-Wl,-install_name,@rpath/" + name.split("/")[-1],
],
"//conditions:default": [
+ "-Wl,-soname," + name.split("/")[-1],
],
}),
**kwargs)
@@ -612,6 +615,8 @@ def tf_cc_test(name,
"//tensorflow:android": [
"-pie",
],
+ clean_dep("//tensorflow:windows"): [],
+ clean_dep("//tensorflow:windows_msvc"): [],
"//conditions:default": [
"-lpthread",
"-lm"
@@ -1264,6 +1269,8 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[], linkopts=[]):
"//conditions:default": [
"-lm",
],
+ clean_dep("//tensorflow:windows"): [],
+ clean_dep("//tensorflow:windows_msvc"): [],
clean_dep("//tensorflow:darwin"): [],
}),)
diff --git a/tensorflow/tools/api/golden/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/tensorflow.image.pbtxt
index f32353c957..baedf596e8 100644
--- a/tensorflow/tools/api/golden/tensorflow.image.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.image.pbtxt
@@ -169,12 +169,20 @@ tf_module {
argspec: "args=[\'images\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
member_method {
+ name: "rgb_to_yiq"
+ argspec: "args=[\'images\'], varargs=None, keywords=None, defaults=None"
+ }
+ member_method {
+ name: "rgb_to_yuv"
+ argspec: "args=[\'images\'], varargs=None, keywords=None, defaults=None"
+ }
+ member_method {
name: "rot90"
argspec: "args=[\'image\', \'k\', \'name\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
}
member_method {
name: "sample_distorted_bounding_box"
- argspec: "args=[\'image_size\', \'bounding_boxes\', \'seed\', \'seed2\', \'min_object_covered\', \'aspect_ratio_range\', \'area_range\', \'max_attempts\', \'use_image_if_no_bounding_boxes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+ argspec: "args=[\'image_size\', \'bounding_boxes\', \'seed\', \'seed2\', \'min_object_covered\', \'aspect_ratio_range\', \'area_range\', \'max_attempts\', \'use_image_if_no_bounding_boxes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'0.1\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
}
member_method {
name: "total_variation"
@@ -184,4 +192,12 @@ tf_module {
name: "transpose_image"
argspec: "args=[\'image\'], varargs=None, keywords=None, defaults=None"
}
+ member_method {
+ name: "yiq_to_rgb"
+ argspec: "args=[\'images\'], varargs=None, keywords=None, defaults=None"
+ }
+ member_method {
+ name: "yuv_to_rgb"
+ argspec: "args=[\'images\'], varargs=None, keywords=None, defaults=None"
+ }
}
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
index d898c54627..11e05f884d 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
@@ -1,6 +1,7 @@
path: "tensorflow.keras.layers.Conv3DTranspose"
tf_class {
is_instance: "<class \'tensorflow.python.keras._impl.keras.layers.convolutional.Conv3DTranspose\'>"
+ is_instance: "<class \'tensorflow.python.layers.convolutional.Conv3DTranspose\'>"
is_instance: "<class \'tensorflow.python.layers.convolutional.Conv3D\'>"
is_instance: "<class \'tensorflow.python.layers.convolutional._Conv\'>"
is_instance: "<class \'tensorflow.python.keras._impl.keras.engine.topology.Layer\'>"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
index a7001bbe34..58724a1e16 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
@@ -1,6 +1,7 @@
path: "tensorflow.keras.layers.Convolution3DTranspose"
tf_class {
is_instance: "<class \'tensorflow.python.keras._impl.keras.layers.convolutional.Conv3DTranspose\'>"
+ is_instance: "<class \'tensorflow.python.layers.convolutional.Conv3DTranspose\'>"
is_instance: "<class \'tensorflow.python.layers.convolutional.Conv3D\'>"
is_instance: "<class \'tensorflow.python.layers.convolutional._Conv\'>"
is_instance: "<class \'tensorflow.python.keras._impl.keras.engine.topology.Layer\'>"
diff --git a/tensorflow/tools/api/golden/tensorflow.manip.pbtxt b/tensorflow/tools/api/golden/tensorflow.manip.pbtxt
new file mode 100644
index 0000000000..0b84165285
--- /dev/null
+++ b/tensorflow/tools/api/golden/tensorflow.manip.pbtxt
@@ -0,0 +1,7 @@
+path: "tensorflow.manip"
+tf_module {
+ member_method {
+ name: "roll"
+ argspec: "args=[\'input\', \'shift\', \'axis\'], varargs=None, keywords=None, defaults=None"
+ }
+}
diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt
index db1ed42185..e8890e9cc0 100644
--- a/tensorflow/tools/api/golden/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.pbtxt
@@ -397,6 +397,10 @@ tf_module {
mtype: "<type \'module\'>"
}
member {
+ name: "manip"
+ mtype: "<type \'module\'>"
+ }
+ member {
name: "metrics"
mtype: "<type \'module\'>"
}
@@ -2045,6 +2049,10 @@ tf_module {
argspec: "args=[\'x\', \'out_idx\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'int32\'>\", \'None\'], "
}
member_method {
+ name: "unravel_index"
+ argspec: "args=[\'indices\', \'dims\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+ }
+ member_method {
name: "unsorted_segment_max"
argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh
index 636d5a1e81..310c1b6248 100755
--- a/tensorflow/tools/ci_build/ci_sanity.sh
+++ b/tensorflow/tools/ci_build/ci_sanity.sh
@@ -322,7 +322,7 @@ do_external_licenses_check(){
EXTRA_LICENSES_FILE="$(mktemp)_extra_licenses.log"
echo "Getting external dependencies for ${BUILD_TARGET}"
- bazel query "attr('licenses', 'notice', deps(${BUILD_TARGET}))" --no_implicit_deps --no_host_deps --keep_going \
+ bazel query "attr('licenses', 'notice', deps(${BUILD_TARGET}))" --keep_going \
| grep -E -v "^//tensorflow" \
| sed -e 's|:.*||' \
| sort \
@@ -331,7 +331,7 @@ do_external_licenses_check(){
echo
echo "Getting list of external licenses mentioned in ${LICENSES_TARGET}."
- bazel query "deps(${LICENSES_TARGET})" --no_implicit_deps --no_host_deps --keep_going \
+ bazel query "deps(${LICENSES_TARGET})" --keep_going \
| grep -E -v "^//tensorflow" \
| sed -e 's|:.*||' \
| sort \
@@ -345,6 +345,18 @@ do_external_licenses_check(){
EXTERNAL_LICENSES_CHECK_END_TIME=$(date +'%s')
+ # Blacklist
+ echo ${MISSING_LICENSES_FILE}
+ grep -e "@bazel_tools//third_party/" -e "@com_google_absl//absl" -e "@org_tensorflow//" -v ${MISSING_LICENSES_FILE} > temp.txt
+ mv temp.txt ${MISSING_LICENSES_FILE}
+
+ # Whitelist
+ echo ${EXTRA_LICENSE_FILE}
+ grep -e "@bazel_tools//src/" -e "@bazel_tools//tools/" -e "@com_google_absl//" -e "//external" -e "@local" -v ${EXTRA_LICENSES_FILE} > temp.txt
+ mv temp.txt ${EXTRA_LICENSES_FILE}
+
+
+
echo
echo "do_external_licenses_check took $((EXTERNAL_LICENSES_CHECK_END_TIME - EXTERNAL_LICENSES_CHECK_START_TIME)) s"
echo
diff --git a/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
index fa28e3d79c..583d1d5f09 100755
--- a/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
@@ -41,7 +41,7 @@ run_configure_for_cpu_build
# build_libtensorflow_tarball in ../builds/libtensorflow.sh
# cannot be used on Windows since it relies on pkg_tar rules.
# So we do something special here
-bazel build -c opt \
+bazel build -c opt --copt=/arch:AVX \
tensorflow:libtensorflow.so \
tensorflow/tools/lib_package:clicenses_generate \
tensorflow/java:libtensorflow_jni.so \
diff --git a/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh
index 573c926203..94276c6c5c 100644
--- a/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh
+++ b/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh
@@ -41,7 +41,7 @@ run_configure_for_gpu_build
# build_libtensorflow_tarball in ../builds/libtensorflow.sh
# cannot be used on Windows since it relies on pkg_tar rules.
# So we do something special here
-bazel build -c opt \
+bazel build -c opt --copt=/arch:AVX \
tensorflow:libtensorflow.so \
tensorflow/tools/lib_package:clicenses_generate \
tensorflow/java:libtensorflow_jni.so \
diff --git a/tensorflow/tools/docker/jupyter_notebook_config.py b/tensorflow/tools/docker/jupyter_notebook_config.py
index 0acbf6fcee..05dcefb099 100644
--- a/tensorflow/tools/docker/jupyter_notebook_config.py
+++ b/tensorflow/tools/docker/jupyter_notebook_config.py
@@ -15,6 +15,7 @@
import os
from IPython.lib import passwd
+c = c # pylint:disable=undefined-variable
c.NotebookApp.ip = '*'
c.NotebookApp.port = int(os.getenv('PORT', 8888))
c.NotebookApp.open_browser = False
diff --git a/tensorflow/tools/docs/pretty_docs.py b/tensorflow/tools/docs/pretty_docs.py
index ac04f566d0..543b5fa6fe 100644
--- a/tensorflow/tools/docs/pretty_docs.py
+++ b/tensorflow/tools/docs/pretty_docs.py
@@ -327,7 +327,7 @@ class _Metadata(object):
"""
def __init__(self, name):
- """Creata a Metadata builder.
+ """Create a Metadata builder.
Args:
name: The name of the page being described by the Metadata block.
diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD
index dbc81599de..35e55c0d31 100644
--- a/tensorflow/tools/lib_package/BUILD
+++ b/tensorflow/tools/lib_package/BUILD
@@ -99,6 +99,7 @@ genrule(
"//third_party/hadoop:LICENSE.txt",
"//third_party/eigen3:LICENSE",
"//third_party/fft2d:LICENSE",
+ "@aws//:LICENSE",
"@boringssl//:LICENSE",
"@com_googlesource_code_re2//:LICENSE",
"@cub_archive//:LICENSE.TXT",
@@ -112,8 +113,10 @@ genrule(
"@jemalloc//:COPYING",
"@jpeg//:LICENSE.md",
"@libxsmm_archive//:LICENSE",
+ "@llvm//:LICENSE.TXT",
"@lmdb//:LICENSE",
"@local_config_sycl//sycl:LICENSE.text",
+ "@nasm//:LICENSE",
"@nsync//:LICENSE",
"@png_archive//:LICENSE",
"@protobuf_archive//:LICENSE",
@@ -134,6 +137,7 @@ genrule(
"//third_party/hadoop:LICENSE.txt",
"//third_party/eigen3:LICENSE",
"//third_party/fft2d:LICENSE",
+ "@aws//:LICENSE",
"@boringssl//:LICENSE",
"@com_googlesource_code_re2//:LICENSE",
"@cub_archive//:LICENSE.TXT",
@@ -149,6 +153,7 @@ genrule(
"@libxsmm_archive//:LICENSE",
"@lmdb//:LICENSE",
"@local_config_sycl//sycl:LICENSE.text",
+ "@nasm//:LICENSE",
"@nsync//:LICENSE",
"@png_archive//:LICENSE",
"@protobuf_archive//:LICENSE",
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index e4fa6694d8..a9c4a8de42 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -88,13 +88,20 @@ filegroup(
"//third_party/eigen3:LICENSE",
"//third_party/fft2d:LICENSE",
"//third_party/hadoop:LICENSE.txt",
+ "@absl_py//absl/flags:LICENSE",
+ "@arm_neon_2_x86_sse//:LICENSE",
+ "@astor_archive//:LICENSE",
+ "@aws//:LICENSE",
"@boringssl//:LICENSE",
+ "@com_google_absl//:LICENSE",
"@com_googlesource_code_re2//:LICENSE",
"@cub_archive//:LICENSE.TXT",
"@curl//:COPYING",
"@eigen_archive//:COPYING.MPL2",
"@farmhash_archive//:COPYING",
"@fft2d//:fft/readme.txt",
+ "@flatbuffers//:LICENSE.txt",
+ "@gast_archive//:PKG-INFO",
"@gemmlowp//:LICENSE",
"@gif_archive//:COPYING",
"@grpc//:LICENSE",
@@ -105,11 +112,15 @@ filegroup(
"@lmdb//:LICENSE",
"@local_config_sycl//sycl:LICENSE.text",
"@grpc//third_party/nanopb:LICENSE.txt",
+ "@nasm//:LICENSE",
"@nsync//:LICENSE",
+ "@pcre//:LICENCE",
"@png_archive//:LICENSE",
"@protobuf_archive//:LICENSE",
"@six_archive//:LICENSE",
"@snappy//:COPYING",
+ "@swig//:LICENSE",
+ "@termcolor_archive//:COPYING.txt",
"@zlib_archive//:zlib.h",
"@org_python_pypi_backports_weakref//:LICENSE",
] + if_mkl([
diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh
index ca8c272a08..dc31e4c5f7 100755
--- a/tensorflow/tools/pip_package/build_pip_package.sh
+++ b/tensorflow/tools/pip_package/build_pip_package.sh
@@ -137,8 +137,8 @@ function main() {
fi
fi
fi
- # Install toco as a binary in aux-bin.
mkdir "${TMPDIR}/tensorflow/aux-bin"
+ # Install toco as a binary in aux-bin.
cp bazel-bin/tensorflow/contrib/lite/toco/toco ${TMPDIR}/tensorflow/aux-bin/
fi
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 3cd4d12100..bc4315c600 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -29,7 +29,7 @@ from setuptools.dist import Distribution
# This version string is semver compatible, but incompatible with pip.
# For pip, we will remove all '-' characters from this string, and use the
# result for pip.
-_VERSION = '1.5.0-rc1'
+_VERSION = '1.5.0'
REQUIRED_PACKAGES = [
'absl-py >= 0.1.6',
@@ -39,7 +39,7 @@ REQUIRED_PACKAGES = [
'numpy >= 1.12.1',
'six >= 1.10.0',
'protobuf >= 3.4.0',
- 'tensorflow-tensorboard >= 0.4.0',
+ 'tensorflow-tensorboard >= 1.5.0, < 1.6.0',
'termcolor >= 1.1.0',
]
@@ -181,9 +181,10 @@ def find_files(pattern, root):
matches = ['../' + x for x in find_files('*', 'external') if '.py' not in x]
-so_lib_paths = [i for i in os.listdir('.')
- if os.path.isdir(i)
- and fnmatch.fnmatch(i, '_solib_*')]
+so_lib_paths = [
+ i for i in os.listdir('.')
+ if os.path.isdir(i) and fnmatch.fnmatch(i, '_solib_*')
+]
for path in so_lib_paths:
matches.extend(
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 2c320cf68a..12d3c739cc 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -114,16 +114,17 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
],
sha256 = "5996380e3e8b981f55d1c8d58e709c00dbb4806ba367be75d0925a68cc2f6478",
strip_prefix = "abseil-cpp-720c017e30339fd1786ce4aac68bc8559736e53f",
+ build_file = str(Label("//third_party:com_google_absl.BUILD")),
)
tf_http_archive(
name = "eigen_archive",
urls = [
- "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/14e1418fcf12.tar.gz",
- "https://bitbucket.org/eigen/eigen/get/14e1418fcf12.tar.gz",
+ "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/2355b229ea4c.tar.gz",
+ "https://bitbucket.org/eigen/eigen/get/2355b229ea4c.tar.gz",
],
- sha256 = "2b526c6888639025323fd4f2600533c0f982d304ea48e4f1663e8066bd9f6368",
- strip_prefix = "eigen-eigen-14e1418fcf12",
+ sha256 = "0cadb31a35b514bf2dfd6b5d38205da94ef326ec6908fc3fd7c269948467214f",
+ strip_prefix = "eigen-eigen-2355b229ea4c",
build_file = str(Label("//third_party:eigen.BUILD")),
)
@@ -556,6 +557,18 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
)
tf_http_archive(
+ name = "kafka",
+ urls = [
+ "https://mirror.bazel.build/github.com/edenhill/librdkafka/archive/v0.11.1.tar.gz",
+ "https://github.com/edenhill/librdkafka/archive/v0.11.1.tar.gz",
+ ],
+ sha256 = "dd035d57c8f19b0b612dd6eefe6e5eebad76f506e302cccb7c2066f25a83585e",
+ strip_prefix = "librdkafka-0.11.1",
+ build_file = str(Label("//third_party:kafka/BUILD")),
+ patch_file = str(Label("//third_party/kafka:config.patch")),
+ )
+
+ tf_http_archive(
name = "aws",
urls = [
"https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
diff --git a/third_party/com_google_absl.BUILD b/third_party/com_google_absl.BUILD
new file mode 100644
index 0000000000..8fca145f75
--- /dev/null
+++ b/third_party/com_google_absl.BUILD
@@ -0,0 +1,5 @@
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"]) # Apache
+
+exports_files(["LICENSE"])
diff --git a/third_party/flatbuffers/flatbuffers.BUILD b/third_party/flatbuffers/flatbuffers.BUILD
index f6b8e6ddb0..824c97be60 100644
--- a/third_party/flatbuffers/flatbuffers.BUILD
+++ b/third_party/flatbuffers/flatbuffers.BUILD
@@ -4,6 +4,8 @@ package(
licenses(["notice"]) # Apache 2.0
+exports_files(["LICENSE.txt"])
+
config_setting(
name = "freebsd",
values = {"cpu": "freebsd"},
diff --git a/third_party/gast.BUILD b/third_party/gast.BUILD
index 06db528ada..4866982e1f 100644
--- a/third_party/gast.BUILD
+++ b/third_party/gast.BUILD
@@ -3,7 +3,7 @@
licenses(["notice"]) # BSD 3-clause
-exports_files(["LICENSE"])
+exports_files(["PKG-INFO"])
py_library(
name = "gast",
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index 8e1dd8a54f..255ae01190 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -826,7 +826,7 @@ def symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name,
if src_dir != None:
src_dir = _norm_path(src_dir)
dest_dir = _norm_path(dest_dir)
- files = _read_dir(repository_ctx, src_dir)
+ files = '\n'.join(sorted(_read_dir(repository_ctx, src_dir).splitlines()))
# Create a list with the src_dir stripped to use for outputs.
dest_files = files.replace(src_dir, '').splitlines()
src_files = files.splitlines()
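
The change above sorts the newline-separated listing returned by _read_dir before it is split, so the generated symlink genrule sees files in a stable order regardless of filesystem enumeration. A minimal standalone sketch of the same idea; list_files is a hypothetical stand-in for the repository's _read_dir helper:

    # Sketch only: sort a newline-separated directory listing so downstream
    # consumers get a deterministic order on every run and every filesystem.
    import os

    def list_files(src_dir):
        # Hypothetical stand-in for _read_dir: newline-separated paths.
        return '\n'.join(
            os.path.join(root, name)
            for root, _, names in os.walk(src_dir)
            for name in names
        )

    files = '\n'.join(sorted(list_files('.').splitlines()))
    print(files.splitlines()[:3])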
diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/jpeg.BUILD
index 37924125cf..87a23925c4 100644
--- a/third_party/jpeg/jpeg.BUILD
+++ b/third_party/jpeg/jpeg.BUILD
@@ -34,6 +34,10 @@ libjpegturbo_copts = select({
"-mfloat-abi=softfp",
"-fprefetch-loop-arrays",
],
+ ":linux_ppc64le": [
+ "-mcpu=power8",
+ "-mtune=power8",
+ ],
"//conditions:default": [],
})
@@ -123,11 +127,51 @@ cc_library(
":k8": [":simd_x86_64"],
":armeabi-v7a": [":simd_armv7a"],
":arm64-v8a": [":simd_armv8a"],
+ ":linux_ppc64le": [":simd_altivec"],
"//conditions:default": [":simd_none"],
}),
)
cc_library(
+ name = "simd_altivec",
+ srcs = [
+ "jchuff.h",
+ "jconfig.h",
+ "jdct.h",
+ "jerror.h",
+ "jinclude.h",
+ "jmorecfg.h",
+ "jpegint.h",
+ "jpeglib.h",
+ "jsimd.h",
+ "jsimddct.h",
+ "simd/jccolor-altivec.c",
+ "simd/jcgray-altivec.c",
+ "simd/jcsample.h",
+ "simd/jcsample-altivec.c",
+ "simd/jdcolor-altivec.c",
+ "simd/jdmerge-altivec.c",
+ "simd/jdsample-altivec.c",
+ "simd/jfdctfst-altivec.c",
+ "simd/jfdctint-altivec.c",
+ "simd/jidctfst-altivec.c",
+ "simd/jidctint-altivec.c",
+ "simd/jquanti-altivec.c",
+ "simd/jsimd.h",
+ "simd/jsimd_altivec.h",
+ "simd/jsimd_powerpc.c",
+ ],
+ hdrs = [
+ "simd/jccolext-altivec.c", # should have been named .inc
+ "simd/jcgryext-altivec.c", # should have been named .inc
+ "simd/jdcolext-altivec.c", # should have been named .inc
+ "simd/jdmrgext-altivec.c", # should have been named .inc
+ ],
+ copts = libjpegturbo_copts,
+ nocopts = libjpegturbo_nocopts,
+)
+
+cc_library(
name = "simd_x86_64",
srcs = [
"jchuff.h",
@@ -381,6 +425,7 @@ genrule(
":k8": "cp $(location jconfig_nowin_simd.h) $@",
":armeabi-v7a": "cp $(location jconfig_nowin_simd.h) $@",
":arm64-v8a": "cp $(location jconfig_nowin_simd.h) $@",
+ ":linux_ppc64le": "cp $(location jconfig_nowin_simd.h) $@",
"//conditions:default": "cp $(location jconfig_nowin_nosimd.h) $@",
}),
)
@@ -498,3 +543,8 @@ config_setting(
name = "windows_msvc",
values = {"cpu": "x64_windows_msvc"},
)
+
+config_setting(
+ name = "linux_ppc64le",
+ values = {"cpu": "ppc"},
+)
diff --git a/third_party/kafka/BUILD b/third_party/kafka/BUILD
new file mode 100644
index 0000000000..a61a9e1f6c
--- /dev/null
+++ b/third_party/kafka/BUILD
@@ -0,0 +1,147 @@
+# Description:
+# Kafka C/C++ (librdkafka) client library
+
+licenses(["notice"]) # 2-clause BSD license
+
+exports_files(["LICENSE"])
+
+cc_library(
+ name = "kafka",
+ srcs = [
+ "config.h",
+ "src-cpp/ConfImpl.cpp",
+ "src-cpp/ConsumerImpl.cpp",
+ "src-cpp/HandleImpl.cpp",
+ "src-cpp/KafkaConsumerImpl.cpp",
+ "src-cpp/MessageImpl.cpp",
+ "src-cpp/MetadataImpl.cpp",
+ "src-cpp/QueueImpl.cpp",
+ "src-cpp/RdKafka.cpp",
+ "src-cpp/TopicImpl.cpp",
+ "src-cpp/TopicPartitionImpl.cpp",
+ "src/crc32c.c",
+ "src/crc32c.h",
+ "src/lz4.c",
+ "src/lz4.h",
+ "src/lz4frame.c",
+ "src/lz4frame.h",
+ "src/lz4frame_static.h",
+ "src/lz4hc.c",
+ "src/lz4hc.h",
+ "src/lz4opt.h",
+ "src/queue.h",
+ "src/rd.h",
+ "src/rdaddr.c",
+ "src/rdaddr.h",
+ "src/rdatomic.h",
+ "src/rdavg.h",
+ "src/rdavl.c",
+ "src/rdavl.h",
+ "src/rdbuf.c",
+ "src/rdbuf.h",
+ "src/rdcrc32.h",
+ "src/rddl.h",
+ "src/rdendian.h",
+ "src/rdgz.c",
+ "src/rdgz.h",
+ "src/rdinterval.h",
+ "src/rdkafka.c",
+ "src/rdkafka.h",
+ "src/rdkafka_assignor.c",
+ "src/rdkafka_assignor.h",
+ "src/rdkafka_broker.c",
+ "src/rdkafka_broker.h",
+ "src/rdkafka_buf.c",
+ "src/rdkafka_buf.h",
+ "src/rdkafka_cgrp.c",
+ "src/rdkafka_cgrp.h",
+ "src/rdkafka_conf.c",
+ "src/rdkafka_conf.h",
+ "src/rdkafka_event.h",
+ "src/rdkafka_feature.c",
+ "src/rdkafka_feature.h",
+ "src/rdkafka_int.h",
+ "src/rdkafka_interceptor.c",
+ "src/rdkafka_interceptor.h",
+ "src/rdkafka_lz4.c",
+ "src/rdkafka_lz4.h",
+ "src/rdkafka_metadata.c",
+ "src/rdkafka_metadata.h",
+ "src/rdkafka_metadata_cache.c",
+ "src/rdkafka_msg.c",
+ "src/rdkafka_msg.h",
+ "src/rdkafka_msgset.h",
+ "src/rdkafka_msgset_reader.c",
+ "src/rdkafka_msgset_writer.c",
+ "src/rdkafka_offset.c",
+ "src/rdkafka_offset.h",
+ "src/rdkafka_op.c",
+ "src/rdkafka_op.h",
+ "src/rdkafka_partition.c",
+ "src/rdkafka_partition.h",
+ "src/rdkafka_pattern.c",
+ "src/rdkafka_pattern.h",
+ "src/rdkafka_proto.h",
+ "src/rdkafka_queue.c",
+ "src/rdkafka_queue.h",
+ "src/rdkafka_range_assignor.c",
+ "src/rdkafka_request.c",
+ "src/rdkafka_request.h",
+ "src/rdkafka_roundrobin_assignor.c",
+ "src/rdkafka_sasl.c",
+ "src/rdkafka_sasl.h",
+ "src/rdkafka_sasl_int.h",
+ "src/rdkafka_sasl_plain.c",
+ "src/rdkafka_subscription.c",
+ "src/rdkafka_subscription.h",
+ "src/rdkafka_timer.c",
+ "src/rdkafka_timer.h",
+ "src/rdkafka_topic.c",
+ "src/rdkafka_topic.h",
+ "src/rdkafka_transport.c",
+ "src/rdkafka_transport.h",
+ "src/rdkafka_transport_int.h",
+ "src/rdlist.c",
+ "src/rdlist.h",
+ "src/rdlog.c",
+ "src/rdlog.h",
+ "src/rdports.c",
+ "src/rdports.h",
+ "src/rdposix.h",
+ "src/rdrand.c",
+ "src/rdrand.h",
+ "src/rdregex.c",
+ "src/rdregex.h",
+ "src/rdstring.c",
+ "src/rdstring.h",
+ "src/rdsysqueue.h",
+ "src/rdtime.h",
+ "src/rdtypes.h",
+ "src/rdunittest.c",
+ "src/rdunittest.h",
+ "src/rdvarint.c",
+ "src/rdvarint.h",
+ "src/snappy.c",
+ "src/snappy.h",
+ "src/tinycthread.c",
+ "src/tinycthread.h",
+ "src/xxhash.c",
+ "src/xxhash.h",
+ ],
+ hdrs = [
+ "config.h",
+ ],
+ defines = [
+ ],
+ includes = [
+ "src",
+ "src-cpp",
+ ],
+ linkopts = [
+ "-lpthread",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ "@boringssl//:ssl",
+ ],
+)
diff --git a/third_party/kafka/config.patch b/third_party/kafka/config.patch
new file mode 100644
index 0000000000..fa5c2d35b4
--- /dev/null
+++ b/third_party/kafka/config.patch
@@ -0,0 +1,44 @@
+diff -Naur a/config.h b/config.h
+--- a/config.h 1970-01-01 00:00:00.000000000 +0000
++++ b/config.h 2017-10-28 00:57:03.316957390 +0000
+@@ -0,0 +1,40 @@
++#pragma once
++#define WITHOUT_OPTIMIZATION 0
++#define ENABLE_DEVEL 0
++#define ENABLE_REFCNT_DEBUG 0
++#define ENABLE_SHAREDPTR_DEBUG 0
++
++#define HAVE_ATOMICS_32 1
++#define HAVE_ATOMICS_32_SYNC 1
++
++#if (HAVE_ATOMICS_32)
++# if (HAVE_ATOMICS_32_SYNC)
++# define ATOMIC_OP32(OP1,OP2,PTR,VAL) __sync_ ## OP1 ## _and_ ## OP2(PTR, VAL)
++# else
++# define ATOMIC_OP32(OP1,OP2,PTR,VAL) __atomic_ ## OP1 ## _ ## OP2(PTR, VAL, __ATOMIC_SEQ_CST)
++# endif
++#endif
++
++#define HAVE_ATOMICS_64 1
++#define HAVE_ATOMICS_64_SYNC 1
++
++#if (HAVE_ATOMICS_64)
++# if (HAVE_ATOMICS_64_SYNC)
++# define ATOMIC_OP64(OP1,OP2,PTR,VAL) __sync_ ## OP1 ## _and_ ## OP2(PTR, VAL)
++# else
++# define ATOMIC_OP64(OP1,OP2,PTR,VAL) __atomic_ ## OP1 ## _ ## OP2(PTR, VAL, __ATOMIC_SEQ_CST)
++# endif
++#endif
++
++
++#define WITH_ZLIB 1
++#define WITH_LIBDL 1
++#define WITH_PLUGINS 0
++#define WITH_SNAPPY 1
++#define WITH_SOCKEM 1
++#define WITH_SSL 1
++#define WITH_SASL 0
++#define WITH_SASL_SCRAM 0
++#define WITH_SASL_CYRUS 0
++#define HAVE_REGEX 1
++#define HAVE_STRNDUP 1
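
The generated config.h above hard-wires HAVE_ATOMICS_*_SYNC to 1, so ATOMIC_OP32/64 expand to GCC's __sync_* builtins via token pasting. A small sketch of that textual expansion (the counter name is illustrative only):

    # Sketch only: mimic the token-pasting expansion of ATOMIC_OP32 above when
    # HAVE_ATOMICS_32_SYNC is 1 (the __sync_* builtin family is selected).
    def atomic_op32(op1, op2, ptr, val):
        return "__sync_%s_and_%s(%s, %s)" % (op1, op2, ptr, val)

    # ATOMIC_OP32(add, fetch, &counter, 1) expands to:
    print(atomic_op32("add", "fetch", "&counter", "1"))
    # -> __sync_add_and_fetch(&counter, 1)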
diff --git a/third_party/pcre.BUILD b/third_party/pcre.BUILD
index e2cdec4029..3a8e7a10b4 100644
--- a/third_party/pcre.BUILD
+++ b/third_party/pcre.BUILD
@@ -1,6 +1,6 @@
licenses(["notice"]) # BSD
-exports_files(["COPYING"])
+exports_files(["LICENCE"])
cc_library(
name = "pcre",
diff --git a/third_party/py/python_configure.bzl b/third_party/py/python_configure.bzl
index c16eb3a12a..954f21f5f8 100644
--- a/third_party/py/python_configure.bzl
+++ b/third_party/py/python_configure.bzl
@@ -118,7 +118,7 @@ def _symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name,
if src_dir != None:
src_dir = _norm_path(src_dir)
dest_dir = _norm_path(dest_dir)
- files = _read_dir(repository_ctx, src_dir)
+ files = '\n'.join(sorted(_read_dir(repository_ctx, src_dir).splitlines()))
# Create a list with the src_dir stripped to use for outputs.
dest_files = files.replace(src_dir, '').splitlines()
src_files = files.splitlines()
diff --git a/third_party/termcolor.BUILD b/third_party/termcolor.BUILD
index 6000e3289d..655d7cb85e 100644
--- a/third_party/termcolor.BUILD
+++ b/third_party/termcolor.BUILD
@@ -3,7 +3,7 @@
licenses(["notice"]) # MIT
-exports_files(["LICENSE"])
+exports_files(["COPYING.txt"])
py_library(
name = "termcolor",