From 73e38c29c74d9d9bf7128bf4737a410ff005611e Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Mon, 2 Jul 2018 17:07:06 -0700 Subject: Merge changes from github. PiperOrigin-RevId: 203037623 --- README.md | 2 + RELEASE.md | 38 +- tensorflow/BUILD | 32 +- tensorflow/cc/gradients/array_grad.cc | 52 +++ tensorflow/cc/gradients/array_grad_test.cc | 7 + tensorflow/compiler/xla/service/cpu/BUILD | 9 +- tensorflow/contrib/BUILD | 2 +- .../contrib/autograph/converters/control_flow.py | 1 - tensorflow/contrib/cmake/CMakeLists.txt | 17 +- tensorflow/contrib/cmake/external/boringssl.cmake | 2 +- tensorflow/contrib/cmake/tf_core_framework.cmake | 9 - tensorflow/contrib/cmake/tf_core_kernels.cmake | 13 +- tensorflow/contrib/cmake/tf_stream_executor.cmake | 2 - .../contrib/estimator/python/estimator/head.py | 3 +- .../estimator/python/estimator/head_test.py | 27 ++ .../gan/python/estimator/python/head_impl.py | 13 +- .../gan/python/losses/python/losses_impl_test.py | 2 +- tensorflow/contrib/image/kernels/image_ops.cc | 2 + tensorflow/contrib/image/kernels/image_ops.h | 25 +- tensorflow/contrib/image/ops/image_ops.cc | 2 +- .../image/python/kernel_tests/image_ops_test.py | 3 +- tensorflow/contrib/image/python/ops/image_ops.py | 3 +- .../contrib/layers/python/layers/layers_test.py | 2 +- tensorflow/contrib/lite/java/demo/app/build.gradle | 36 ++ .../contrib/lite/kernels/expand_dims_test.cc | 4 +- .../contrib/lite/kernels/maximum_minimum_test.cc | 4 +- tensorflow/contrib/lite/kernels/neg_test.cc | 4 +- tensorflow/contrib/lite/kernels/select_test.cc | 24 +- .../contrib/lite/kernels/strided_slice_test.cc | 15 +- tensorflow/contrib/lite/kernels/test_util_test.cc | 12 +- tensorflow/contrib/lite/kernels/tile_test.cc | 16 +- tensorflow/contrib/lite/kernels/topk_v2_test.cc | 24 +- tensorflow/contrib/lite/python/tflite_convert.py | 2 +- tensorflow/contrib/mpi_collectives/BUILD | 1 + .../contrib/mpi_collectives/kernels/mpi_ops.cc | 2 +- tensorflow/contrib/opt/__init__.py | 1 + 
.../contrib/quantize/python/fold_batch_norms.py | 14 +- .../quantize/python/fold_batch_norms_test.py | 6 +- .../python/util/receptive_field_test.py | 2 +- .../tensorrt/test/tf_trt_integration_test.py | 401 +++++++++++++++------ .../core/api_def/base_api/api_def_GatherNd.pbtxt | 2 +- .../core/api_def/base_api/api_def_LinSpace.pbtxt | 6 +- .../base_api/api_def_MatrixExponential.pbtxt | 2 +- .../api_def/base_api/api_def_MatrixLogarithm.pbtxt | 2 +- .../core/api_def/base_api/api_def_ReduceJoin.pbtxt | 2 +- .../api_def/base_api/api_def_ScatterNdAdd.pbtxt | 6 +- .../base_api/api_def_ScatterNdNonAliasingAdd.pbtxt | 6 +- .../api_def/base_api/api_def_ScatterNdSub.pbtxt | 6 +- .../api_def/base_api/api_def_ScatterNdUpdate.pbtxt | 6 +- .../core/api_def/base_api/api_def_Softmax.pbtxt | 2 +- .../base_api/api_def_SparseApplyAdagrad.pbtxt | 4 +- .../api_def_SparseApplyCenteredRMSProp.pbtxt | 6 +- .../api_def/base_api/api_def_SparseApplyFtrl.pbtxt | 10 +- .../base_api/api_def_SparseApplyMomentum.pbtxt | 4 +- .../api_def_SparseApplyProximalAdagrad.pbtxt | 8 +- ...pi_def_SparseApplyProximalGradientDescent.pbtxt | 4 +- .../base_api/api_def_SparseApplyRMSProp.pbtxt | 6 +- .../api_def/base_api/api_def_SparseSliceGrad.pbtxt | 40 ++ .../base_api/api_def_UnsortedSegmentSum.pbtxt | 2 +- .../api_def/python_api/api_def_BroadcastTo.pbtxt | 4 - .../python_api/api_def_SparseSliceGrad.pbtxt | 4 + tensorflow/core/kernels/BUILD | 7 + tensorflow/core/kernels/conv_ops_test.cc | 4 +- tensorflow/core/kernels/mkl_concat_op.cc | 6 +- tensorflow/core/kernels/sparse_slice_grad_op.cc | 126 +++++++ tensorflow/core/lib/db/sqlite_test.cc | 15 + tensorflow/core/ops/sparse_ops.cc | 14 + tensorflow/core/ops/sparse_ops_test.cc | 12 + tensorflow/docs_src/get_started/_index.yaml | 12 +- tensorflow/docs_src/get_started/leftnav_files | 6 +- tensorflow/docs_src/get_started/next_steps.md | 2 +- tensorflow/docs_src/guide/custom_estimators.md | 8 +- tensorflow/docs_src/guide/keras.md | 24 +- 
tensorflow/docs_src/install/install_sources.md | 22 +- tensorflow/docs_src/mobile/tflite/demo_android.md | 23 +- tensorflow/docs_src/tutorials/layers.md | 45 +-- tensorflow/go/op/wrappers.go | 12 +- tensorflow/java/src/gen/cc/source_writer.cc | 1 + .../java/src/main/java/org/tensorflow/Graph.java | 79 ++++ .../java/org/tensorflow/op/core/Gradients.java | 153 ++++++++ tensorflow/java/src/main/native/graph_jni.cc | 54 +++ tensorflow/java/src/main/native/graph_jni.h | 9 + tensorflow/java/src/main/native/session_jni.cc | 32 +- tensorflow/java/src/main/native/utils_jni.cc | 53 +++ tensorflow/java/src/main/native/utils_jni.h | 33 ++ .../src/test/java/org/tensorflow/GraphTest.java | 103 ++++++ .../src/test/java/org/tensorflow/SessionTest.java | 38 +- .../src/test/java/org/tensorflow/TestUtil.java | 34 +- tensorflow/python/estimator/model_fn.py | 4 +- tensorflow/python/framework/ops.py | 30 +- tensorflow/python/framework/ops_test.py | 9 + .../python/grappler/layout_optimizer_test.py | 4 +- tensorflow/python/kernel_tests/BUILD | 1 + tensorflow/python/kernel_tests/init_ops_test.py | 40 +- tensorflow/python/kernel_tests/shape_ops_test.py | 23 ++ .../python/kernel_tests/sparse_slice_op_test.py | 22 +- tensorflow/python/ops/array_grad.py | 8 +- tensorflow/python/ops/control_flow_ops.py | 1 + tensorflow/python/ops/init_ops.py | 24 +- tensorflow/python/ops/losses/losses_impl.py | 3 +- tensorflow/python/ops/nn_ops.py | 3 +- tensorflow/python/ops/sparse_grad.py | 29 ++ tensorflow/stream_executor/BUILD | 9 + .../tools/api/generator/create_python_api.py | 2 +- .../tensorflow.initializers.variance_scaling.pbtxt | 2 +- ...flow.keras.initializers.-variance-scaling.pbtxt | 2 +- tensorflow/tools/api/golden/tensorflow.pbtxt | 4 + .../tensorflow.variance_scaling_initializer.pbtxt | 2 +- tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le | 1 + tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le | 1 + tensorflow/tools/ci_build/Dockerfile.rbe.cpu | 4 +- .../tools/ci_build/ci_parameterized_build.sh 
| 8 + .../tools/ci_build/install/install_hdf5_ppc64le.sh | 30 ++ .../ci_build/linux/mkl/build-dev-container.sh | 53 +++ tensorflow/tools/ci_build/pi/build_raspberry_pi.sh | 4 + tensorflow/tools/ci_build/update_version.py | 2 +- tensorflow/tools/docker/Dockerfile.devel-mkl | 128 +++++++ tensorflow/tools/docker/Dockerfile.mkl | 75 ++++ .../tools/docker/parameterized_docker_build.sh | 142 ++++++-- tensorflow/tools/pip_package/setup.py | 12 +- tensorflow/workspace.bzl | 8 +- third_party/eigen.BUILD | 6 + third_party/eigen3/BUILD | 60 ++- third_party/kafka/BUILD | 5 +- third_party/repo.bzl | 1 - third_party/sqlite.BUILD | 1 + third_party/toolchains/BUILD | 22 ++ 127 files changed, 2136 insertions(+), 544 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_SparseSliceGrad.pbtxt delete mode 100644 tensorflow/core/api_def/python_api/api_def_BroadcastTo.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSliceGrad.pbtxt create mode 100644 tensorflow/core/kernels/sparse_slice_grad_op.cc create mode 100644 tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java create mode 100644 tensorflow/java/src/main/native/utils_jni.cc create mode 100644 tensorflow/java/src/main/native/utils_jni.h create mode 100755 tensorflow/tools/ci_build/install/install_hdf5_ppc64le.sh create mode 100755 tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh create mode 100755 tensorflow/tools/docker/Dockerfile.devel-mkl create mode 100755 tensorflow/tools/docker/Dockerfile.mkl create mode 100644 third_party/toolchains/BUILD diff --git a/README.md b/README.md index 42d7bbc104..05fcb23f7e 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,8 @@ The TensorFlow project strives to abide by generally accepted best practices in | --- | --- | --- | | **IBM s390x** | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/badge/icon)](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | TBA | | **IBM ppc64le CPU** | [![Build 
Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/) | TBA | +| **IBM ppc64le GPU** | [![Build Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_PPC64LE_GPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_PPC64LE_GPU/) | TBA | +| **Linux CPU with Intel® MKL-DNN®** | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/) | TBA | ## For more information diff --git a/RELEASE.md b/RELEASE.md index 377a8eda37..4b03394427 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,18 +1,38 @@ # Release 1.9.0 ## Major Features And Improvements -* Update tf.keras to the Keras 2.1.6 API. +* Updated docs for `tf.keras`: New Keras-based [get started](http://tensorflow.org/versions/r1.9/get_started), + and [programmers guide page](http://tensorflow.org/versions/r1.9/programmers_guide/keras). +* Update `tf.keras` to the Keras 2.1.6 API. +* Added [`tf.keras.layers.CuDNNGRU`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/keras/layers/CuDNNGRU) and [`tf.keras.layers.CuDNNLSTM`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/keras/layers/CuDNNLSTM) layers. [Try it](https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb?linkId=53292082). +* Adding support of core [feature columns](https://www.tensorflow.org/get_started/feature_columns) and [losses](https://www.tensorflow.org/api_docs/python/tf/losses) to [gradient boosted trees estimators](https://github.com/tensorflow/models/tree/master/official/boosted_trees).
+* The [python interface](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/lite) + for the [TFLite Optimizing Converter](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/toco/README.md) + has been expanded, and the command line interface (AKA: `toco`, `tflite_convert`) is once again + included in the standard `pip` installation. +* Improved data-loading and text processing with: + * [`tf.decode_compressed`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/decode_compressed) + * [`tf.string_strip`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/string_strip) + * [`tf.strings.regex_full_match`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/strings/regex_full_match) +* Added experimental support for new pre-made Estimators: + * [`tf.contrib.estimator.BaselineEstimator`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/estimator/BaselineEstimator) + * [`tf.contrib.estimator.RNNClassifier`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/estimator/RNNClassifier) + * [`tf.contrib.estimator.RNNEstimator`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/estimator/RNNEstimator) +* The [distributions.Bijector](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/distributions/bijectors/Bijector) + API supports broadcasting for Bijectors with new API changes. + +## Breaking Changes + * If you're opening empty variable scopes; replace `variable_scope('', ...)` by + `variable_scope(tf.get_variable_scope(), ...)`. + * Headers used for building custom ops have been moved from site-packages/external into site-packages/tensorflow/include/external. + +## Bug Fixes and Other Changes + * `tfe.Network` is deprecated. Please inherit from `tf.keras.Model`. -* Adding support of core feature columns and losses to gradient boosted trees estimators.
-* The distributions.Bijector API supports broadcasting for Bijectors with new API changes. See [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/distributions/bijectors/Bijector) for more details. * Layered variable names have changed in the following conditions: * Using `tf.keras.layers` with custom variable scopes. - * Using `tf.layers` in a subclassed `tf.keras.Model` class. See [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/layers) for more details - -## Breaking Changes - * If you're opening empty variable scopes; replace `variable_scope`('', ...) by `variable_scope`(`tf.get_variable_scope()`, ...). - -## Bug Fixes and Other Changes + * Using `tf.layers` in a subclassed `tf.keras.Model` class. See + [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/layers) for more details * `tf.data`: * The `DatasetBase::DebugString()` method is now `const`. * Added the `tf.contrib.data.sample_from_datasets()` API for randomly sampling from multiple datasets. diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 0bce474dfa..f362900387 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -438,6 +438,22 @@ filegroup( data = glob(["docs_src/**/*.md"]), ) +cc_library( + name = "grpc", + deps = select({ + ":linux_s390x": ["@grpc//:grpc_unsecure"], + "//conditions:default": ["@grpc"], + }), +) + +cc_library( + name = "grpc++", + deps = select({ + ":linux_s390x": ["@grpc//:grpc++_unsecure"], + "//conditions:default": ["@grpc//:grpc++"], + }), +) + # A shared object which includes registration mechanisms for ops and # kernels. Does not include the implementations of any ops or kernels. 
Instead, # the library which loads libtensorflow_framework.so @@ -587,19 +603,3 @@ py_library( visibility = ["//visibility:public"], deps = ["//tensorflow/python:no_contrib"], ) - -cc_library( - name = "grpc", - deps = select({ - ":linux_s390x": ["@grpc//:grpc_unsecure"], - "//conditions:default": ["@grpc"], - }), -) - -cc_library( - name = "grpc++", - deps = select({ - ":linux_s390x": ["@grpc//:grpc++_unsecure"], - "//conditions:default": ["@grpc//:grpc++"], - }), -) diff --git a/tensorflow/cc/gradients/array_grad.cc b/tensorflow/cc/gradients/array_grad.cc index ff348fadb2..b353accddc 100644 --- a/tensorflow/cc/gradients/array_grad.cc +++ b/tensorflow/cc/gradients/array_grad.cc @@ -421,6 +421,58 @@ Status StridedSliceGradHelper(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("StridedSlice", StridedSliceGradHelper); +Status SliceGrad(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + // Propagate the incoming gradient along all the selected values, + // and zero everywhere else. Use the Pad operator for this. + // + // First create an Nx2 padding where N is the number of input + // dimensions. The first column is the number of prepended zeros + // for each dimension, and the second column is the number of + // appended zeros. + // + // The first column is just the begin vector. 
+ // The second column is the shape of the input element-wise + // subtracted by begin+size + + // Running example: + // input.shape = [3, 5, 3] + // begin = [1, 2, 1], size = [1, 3, 2] + Input input = op.input(0); + Input begin = op.input(1); + // input_rank = 3 + auto input_rank = Rank(scope, input); + // slice_size = [1, 3, 2] + auto slice_size = Shape(scope, op.output(0)); + // padding_shape = [3, 1] + auto padding_shape = Stack(scope, {input_rank, 1}); + // before_padding = [[1] + // [2] + // [1]] + Input before_padding = Reshape(scope, begin, padding_shape); + // after_padding_sizes = shape(input) - slice_size - begin + // = [3, 5, 3] - [1, 3, 2] - [1, 2, 1] + // = [1, 0, 0] + auto after_padding_sizes = + Sub(scope, Sub(scope, Shape(scope, input), slice_size), begin); + // after_padding = [[1] + // [0] + // [0]] + Input after_padding = Reshape(scope, after_padding_sizes, padding_shape); + // paddings = [[1 1] + // [2 0] + // [1 0]] + auto paddings = + Concat(scope, {before_padding, after_padding}, Const(scope, 1)); + grad_outputs->push_back(Pad(scope, grad_inputs[0], paddings)); + // Nothing propagated for "begin" and "size" inputs + grad_outputs->push_back(NoGradient()); + grad_outputs->push_back(NoGradient()); + return scope.status(); +} +REGISTER_GRADIENT_OP("Slice", SliceGrad); + } // anonymous namespace } // namespace ops } // namespace tensorflow diff --git a/tensorflow/cc/gradients/array_grad_test.cc b/tensorflow/cc/gradients/array_grad_test.cc index de3bd0fc9e..d09275b648 100644 --- a/tensorflow/cc/gradients/array_grad_test.cc +++ b/tensorflow/cc/gradients/array_grad_test.cc @@ -378,5 +378,12 @@ TEST_F(ArrayGradTest, StridedSliceGrad) { RunTest(x, x_shape, y, {1, 2, 2, 2}); } +TEST_F(ArrayGradTest, SliceGrad) { + TensorShape x_shape({3, 5, 3}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + auto y = Slice(scope_, x, {1, 2, 1}, {1, 3, 2}); + RunTest(x, x_shape, y, {1, 3, 2}); +} + } // namespace } // namespace tensorflow diff 
--git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index f68db13428..3479240610 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -128,7 +128,14 @@ cc_library( "@llvm//:target", # fixdeps: keep "@llvm//:x86_code_gen", # fixdeps: keep "@llvm//:x86_disassembler", # fixdeps: keep - ], + ] + select({ + "//tensorflow:linux_ppc64le": [ + "@llvm//:powerpc_disassembler", + "@llvm//:powerpc_code_gen", + ], + "//conditions:default": [ + ], + }), alwayslink = True, # Contains compiler registration ) diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 8974e6867d..5ce44c01b8 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -125,9 +125,9 @@ py_library( }) + if_not_windows_cuda([ "//tensorflow/contrib/fused_conv:fused_conv_py", # unresolved symbols, need to export more symbols ]) + if_not_windows([ - "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", "//tensorflow/contrib/cloud:cloud_py", # depends on bigtable "//tensorflow/contrib/bigtable", # doesn't compile on Windows + "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", "//tensorflow/contrib/lite/python:lite", # unix dependency, need to fix code ]), ) diff --git a/tensorflow/contrib/autograph/converters/control_flow.py b/tensorflow/contrib/autograph/converters/control_flow.py index 22a671262c..f4a8710627 100644 --- a/tensorflow/contrib/autograph/converters/control_flow.py +++ b/tensorflow/contrib/autograph/converters/control_flow.py @@ -47,7 +47,6 @@ class SymbolNamer(object): class ControlFlowTransformer(converter.Base): """Transforms control flow structures like loops an conditionals.""" - def _create_cond_branch(self, body_name, aliased_orig_names, aliased_new_names, body, returns): if aliased_orig_names: diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 4ca7a1b28c..a0a5b0e00c 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ 
b/tensorflow/contrib/cmake/CMakeLists.txt @@ -299,17 +299,20 @@ include_directories( ${double_conversion_INCLUDE_DIR} ) -if(tensorflow_ENABLE_SSL_SUPPORT) - include(boringssl) - list(APPEND tensorflow_EXTERNAL_LIBRARIES ${boringssl_STATIC_LIBRARIES}) - list(APPEND tensorflow_EXTERNAL_DEPENDENCIES boringssl) - include_directories(${boringssl_INCLUDE_DIR}) -endif() if(tensorflow_ENABLE_GRPC_SUPPORT) + if(tensorflow_ENABLE_SSL_SUPPORT) + include(boringssl) + include_directories(${boringssl_INCLUDE_DIR}) + endif() include(grpc) + include_directories(${GRPC_INCLUDE_DIRS}) + # Place boringssl after grpc as grpc depends on boringssl. list(APPEND tensorflow_EXTERNAL_LIBRARIES ${grpc_STATIC_LIBRARIES}) list(APPEND tensorflow_EXTERNAL_DEPENDENCIES grpc) - include_directories(${GRPC_INCLUDE_DIRS}) + if(tensorflow_ENABLE_SSL_SUPPORT) + list(APPEND tensorflow_EXTERNAL_LIBRARIES ${boringssl_STATIC_LIBRARIES}) + list(APPEND tensorflow_EXTERNAL_DEPENDENCIES boringssl) + endif() endif() if(tensorflow_ENABLE_JEMALLOC_SUPPORT) include(jemalloc) diff --git a/tensorflow/contrib/cmake/external/boringssl.cmake b/tensorflow/contrib/cmake/external/boringssl.cmake index 3c4bb01e24..fbb14b2515 100644 --- a/tensorflow/contrib/cmake/external/boringssl.cmake +++ b/tensorflow/contrib/cmake/external/boringssl.cmake @@ -17,7 +17,7 @@ include (ExternalProject) set(boringssl_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/boringssl/src/boringssl/include) #set(boringssl_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/boringssl/src) set(boringssl_URL https://boringssl.googlesource.com/boringssl) -set(boringssl_TAG ee7aa02) +set(boringssl_TAG 7f8c553d7f4db0a6ce727f2986d41bf8fe8ec4bf) set(boringssl_BUILD ${CMAKE_BINARY_DIR}/boringssl/src/boringssl-build) #set(boringssl_LIBRARIES ${boringssl_BUILD}/obj/so/libboringssl.so) set(boringssl_STATIC_LIBRARIES diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index 9f02d6cbab..872b016d2b 100644 --- 
a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -236,15 +236,6 @@ if(WIN32) list(APPEND tf_core_lib_srcs ${tf_core_platform_windows_srcs}) endif(WIN32) -if(tensorflow_ENABLE_SSL_SUPPORT) - # Cloud libraries require boringssl. - file(GLOB tf_core_platform_cloud_srcs - "${tensorflow_source_dir}/tensorflow/core/platform/cloud/*.h" - "${tensorflow_source_dir}/tensorflow/core/platform/cloud/*.cc" - ) - list(APPEND tf_core_lib_srcs ${tf_core_platform_cloud_srcs}) -endif() - if (tensorflow_ENABLE_HDFS_SUPPORT) list(APPEND tf_core_platform_hdfs_srcs "${tensorflow_source_dir}/tensorflow/core/platform/hadoop/hadoop_file_system.cc" diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index 2d76bf530a..844f62649d 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -134,14 +134,13 @@ if(tensorflow_BUILD_CONTRIB_KERNELS) list(APPEND tf_core_kernels_srcs ${tf_contrib_kernels_srcs}) endif(tensorflow_BUILD_CONTRIB_KERNELS) -if(NOT tensorflow_ENABLE_SSL_SUPPORT) - # Cloud libraries require boringssl. - file(GLOB tf_core_kernels_cloud_srcs - "${tensorflow_source_dir}/tensorflow/contrib/cloud/kernels/*.h" - "${tensorflow_source_dir}/tensorflow/contrib/cloud/kernels/*.cc" - ) +# Cloud libraries require curl and boringssl. +# Curl is not supported yet anyway so we remove for now. 
+file(GLOB tf_core_kernels_cloud_srcs + "${tensorflow_source_dir}/tensorflow/contrib/cloud/kernels/*.h" + "${tensorflow_source_dir}/tensorflow/contrib/cloud/kernels/*.cc" +) list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_cloud_srcs}) -endif() file(GLOB_RECURSE tf_core_kernels_exclude_srcs "${tensorflow_source_dir}/tensorflow/core/kernels/*test*.h" diff --git a/tensorflow/contrib/cmake/tf_stream_executor.cmake b/tensorflow/contrib/cmake/tf_stream_executor.cmake index 2f70e59d54..6d634cb170 100644 --- a/tensorflow/contrib/cmake/tf_stream_executor.cmake +++ b/tensorflow/contrib/cmake/tf_stream_executor.cmake @@ -64,8 +64,6 @@ file(GLOB tf_stream_executor_srcs if (tensorflow_ENABLE_GPU) file(GLOB tf_stream_executor_gpu_srcs "${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*.cc" - "${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.h" - "${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.cc" ) if (NOT tensorflow_BUILD_CC_TESTS) file(GLOB tf_stream_executor_gpu_tests diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py index 9594e5132f..c9d86ef4ab 100644 --- a/tensorflow/contrib/estimator/python/estimator/head.py +++ b/tensorflow/contrib/estimator/python/estimator/head.py @@ -534,7 +534,8 @@ def multi_label_head(n_classes, * An integer `SparseTensor` of class indices. The `dense_shape` must be `[D0, D1, ... DN, ?]` and the values within `[0, n_classes)`. * If `label_vocabulary` is given, a string `SparseTensor`. The `dense_shape` - must be `[D0, D1, ... DN, ?]` and the values within `label_vocabulary`. + must be `[D0, D1, ... DN, ?]` and the values within `label_vocabulary` or a + multi-hot tensor of shape `[D0, D1, ... DN, n_classes]`. If `weight_column` is specified, weights must be of shape `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`. 
diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py index b2b57fa06b..7b884402d4 100644 --- a/tensorflow/contrib/estimator/python/estimator/head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/head_test.py @@ -568,6 +568,33 @@ class MultiLabelHead(test.TestCase): expected_loss=expected_loss, expected_metrics=expected_metrics) + def test_eval_with_label_vocabulary_with_multi_hot_input(self): + n_classes = 2 + head = head_lib.multi_label_head( + n_classes, label_vocabulary=['class0', 'class1']) + logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32) + labels_multi_hot = np.array([[1, 0], [1, 1]], dtype=np.int64) + # loss = labels * -log(sigmoid(logits)) + + # (1 - labels) * -log(1 - sigmoid(logits)) + # Sum over examples, divide by batch_size. + expected_loss = 0.5 * np.sum( + _sigmoid_cross_entropy(labels=labels_multi_hot, logits=logits)) + keys = metric_keys.MetricKeys + expected_metrics = { + # Average loss over examples. + keys.LOSS_MEAN: expected_loss, + # auc and auc_pr cannot be reliably calculated for only 4 samples, but + # this assert tests that the algorithm remains consistent. + keys.AUC: 0.3333, + keys.AUC_PR: 0.7639, + } + self._test_eval( + head=head, + logits=logits, + labels=labels_multi_hot, + expected_loss=expected_loss, + expected_metrics=expected_metrics) + def test_eval_with_thresholds(self): n_classes = 2 thresholds = [0.25, 0.5, 0.75] diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py index 5b5557bd8f..d1441e1eb2 100644 --- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py @@ -103,9 +103,20 @@ class GANHead(head._Head): # pylint: disable=protected-access name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. 
""" + + if not callable(generator_loss_fn): + raise TypeError('generator_loss_fn must be callable.') + if not callable(discriminator_loss_fn): + raise TypeError('discriminator_loss_fn must be callable.') + if not use_loss_summaries in [True, False, None]: + raise ValueError('use_loss_summaries must be True, False or None.') + if get_hooks_fn is not None and not callable(get_hooks_fn): + raise TypeError('get_hooks_fn must be callable.') + if name is not None and not isinstance(name, str): + raise TypeError('name must be string.') + if get_hooks_fn is None: get_hooks_fn = tfgan_train.get_sequential_train_hooks() - # TODO(joelshor): Validate inputs. if use_loss_summaries in [True, False]: generator_loss_fn = functools.partial( diff --git a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py index 2889e93743..9f5fee4542 100644 --- a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py +++ b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py @@ -570,7 +570,7 @@ class MutualInformationPenaltyTest(test.TestCase, _PenaltyTest): 'predicted_distributions': self._predicted_distributions, } self._expected_loss = 1.61610 - self._expected_op_name = 'mutual_information_loss/mul' + self._expected_op_name = 'mutual_information_loss/mul_1' self._batch_size = 2 diff --git a/tensorflow/contrib/image/kernels/image_ops.cc b/tensorflow/contrib/image/kernels/image_ops.cc index c2e32da133..022e17d139 100644 --- a/tensorflow/contrib/image/kernels/image_ops.cc +++ b/tensorflow/contrib/image/kernels/image_ops.cc @@ -35,6 +35,7 @@ typedef Eigen::ThreadPoolDevice CPUDevice; template struct FillProjectiveTransform; template struct FillProjectiveTransform; template struct FillProjectiveTransform; +template struct FillProjectiveTransform; template struct FillProjectiveTransform; template struct FillProjectiveTransform; @@ -99,6 +100,7 @@ class ImageProjectiveTransform : public OpKernel { 
TF_CALL_uint8(REGISTER); TF_CALL_int32(REGISTER); TF_CALL_int64(REGISTER); +TF_CALL_half(REGISTER); TF_CALL_float(REGISTER); TF_CALL_double(REGISTER); diff --git a/tensorflow/contrib/image/kernels/image_ops.h b/tensorflow/contrib/image/kernels/image_ops.h index 8408fd6f2e..209aa24548 100644 --- a/tensorflow/contrib/image/kernels/image_ops.h +++ b/tensorflow/contrib/image/kernels/image_ops.h @@ -21,6 +21,7 @@ limitations under the License. #define EIGEN_USE_THREADS #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/platform/types.h" @@ -110,21 +111,21 @@ class ProjectiveGenerator { // f(x, y_floor) = (x_ceil - x) / (x_ceil - x_floor) * f(x_floor, y_floor) // + (x - x_floor) / (x_ceil - x_floor) * f(x_ceil, y_floor) const float value_yfloor = - (x_ceil - x) * read_with_fill_value(batch, DenseIndex(y_floor), - DenseIndex(x_floor), channel, - fill_value) + - (x - x_floor) * read_with_fill_value(batch, DenseIndex(y_floor), - DenseIndex(x_ceil), channel, - fill_value); + (x_ceil - x) * static_cast(read_with_fill_value( + batch, DenseIndex(y_floor), DenseIndex(x_floor), + channel, fill_value)) + + (x - x_floor) * static_cast(read_with_fill_value( + batch, DenseIndex(y_floor), DenseIndex(x_ceil), + channel, fill_value)); // f(x, y_ceil) = (x_ceil - x) / (x_ceil - x_floor) * f(x_floor, y_ceil) // + (x - x_floor) / (x_ceil - x_floor) * f(x_ceil, y_ceil) const float value_yceil = - (x_ceil - x) * read_with_fill_value(batch, DenseIndex(y_ceil), - DenseIndex(x_floor), channel, - fill_value) + - (x - x_floor) * read_with_fill_value(batch, DenseIndex(y_ceil), - DenseIndex(x_ceil), channel, - fill_value); + (x_ceil - x) * static_cast(read_with_fill_value( + batch, DenseIndex(y_ceil), DenseIndex(x_floor), + channel, fill_value)) + + (x - x_floor) * static_cast(read_with_fill_value( + batch, DenseIndex(y_ceil), DenseIndex(x_ceil), + channel, fill_value)); // f(x, y) = (y_ceil - y) / (y_ceil - 
y_floor) * f(x, y_floor) // + (y - y_floor) / (y_ceil - y_floor) * f(x, y_ceil) return T((y_ceil - y) * value_yfloor + (y - y_floor) * value_yceil); diff --git a/tensorflow/contrib/image/ops/image_ops.cc b/tensorflow/contrib/image/ops/image_ops.cc index ebdcaea7ab..e59f1bf844 100644 --- a/tensorflow/contrib/image/ops/image_ops.cc +++ b/tensorflow/contrib/image/ops/image_ops.cc @@ -29,7 +29,7 @@ using shape_inference::ShapeHandle; REGISTER_OP("ImageProjectiveTransform") .Input("images: dtype") .Input("transforms: float32") - .Attr("dtype: {uint8, int32, int64, float32, float64}") + .Attr("dtype: {uint8, int32, int64, float16, float32, float64}") .Attr("interpolation: string") .Output("transformed_images: dtype") .SetShapeFn([](InferenceContext* c) { diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py index 6c9ff858ab..62a22dcf34 100644 --- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py +++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py @@ -30,7 +30,8 @@ from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest _DTYPES = set( - [dtypes.uint8, dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]) + [dtypes.uint8, dtypes.int32, dtypes.int64, + dtypes.float16, dtypes.float32, dtypes.float64]) class ImageOpsTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/contrib/image/python/ops/image_ops.py b/tensorflow/contrib/image/python/ops/image_ops.py index cd984c8054..86b0ffe9a0 100644 --- a/tensorflow/contrib/image/python/ops/image_ops.py +++ b/tensorflow/contrib/image/python/ops/image_ops.py @@ -33,7 +33,8 @@ _image_ops_so = loader.load_op_library( resource_loader.get_path_to_datafile("_image_ops.so")) _IMAGE_DTYPES = set( - [dtypes.uint8, dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]) + [dtypes.uint8, dtypes.int32, dtypes.int64, + dtypes.float16, dtypes.float32, dtypes.float64]) 
ops.RegisterShape("ImageConnectedComponents")(common_shapes.call_cpp_shape_fn) ops.RegisterShape("ImageProjectiveTransform")(common_shapes.call_cpp_shape_fn) diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 0e8c89fe3a..c5c7269b1f 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -1356,7 +1356,7 @@ class DropoutTest(test.TestCase): with self.test_session(): images = np.random.uniform(size=(5, height, width, 3)) output = _layers.dropout(images) - self.assertEqual(output.op.name, 'Dropout/dropout/mul') + self.assertEqual(output.op.name, 'Dropout/dropout_1/mul') output.get_shape().assert_is_compatible_with( ops.convert_to_tensor(images).get_shape()) diff --git a/tensorflow/contrib/lite/java/demo/app/build.gradle b/tensorflow/contrib/lite/java/demo/app/build.gradle index 908549321b..49868c5a75 100644 --- a/tensorflow/contrib/lite/java/demo/app/build.gradle +++ b/tensorflow/contrib/lite/java/demo/app/build.gradle @@ -57,3 +57,39 @@ dependencies { testCompile 'junit:junit:4.12' } + +def modelDownloadUrl = "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip" +def localCache = "build/intermediates/mobilenet_v1_224_android_quant_2017_11_08.zip" +def targetFolder = "src/main/assets" + +task downloadModel(type: DownloadUrlTask) { + doFirst { + println "Downloading ${modelDownloadUrl}" + } + sourceUrl = "${modelDownloadUrl}" + target = file("${localCache}") +} + +task unzipModel(type: Copy, dependsOn: 'downloadModel') { + doFirst { + println "Unzipping ${localCache}" + } + from zipTree("${localCache}") + into "${targetFolder}" +} + +// Ensure the model file is downloaded and extracted before every build +preBuild.dependsOn unzipModel + +class DownloadUrlTask extends DefaultTask { + @Input + String sourceUrl + + @OutputFile + File target + + @TaskAction + 
void download() { + ant.get(src: sourceUrl, dest: target) + } +} diff --git a/tensorflow/contrib/lite/kernels/expand_dims_test.cc b/tensorflow/contrib/lite/kernels/expand_dims_test.cc index b755e8ce29..50dc860e5a 100644 --- a/tensorflow/contrib/lite/kernels/expand_dims_test.cc +++ b/tensorflow/contrib/lite/kernels/expand_dims_test.cc @@ -39,7 +39,7 @@ class ExpandDimsOpModel : public SingleOpModel { void SetInputFloat(std::initializer_list data) { PopulateTensor(input_, data); } - void SetAxis(int axis) { PopulateTensor(axis_, {axis}); } + void SetAxis(int axis) { PopulateTensor(axis_, {axis}); } std::vector GetValuesFloat() { return ExtractVector(output_); } std::vector GetOutputShape() { return GetTensorShape(output_); } @@ -51,7 +51,7 @@ class ExpandDimsOpModel : public SingleOpModel { TEST(ExpandDimsOpTest, DifferentAxis) { ExpandDimsOpModel m({2, 2}, TensorType_FLOAT32); - const auto values = {-1.f, 1.f, -2.f, 2.f}; + std::initializer_list values = {-1.f, 1.f, -2.f, 2.f}; m.SetInputFloat(values); m.SetAxis(0); m.Invoke(); diff --git a/tensorflow/contrib/lite/kernels/maximum_minimum_test.cc b/tensorflow/contrib/lite/kernels/maximum_minimum_test.cc index 0752aa1804..fd4d5367c5 100644 --- a/tensorflow/contrib/lite/kernels/maximum_minimum_test.cc +++ b/tensorflow/contrib/lite/kernels/maximum_minimum_test.cc @@ -126,10 +126,10 @@ TEST(MaximumOpTest, FloatWithBroadcastTest) { TEST(MaximumOpTest, Int32WithBroadcastTest) { std::initializer_list data1 = {1, 0, -1, -2, 3, 11}; std::initializer_list data2 = {2}; - TestModel(BuiltinOperator_MAXIMUM, {TensorType_INT32, {3, 1, 2}}, + TestModel(BuiltinOperator_MAXIMUM, {TensorType_INT32, {3, 1, 2}}, {TensorType_INT32, {1}}, {TensorType_INT32, {3, 1, 2}}, data1, data2, {2, 2, 2, 2, 3, 11}); - TestModel(BuiltinOperator_MINIMUM, {TensorType_INT32, {3, 1, 2}}, + TestModel(BuiltinOperator_MINIMUM, {TensorType_INT32, {3, 1, 2}}, {TensorType_INT32, {1}}, {TensorType_INT32, {3, 1, 2}}, data1, data2, {1, 0, -1, -2, 2, 2}); } diff 
--git a/tensorflow/contrib/lite/kernels/neg_test.cc b/tensorflow/contrib/lite/kernels/neg_test.cc index 3c95ac8cc2..3d3594c60b 100644 --- a/tensorflow/contrib/lite/kernels/neg_test.cc +++ b/tensorflow/contrib/lite/kernels/neg_test.cc @@ -58,9 +58,9 @@ TEST(NegOpModel, NegFloat) { TEST(NegOpModel, NegInt32) { NegOpModel m({TensorType_INT32, {2, 3}}, {TensorType_INT32, {2, 3}}); - m.SetInput({-2, -1, 0, 1, 2, 3}); + m.SetInput({-2, -1, 0, 1, 2, 3}); m.Invoke(); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({2, 1, 0, -1, -2, -3})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({2, 1, 0, -1, -2, -3})); } TEST(NegOpModel, NegInt64) { diff --git a/tensorflow/contrib/lite/kernels/select_test.cc b/tensorflow/contrib/lite/kernels/select_test.cc index cfe24a5fc9..4664b9acb4 100644 --- a/tensorflow/contrib/lite/kernels/select_test.cc +++ b/tensorflow/contrib/lite/kernels/select_test.cc @@ -88,11 +88,11 @@ TEST(SelectOpTest, SelectUInt8) { TensorType_UINT8); model.PopulateTensor(model.input1(), {false, true, false, false}); - model.PopulateTensor(model.input2(), {1, 2, 3, 4}); - model.PopulateTensor(model.input3(), {5, 6, 7, 8}); + model.PopulateTensor(model.input2(), {1, 2, 3, 4}); + model.PopulateTensor(model.input3(), {5, 6, 7, 8}); model.Invoke(); - EXPECT_THAT(model.GetOutput(), ElementsAreArray({5, 2, 7, 8})); + EXPECT_THAT(model.GetOutput(), ElementsAreArray({5, 2, 7, 8})); EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1, 4})); } @@ -101,11 +101,11 @@ TEST(SelectOpTest, SelectInt32) { TensorType_INT32); model.PopulateTensor(model.input1(), {false, true, false, false}); - model.PopulateTensor(model.input2(), {1, 2, 3, 4}); - model.PopulateTensor(model.input3(), {5, 6, 7, 8}); + model.PopulateTensor(model.input2(), {1, 2, 3, 4}); + model.PopulateTensor(model.input3(), {5, 6, 7, 8}); model.Invoke(); - EXPECT_THAT(model.GetOutput(), ElementsAreArray({5, 2, 7, 8})); + EXPECT_THAT(model.GetOutput(), ElementsAreArray({5, 2, 7, 8})); 
EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1, 4})); } @@ -113,11 +113,11 @@ TEST(SelectOpTest, RankOneSelectInt32) { SelectOpModel model({2}, {2, 1, 2, 1}, {2, 1, 2, 1}, TensorType_INT32); model.PopulateTensor(model.input1(), {false, true}); - model.PopulateTensor(model.input2(), {1, 2, 3, 4}); - model.PopulateTensor(model.input3(), {5, 6, 7, 8}); + model.PopulateTensor(model.input2(), {1, 2, 3, 4}); + model.PopulateTensor(model.input3(), {5, 6, 7, 8}); model.Invoke(); - EXPECT_THAT(model.GetOutput(), ElementsAreArray({5, 6, 3, 4})); + EXPECT_THAT(model.GetOutput(), ElementsAreArray({5, 6, 3, 4})); EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 1, 2, 1})); } @@ -125,11 +125,11 @@ TEST(SelectOpTest, RankZeroSelectInt32) { SelectOpModel model({1}, {1, 2, 2, 1}, {1, 2, 2, 1}, TensorType_INT32); model.PopulateTensor(model.input1(), {false}); - model.PopulateTensor(model.input2(), {1, 2, 3, 4}); - model.PopulateTensor(model.input3(), {5, 6, 7, 8}); + model.PopulateTensor(model.input2(), {1, 2, 3, 4}); + model.PopulateTensor(model.input3(), {5, 6, 7, 8}); model.Invoke(); - EXPECT_THAT(model.GetOutput(), ElementsAreArray({5, 6, 7, 8})); + EXPECT_THAT(model.GetOutput(), ElementsAreArray({5, 6, 7, 8})); EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 2, 2, 1})); } diff --git a/tensorflow/contrib/lite/kernels/strided_slice_test.cc b/tensorflow/contrib/lite/kernels/strided_slice_test.cc index 716b11d432..c5d4f9affb 100644 --- a/tensorflow/contrib/lite/kernels/strided_slice_test.cc +++ b/tensorflow/contrib/lite/kernels/strided_slice_test.cc @@ -21,7 +21,6 @@ limitations under the License. 
namespace tflite { namespace { -using ::int32; using ::testing::ElementsAreArray; template data) { PopulateTensor(input_, data); } - void SetBegin(std::initializer_list data) { - PopulateTensor(begin_, data); + void SetBegin(std::initializer_list data) { + PopulateTensor(begin_, data); } - void SetEnd(std::initializer_list data) { - PopulateTensor(end_, data); + void SetEnd(std::initializer_list data) { + PopulateTensor(end_, data); } - void SetStrides(std::initializer_list data) { - PopulateTensor(strides_, data); + void SetStrides(std::initializer_list data) { + PopulateTensor(strides_, data); } std::vector GetOutput() { @@ -566,7 +565,7 @@ TEST(StridedSliceOpTest, RunTwice) { } TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis1Uint8) { - StridedSliceOpModel m({2, 3, 2}, {3}, {3}, {3}, 0, 0, + StridedSliceOpModel m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 1); m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); m.SetBegin({0, 0, 0}); diff --git a/tensorflow/contrib/lite/kernels/test_util_test.cc b/tensorflow/contrib/lite/kernels/test_util_test.cc index 1e10e89061..2365803472 100644 --- a/tensorflow/contrib/lite/kernels/test_util_test.cc +++ b/tensorflow/contrib/lite/kernels/test_util_test.cc @@ -22,22 +22,22 @@ using ::testing::ElementsAreArray; TEST(TestUtilTest, QuantizeVector) { std::vector data = {-1.0, -0.5, 0.0, 0.5, 1.0, 1000.0}; - auto q_data = Quantize(data, /*scale=*/1.0, /*zero_point=*/0); - std::vector expected = {0, 0, 0, 1, 1, 255}; + auto q_data = Quantize(data, /*scale=*/1.0, /*zero_point=*/0); + std::vector expected = {0, 0, 0, 1, 1, 255}; EXPECT_THAT(q_data, ElementsAreArray(expected)); } TEST(TestUtilTest, QuantizeVectorScalingDown) { std::vector data = {-1.0, -0.5, 0.0, 0.5, 1.0, 1000.0}; - auto q_data = Quantize(data, /*scale=*/10.0, /*zero_point=*/0); - std::vector expected = {0, 0, 0, 0, 0, 100}; + auto q_data = Quantize(data, /*scale=*/10.0, /*zero_point=*/0); + std::vector expected = {0, 0, 0, 0, 0, 100}; EXPECT_THAT(q_data, 
ElementsAreArray(expected)); } TEST(TestUtilTest, QuantizeVectorScalingUp) { std::vector data = {-1.0, -0.5, 0.0, 0.5, 1.0, 1000.0}; - auto q_data = Quantize(data, /*scale=*/0.1, /*zero_point=*/0); - std::vector expected = {0, 0, 0, 5, 10, 255}; + auto q_data = Quantize(data, /*scale=*/0.1, /*zero_point=*/0); + std::vector expected = {0, 0, 0, 5, 10, 255}; EXPECT_THAT(q_data, ElementsAreArray(expected)); } diff --git a/tensorflow/contrib/lite/kernels/tile_test.cc b/tensorflow/contrib/lite/kernels/tile_test.cc index a134a75d56..4f78c224e5 100644 --- a/tensorflow/contrib/lite/kernels/tile_test.cc +++ b/tensorflow/contrib/lite/kernels/tile_test.cc @@ -38,27 +38,27 @@ class TileOpModel : public SingleOpModel { PopulateTensor(input_, data); } - void SetInputUInt8(std::initializer_list data) { - PopulateTensor(input_, data); + void SetInputUInt8(std::initializer_list data) { + PopulateTensor(input_, data); } - void SetInputInt32(std::initializer_list data) { - PopulateTensor(input_, data); + void SetInputInt32(std::initializer_list data) { + PopulateTensor(input_, data); } void SetInputInt64(std::initializer_list data) { PopulateTensor(input_, data); } - void SetMultipliers(std::initializer_list data) { - PopulateTensor(multipliers_, data); + void SetMultipliers(std::initializer_list data) { + PopulateTensor(multipliers_, data); } std::vector GetOutputFloat() { return ExtractVector(output_); } - std::vector GetOutputUInt8() { return ExtractVector(output_); } + std::vector GetOutputUInt8() { return ExtractVector(output_); } - std::vector GetOutputInt32() { return ExtractVector(output_); } + std::vector GetOutputInt32() { return ExtractVector(output_); } std::vector GetOutputInt64() { return ExtractVector(output_); diff --git a/tensorflow/contrib/lite/kernels/topk_v2_test.cc b/tensorflow/contrib/lite/kernels/topk_v2_test.cc index 212f8acc76..2abb89b617 100644 --- a/tensorflow/contrib/lite/kernels/topk_v2_test.cc +++ b/tensorflow/contrib/lite/kernels/topk_v2_test.cc @@ 
-42,32 +42,32 @@ class TopKV2OpModel : public SingleOpModel { PopulateTensor(input_, data); } - void SetInputUInt8(std::initializer_list data) { - PopulateTensor(input_, data); + void SetInputUInt8(std::initializer_list data) { + PopulateTensor(input_, data); } - void SetInputInt32(std::initializer_list data) { - PopulateTensor(input_, data); + void SetInputInt32(std::initializer_list data) { + PopulateTensor(input_, data); } void SetInputInt64(std::initializer_list data) { PopulateTensor(input_, data); } - std::vector GetIndexes() { - return ExtractVector(output_indexes_); + std::vector GetIndexes() { + return ExtractVector(output_indexes_); } std::vector GetValuesFloat() { return ExtractVector(output_values_); } - std::vector GetValuesUInt8() { - return ExtractVector(output_values_); + std::vector GetValuesUInt8() { + return ExtractVector(output_values_); } - std::vector GetValuesInt32() { - return ExtractVector(output_values_); + std::vector GetValuesInt32() { + return ExtractVector(output_values_); } std::vector GetValuesInt64() { @@ -119,7 +119,7 @@ TEST(TopKV2OpTest, VectorFloat) { EXPECT_THAT(m.GetValuesFloat(), ElementsAreArray(ArrayFloatNear({0.8, 0.2}))); } -// Check that uint8 works. +// Check that uint8_t works. TEST(TopKV2OpTest, TypeUint8) { TopKV2OpModel m({2, 3}, TensorType_UINT8, 2); m.SetInputUInt8({1, 2, 3, 251, 250, 249}); @@ -128,7 +128,7 @@ TEST(TopKV2OpTest, TypeUint8) { EXPECT_THAT(m.GetValuesUInt8(), ElementsAreArray({3, 2, 251, 250})); } -// Check that int32 works. +// Check that int32_t works. 
TEST(TopKV2OpTest, TypeInt32) { TopKV2OpModel m({2, 3}, TensorType_INT32, 2); m.SetInputInt32({1, 2, 3, 10251, 10250, 10249}); diff --git a/tensorflow/contrib/lite/python/tflite_convert.py b/tensorflow/contrib/lite/python/tflite_convert.py index 286d15984f..9bd1f4f76e 100644 --- a/tensorflow/contrib/lite/python/tflite_convert.py +++ b/tensorflow/contrib/lite/python/tflite_convert.py @@ -105,7 +105,7 @@ def _convert_model(flags): input_arrays = converter.get_input_arrays() std_dev_values = _parse_array(flags.std_dev_values, type_fn=int) mean_values = _parse_array(flags.mean_values, type_fn=int) - quant_stats = zip(mean_values, std_dev_values) + quant_stats = list(zip(mean_values, std_dev_values)) if ((not flags.input_arrays and len(input_arrays) > 1) or (len(input_arrays) != len(quant_stats))): raise ValueError("Mismatching --input_arrays, --std_dev_values, and " diff --git a/tensorflow/contrib/mpi_collectives/BUILD b/tensorflow/contrib/mpi_collectives/BUILD index a7be92a35e..ecac06354d 100644 --- a/tensorflow/contrib/mpi_collectives/BUILD +++ b/tensorflow/contrib/mpi_collectives/BUILD @@ -52,6 +52,7 @@ tf_custom_op_library( deps = [ ":mpi_defines", ":mpi_message_proto_cc", + "//tensorflow/stream_executor:stream_executor_headers_lib", "//third_party/mpi", ], ) diff --git a/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc b/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc index ed22ee667f..e4b0c2c654 100644 --- a/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc +++ b/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc @@ -73,7 +73,7 @@ limitations under the License. 
*/ template -using StatusOr = se::port::StatusOr; +using StatusOr = stream_executor::port::StatusOr; using CPUDevice = Eigen::ThreadPoolDevice; using GPUDevice = Eigen::GpuDevice; diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index 65777b1323..3e63e99030 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -30,6 +30,7 @@ from tensorflow.contrib.opt.python.training.model_average_optimizer import * from tensorflow.contrib.opt.python.training.moving_average_optimizer import * from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import * from tensorflow.contrib.opt.python.training.nadam_optimizer import * +from tensorflow.contrib.opt.python.training.weight_decay_optimizers import * from tensorflow.contrib.opt.python.training.powersign import * from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import * from tensorflow.contrib.opt.python.training.weight_decay_optimizers import * diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index 804cd8d72d..e3c4899830 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -506,7 +506,7 @@ def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay): def _IsValidUnfusedBatchNorm(graph, context): """Checks that the output of the unfused batch norm has consumers.""" add_shift = graph.get_operation_by_name( - context + '/BatchNorm/batchnorm/add_1') + context + '/BatchNorm/batchnorm_1/add_1') # Ensure that the output tensor of batch norm has consumers, otherwise this # is a dangling node and not a match. 
return bool(add_shift.outputs[0].consumers()) @@ -599,7 +599,7 @@ def _GetBatchNormParams(graph, context, has_scaling): op_suffix_mean = '/BatchNorm/moments/Squeeze' op_suffix_variance = '/BatchNorm/moments/Squeeze_1' - op_suffix_epsilon = '/BatchNorm/batchnorm/add/y' + op_suffix_epsilon = '/BatchNorm/batchnorm_1/add/y' op_suffix_bn_decay_mean = '/BatchNorm/AssignMovingAvg/decay' op_suffix_bn_decay_var = '/BatchNorm/AssignMovingAvg_1/decay' @@ -675,12 +675,12 @@ def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay, Returns: A pair of Operations, the first is the original consumer node of the batch - norm (../BatchNorm/batchnorm/add_1), the second is the consumer node of + norm (../BatchNorm/batchnorm_1/add_1), the second is the consumer node of the folded graph (add_fold). """ mul_scale_name = 'mul_1' if has_scaling else 'mul' mul_scale = graph.get_operation_by_name(context + - '/BatchNorm/batchnorm/' + + '/BatchNorm/batchnorm_1/' + mul_scale_name) op_below = mul_scale.inputs[0].op # Skip over the BatchToSpace operation in the case of atrous convolutions. @@ -707,7 +707,7 @@ def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay, ] scale_name = 'mul' if has_scaling else 'Rsqrt' scale = graph.get_operation_by_name( - context + '/BatchNorm/batchnorm/' + scale_name) + context + '/BatchNorm/batchnorm_1/' + scale_name) scale = array_ops.reshape(scale.outputs[0], new_shape, context + '/scale_reshape') @@ -735,7 +735,7 @@ def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay, [(1, mul_fold.outputs[0])]) add_shift = graph.get_operation_by_name( - context + '/BatchNorm/batchnorm/add_1') + context + '/BatchNorm/batchnorm_1/add_1') corrected_output = conv_or_fc_folded.outputs[0] # Copy the batch to space operation if we have a atrous convolution. @@ -930,7 +930,7 @@ def _HasScaling(graph, input_to_ops_map, bn): Returns: A boolean indicating whether this batch norm layer has scaling enabled. 
""" - rsqrt_op = graph.get_operation_by_name(bn + '/BatchNorm/batchnorm/Rsqrt') + rsqrt_op = graph.get_operation_by_name(bn + '/BatchNorm/batchnorm_1/Rsqrt') rsqrt_consumers = input_to_ops_map.ConsumerOperations(rsqrt_op) return sum(1 for op in rsqrt_consumers if op.type == 'Mul') == 1 diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py index 272afcdf07..7c907ffd92 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py @@ -600,13 +600,13 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): if has_scaling: if fused: return scope + '/BatchNorm_Fold/mul' - return scope + '/BatchNorm/batchnorm/mul' - return scope + '/BatchNorm/batchnorm/Rsqrt' + return scope + '/BatchNorm/batchnorm_1/mul' + return scope + '/BatchNorm/batchnorm_1/Rsqrt' def _BathNormBiasName(self, scope, fused): if fused: return scope + '/BatchNorm_Fold/bias' - return scope + '/BatchNorm/batchnorm/sub' + return scope + '/BatchNorm/batchnorm_1/sub' def _WeightInit(self, stddev): """Returns a truncated normal variable initializer. 
diff --git a/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py b/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py index cf55da2723..a42bbca611 100644 --- a/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py +++ b/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py @@ -385,7 +385,7 @@ class ReceptiveFieldTest(test.TestCase): effective_stride_y, effective_padding_x, effective_padding_y) = ( receptive_field.compute_receptive_field_from_graph_def( graph_def, input_node, output_node, - ['Dropout/dropout/random_uniform'])) + ['Dropout/dropout_1/random_uniform'])) self.assertEqual(receptive_field_x, 3) self.assertEqual(receptive_field_y, 3) self.assertEqual(effective_stride_x, 4) diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py index 0403b652d7..d9c41f90d0 100644 --- a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py +++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py @@ -18,131 +18,330 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from collections import namedtuple +import itertools import warnings import numpy as np +import six from tensorflow.contrib import tensorrt as trt -from tensorflow.core.protobuf import config_pb2 as cpb2 -from tensorflow.python.framework import constant_op as cop -from tensorflow.python.framework import dtypes as dtypes -from tensorflow.python.framework import importer as importer -from tensorflow.python.framework import ops as ops +from tensorflow.core.protobuf import config_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import importer +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util -from 
tensorflow.python.ops import array_ops as aops -from tensorflow.python.ops import nn as nn -from tensorflow.python.ops import nn_ops as nn_ops -from tensorflow.python.platform import googletest +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_ops +from tensorflow.python.platform import test +INPUT_NAME = "input" +OUTPUT_NAME = "output" +INPUT_DIMS = [100, 24, 24, 2] +MODE_FP32 = "FP32" +MODE_FP16 = "FP16" +MODE_INT8 = "INT8" -class IntegrationTest(test_util.TensorFlowTestCase): +if six.PY2: + to_bytes = lambda s: s + to_string = lambda s: s +else: + to_bytes = lambda s: s.encode("utf-8", errors="surrogateescape") + to_string = lambda s: s.decode("utf-8") + + +# TODO(aaroey): test graph with different dtypes. +def GetSingleEngineGraphDef(dtype=dtypes.float32): + """Create a graph containing single segment.""" + g = ops.Graph() + with g.as_default(): + inp = array_ops.placeholder( + dtype=dtype, shape=[None] + INPUT_DIMS[1:], name=INPUT_NAME) + with g.device("/GPU:0"): + conv_filter = constant_op.constant( + [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]], + name="weights", + dtype=dtype) + conv = nn.conv2d( + input=inp, + filter=conv_filter, + strides=[1, 2, 2, 1], + padding="SAME", + name="conv") + bias = constant_op.constant( + [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtype) + added = nn.bias_add(conv, bias, name="bias_add") + relu = nn.relu(added, "relu") + identity = array_ops.identity(relu, "identity") + pool = nn_ops.max_pool( + identity, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool") + array_ops.squeeze(pool, name=OUTPUT_NAME) + return g.as_graph_def() + + +# TODO(aaroey): test graph with different dtypes. 
+def GetMultiEngineGraphDef(dtype=dtypes.float32): + """Create a graph containing multiple segment.""" + g = ops.Graph() + with g.as_default(): + inp = array_ops.placeholder( + dtype=dtype, shape=[None] + INPUT_DIMS[1:], name=INPUT_NAME) + with g.device("/GPU:0"): + conv_filter = constant_op.constant( + [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]], + name="weights", + dtype=dtype) + conv = nn.conv2d( + input=inp, + filter=conv_filter, + strides=[1, 2, 2, 1], + padding="SAME", + name="conv") + c1 = constant_op.constant( + np.random.randn(INPUT_DIMS[0], 12, 12, 6), dtype=dtype) + p = conv * c1 + c2 = constant_op.constant( + np.random.randn(INPUT_DIMS[0], 12, 12, 6), dtype=dtype) + q = conv / c2 + + edge = math_ops.sin(q) + edge /= edge + r = edge + edge + + p -= edge + q *= edge + s = p + q + s -= r + array_ops.squeeze(s, name=OUTPUT_NAME) + return g.as_graph_def() + + +TestGraph = namedtuple("TestGraph", + ["gdef", "num_expected_engines", "expected_output_dims"]) + +TEST_GRAPHS = { + "SingleEngineGraph": + TestGraph( + gdef=GetSingleEngineGraphDef(), + num_expected_engines=1, + expected_output_dims=(100, 6, 6, 6)), + "MultiEngineGraph": + TestGraph( + gdef=GetMultiEngineGraphDef(), + num_expected_engines=2, + expected_output_dims=(100, 12, 12, 6)), + # TODO(aaroey): add a large complex graph to test. 
+} + + +class TfTrtIntegrationTest(test_util.TensorFlowTestCase): """Class to test Tensorflow-TensorRT integration.""" def setUp(self): """Setup method.""" - super(IntegrationTest, self).setUp() + super(TfTrtIntegrationTest, self).setUp() warnings.simplefilter("always") - inp_dims = (100, 24, 24, 2) - self._input = np.random.random_sample(inp_dims) - self._original_graph = self.get_simple_graph_def() - self._gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) - self._config = cpb2.ConfigProto(gpu_options=self._gpu_options) - self._reference = self.run_graph(self._original_graph, self._input) - - def get_simple_graph_def(self): - """Create a simple graph and return its graph_def.""" - g = ops.Graph() - with g.as_default(): - a = aops.placeholder( - dtype=dtypes.float32, shape=(None, 24, 24, 2), name="input") - e = cop.constant( - [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]], - name="weights", - dtype=dtypes.float32) - conv = nn.conv2d( - input=a, filter=e, strides=[1, 2, 2, 1], padding="SAME", name="conv") - b = cop.constant( - [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtypes.float32) - t = nn.bias_add(conv, b, name="biasAdd") - relu = nn.relu(t, "relu") - idty = aops.identity(relu, "ID") - v = nn_ops.max_pool( - idty, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool") - aops.squeeze(v, name="output") - return g.as_graph_def() - - def run_graph(self, gdef, dumm_inp): - """Run given graphdef once.""" - ops.reset_default_graph() + self._input = np.random.random_sample(INPUT_DIMS) + + def _GetConfigProto(self, + use_optimizer, + precision_mode=None, + is_dynamic_op=None): + if use_optimizer: + rewriter_cfg = rewriter_config_pb2.RewriterConfig() + rewriter_cfg.optimizers.extend(["constfold", "layout"]) + custom_op = rewriter_cfg.custom_optimizers.add() + custom_op.name = "TensorRTOptimizer" + custom_op.parameter_map["minimum_segment_size"].i = 3 + custom_op.parameter_map["max_batch_size"].i = self._input.shape[0] + 
custom_op.parameter_map["is_dynamic_op"].b = is_dynamic_op + custom_op.parameter_map["max_workspace_size_bytes"].i = 1 << 25 + custom_op.parameter_map["precision_mode"].s = to_bytes(precision_mode) + graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_cfg) + else: + graph_options = config_pb2.GraphOptions() + + gpu_options = config_pb2.GPUOptions() + if trt.trt_convert.get_linked_tensorrt_version()[0] == 3: + gpu_options.per_process_gpu_memory_fraction = 0.50 + + config = config_pb2.ConfigProto( + gpu_options=gpu_options, graph_options=graph_options) + return config + + def _RunGraph(self, graph_key, gdef, input_data, config, num_runs=2): + """Run given graphdef multiple times.""" g = ops.Graph() with g.as_default(): inp, out = importer.import_graph_def( - graph_def=gdef, return_elements=["input", "output"]) + graph_def=gdef, return_elements=[INPUT_NAME, OUTPUT_NAME], name="") inp = inp.outputs[0] out = out.outputs[0] with self.test_session( - graph=g, config=self._config, use_gpu=True, force_gpu=True) as sess: - val = sess.run(out, {inp: dumm_inp}) + graph=g, config=config, use_gpu=True, force_gpu=True) as sess: + val = None + # Defaults to 2 runs to verify result across multiple runs is same. + for _ in range(num_runs): + new_val = sess.run(out, {inp: input_data}) + self.assertEquals(TEST_GRAPHS[graph_key].expected_output_dims, + new_val.shape) + if val is not None: + self.assertAllEqual(new_val, val) + val = new_val return val # Use real data that is representative of the inference dataset # for calibration. For this test script it is random data. 
- def run_calibration(self, gdef, dumm_inp): - """Run given calibration graph multiple times.""" - ops.reset_default_graph() - g = ops.Graph() - with g.as_default(): - inp, out = importer.import_graph_def( - graph_def=gdef, return_elements=["input", "output"]) - inp = inp.outputs[0] - out = out.outputs[0] - # run over real calibration data here, we are mimicking a calibration - # set of 30 different batches. Use as much calibration data as you want - with self.test_session( - graph=g, config=self._config, use_gpu=True, force_gpu=True) as sess: - for _ in range(30): - val = sess.run(out, {inp: dumm_inp}) - return val + def _RunCalibration(self, graph_key, gdef, input_data, config): + """Run calibration on given graph.""" + return self._RunGraph(graph_key, gdef, input_data, config, 30) - def get_trt_graph(self, mode): + def _GetTrtGraph(self, gdef, precision_mode, is_dynamic_op): """Return trt converted graph.""" - if mode in ["FP32", "FP16", "INT8"]: - return trt.create_inference_graph( - input_graph_def=self._original_graph, - outputs=["output"], - max_batch_size=self._input.shape[0], - max_workspace_size_bytes=1 << 25, - precision_mode=mode, # TRT Engine precision "FP32","FP16" or "INT8" - minimum_segment_size=2 # minimum number of nodes in an engine - ) - return None - - def testFP32(self): - """Test FP32 conversion. Results should be identical to native case.""" - trt_graph = self.get_trt_graph("FP32") - result = self.run_graph(trt_graph, self._input) - self.assertAllEqual(self._reference, result) - result1 = self.run_graph(trt_graph, self._input) - self.assertAllEqual(result1, result) - - def testFP16(self): - """Test FP16 conversion. 
Results may be different from native case.""" - trt_graph = self.get_trt_graph("FP16") - result = self.run_graph(trt_graph, self._input) - self.assertAllClose(self._reference, result, rtol=1.e-03) - result1 = self.run_graph(trt_graph, self._input) - self.assertAllEqual(result1, result) - - def testINT8(self): - """Test INT8 conversion. Results may be different from native case.""" - calib_graph = self.get_trt_graph("INT8") - result = self.run_calibration(calib_graph, self._input) - self.assertAllEqual(self._reference, result) - int8_graph = trt.calib_graph_to_infer_graph(calib_graph) - result = self.run_graph(int8_graph, self._input) - self.assertAllClose(self._reference, result, rtol=1.e-03) - result1 = self.run_graph(int8_graph, self._input) - self.assertAllEqual(result1, result) + return trt.create_inference_graph( + input_graph_def=gdef, + outputs=[OUTPUT_NAME], + max_batch_size=self._input.shape[0], + max_workspace_size_bytes=1 << 25, + precision_mode=precision_mode, + minimum_segment_size=2, + is_dynamic_op=is_dynamic_op) + + def _VerifyGraphDef(self, + graph_key, + gdef, + precision_mode=None, + is_calibrated=None, + dynamic_engine=None): + num_engines = 0 + for n in gdef.node: + if n.op == "TRTEngineOp": + num_engines += 1 + self.assertNotEqual("", n.attr["serialized_segment"].s) + self.assertNotEqual("", n.attr["segment_funcdef_name"].s) + self.assertEquals(n.attr["precision_mode"].s, precision_mode) + self.assertEquals(n.attr["static_engine"].b, not dynamic_engine) + if precision_mode == MODE_INT8 and is_calibrated: + self.assertNotEqual("", n.attr["calibration_data"].s) + else: + self.assertEquals("", n.attr["calibration_data"].s) + if precision_mode is None: + self.assertEquals(num_engines, 0) + else: + self.assertEquals(num_engines, + TEST_GRAPHS[graph_key].num_expected_engines) + + def _RunTest(self, graph_key, use_optimizer, precision_mode, + dynamic_infer_engine, dynamic_calib_engine): + assert precision_mode in [MODE_FP32, MODE_FP16, MODE_INT8] + 
input_gdef = TEST_GRAPHS[graph_key].gdef + self._VerifyGraphDef(graph_key, input_gdef) + + # Get reference result without running trt. + config_no_trt = self._GetConfigProto(False) + print("Running original graph w/o trt, config:\n%s" % str(config_no_trt)) + ref_result = self._RunGraph(graph_key, input_gdef, self._input, + config_no_trt) + + # Run calibration if necessary. + if precision_mode == MODE_INT8: + + calib_config = self._GetConfigProto(use_optimizer, precision_mode, + dynamic_calib_engine) + print("Running calibration graph, config:\n%s" % str(calib_config)) + if use_optimizer: + self.assertTrue(False) + # TODO(aaroey): uncomment this and get infer_gdef when this mode is + # supported. + # result = self._RunCalibration(graph_key, input_gdef, self._input, + # calib_config) + else: + calib_gdef = self._GetTrtGraph(input_gdef, precision_mode, + dynamic_calib_engine) + self._VerifyGraphDef(graph_key, calib_gdef, precision_mode, False, + dynamic_calib_engine) + result = self._RunCalibration(graph_key, calib_gdef, self._input, + calib_config) + infer_gdef = trt.calib_graph_to_infer_graph(calib_gdef) + self._VerifyGraphDef(graph_key, infer_gdef, precision_mode, True, + dynamic_calib_engine) + self.assertAllClose(ref_result, result, rtol=1.e-03) + else: + infer_gdef = input_gdef + + # Run inference. 
+ infer_config = self._GetConfigProto(use_optimizer, precision_mode, + dynamic_infer_engine) + print("Running final inference graph, config:\n%s" % str(infer_config)) + if use_optimizer: + result = self._RunGraph(graph_key, infer_gdef, self._input, infer_config) + else: + trt_infer_gdef = self._GetTrtGraph(infer_gdef, precision_mode, + dynamic_infer_engine) + self._VerifyGraphDef(graph_key, trt_infer_gdef, precision_mode, True, + dynamic_infer_engine) + result = self._RunGraph(graph_key, trt_infer_gdef, self._input, + infer_config) + self.assertAllClose(ref_result, result, rtol=1.e-03) + + def testIdempotence(self): + # Test that applying tensorrt optimizer or offline conversion tools multiple + # times to the same graph will result in same graph. + # TODO(aaroey): implement this. + pass + + +def GetTests(): + + def _GetTest(g, u, p, i, c): + + def _Test(self): + print("Running test with parameters: graph_key=%s, use_optimizer=%s, " + "precision_mode=%s, dynamic_infer_engine=%s, " + "dynamic_calib_engine=%s" % (g, u, p, i, c)) + self._RunTest(g, u, p, i, c) + + return _Test + + use_optimizer_options = [False, True] + precision_mode_options = [MODE_FP32, MODE_FP16, MODE_INT8] + dynamic_infer_engine_options = [False, True] + dynamic_calib_engine_options = [False, True] + for (graph_key, use_optimizer, precision_mode, + dynamic_infer_engine, dynamic_calib_engine) in itertools.product( + TEST_GRAPHS, use_optimizer_options, precision_mode_options, + dynamic_infer_engine_options, dynamic_calib_engine_options): + if precision_mode == MODE_INT8: + if not dynamic_calib_engine and dynamic_infer_engine: + # TODO(aaroey): test this case, the conversion from static calibration + # engine to dynamic inference engine should be a noop. + continue + if use_optimizer: + # TODO(aaroey): if use_optimizer is True we need to get the inference + # graphdef using custom python wrapper class, which is not currently + # supported yet. 
+ continue + if not dynamic_calib_engine: + # TODO(aaroey): construction of static calibration engine is not + # supported yet. + continue + if dynamic_calib_engine and not dynamic_infer_engine: + # TODO(aaroey): construction of static inference engine using dynamic + # calibration engine is not supported yet. + continue + else: # In non int8 mode. + if dynamic_calib_engine: + # dynamic_calib_engine doesn't affect non-int8 modes, so just let + # related tests run once on dynamic_calib_engine=False. + continue + yield _GetTest(graph_key, use_optimizer, precision_mode, + dynamic_infer_engine, dynamic_calib_engine) if __name__ == "__main__": - googletest.main() + for index, t in enumerate(GetTests()): + setattr(TfTrtIntegrationTest, "testTfTRT_" + str(index), t) + test.main() diff --git a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt index 6cd76ff340..342a1f6b05 100644 --- a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt @@ -25,7 +25,7 @@ END (K-1)-dimensional tensor of indices into `params`, where each element defines a slice of `params`: - output[i_0, ..., i_{K-2}] = params[indices[i0, ..., i_{K-2}]] + output[\\(i_0, ..., i_{K-2}\\)] = params[indices[\\(i_0, ..., i_{K-2}\\)]] Whereas in @{tf.gather} `indices` defines slices into the first dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the diff --git a/tensorflow/core/api_def/base_api/api_def_LinSpace.pbtxt b/tensorflow/core/api_def/base_api/api_def_LinSpace.pbtxt index 94a4ef574d..f706810662 100644 --- a/tensorflow/core/api_def/base_api/api_def_LinSpace.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_LinSpace.pbtxt @@ -3,19 +3,19 @@ op { in_arg { name: "start" description: < l1 else 0.0 -accum = accum_new +$$accum_new = accum + grad * grad$$ +$$linear += grad + (accum_{new}^{-lr_{power}} - accum^{-lr_{power}} / lr * var$$ +$$quadratic = 1.0 / 
(accum_{new}^{lr_{power}} * lr) + 2 * l2$$ +$$var = (sign(linear) * l1 - linear) / quadratic\ if\ |linear| > l1\ else\ 0.0$$ +$$accum = accum_{new}$$ END } diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt index 8d9ac9ea3f..17dbb488de 100644 --- a/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt @@ -64,7 +64,7 @@ Set use_nesterov = True if you want to use Nesterov momentum. That is for rows we have grad for, we update var and accum as follows: -accum = accum * momentum + grad -var -= lr * accum +$$accum = accum * momentum + grad$$ +$$var -= lr * accum$$ END } diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt index 80541b91c7..0b24f2ddd1 100644 --- a/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt @@ -58,9 +58,9 @@ END summary: "Sparse update entries in \'*var\' and \'*accum\' according to FOBOS algorithm." description: <