aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Yifei Feng <yifeif@google.com>2018-07-02 17:07:06 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-07-02 17:10:57 -0700
commit73e38c29c74d9d9bf7128bf4737a410ff005611e (patch)
treef84c84429850d1b38cb4c0f0df24aadfefc7db8e
parenteacdfdf6c0353ac0578afbd962dbbafa6121c28f (diff)
Merge changes from github.
PiperOrigin-RevId: 203037623
-rw-r--r--README.md2
-rw-r--r--RELEASE.md38
-rw-r--r--tensorflow/BUILD32
-rw-r--r--tensorflow/cc/gradients/array_grad.cc52
-rw-r--r--tensorflow/cc/gradients/array_grad_test.cc7
-rw-r--r--tensorflow/compiler/xla/service/cpu/BUILD9
-rw-r--r--tensorflow/contrib/BUILD2
-rw-r--r--tensorflow/contrib/autograph/converters/control_flow.py1
-rw-r--r--tensorflow/contrib/cmake/CMakeLists.txt17
-rw-r--r--tensorflow/contrib/cmake/external/boringssl.cmake2
-rw-r--r--tensorflow/contrib/cmake/tf_core_framework.cmake9
-rw-r--r--tensorflow/contrib/cmake/tf_core_kernels.cmake13
-rw-r--r--tensorflow/contrib/cmake/tf_stream_executor.cmake2
-rw-r--r--tensorflow/contrib/estimator/python/estimator/head.py3
-rw-r--r--tensorflow/contrib/estimator/python/estimator/head_test.py27
-rw-r--r--tensorflow/contrib/gan/python/estimator/python/head_impl.py13
-rw-r--r--tensorflow/contrib/gan/python/losses/python/losses_impl_test.py2
-rw-r--r--tensorflow/contrib/image/kernels/image_ops.cc2
-rw-r--r--tensorflow/contrib/image/kernels/image_ops.h25
-rw-r--r--tensorflow/contrib/image/ops/image_ops.cc2
-rw-r--r--tensorflow/contrib/image/python/kernel_tests/image_ops_test.py3
-rw-r--r--tensorflow/contrib/image/python/ops/image_ops.py3
-rw-r--r--tensorflow/contrib/layers/python/layers/layers_test.py2
-rw-r--r--tensorflow/contrib/lite/java/demo/app/build.gradle36
-rw-r--r--tensorflow/contrib/lite/kernels/expand_dims_test.cc4
-rw-r--r--tensorflow/contrib/lite/kernels/maximum_minimum_test.cc4
-rw-r--r--tensorflow/contrib/lite/kernels/neg_test.cc4
-rw-r--r--tensorflow/contrib/lite/kernels/select_test.cc24
-rw-r--r--tensorflow/contrib/lite/kernels/strided_slice_test.cc15
-rw-r--r--tensorflow/contrib/lite/kernels/test_util_test.cc12
-rw-r--r--tensorflow/contrib/lite/kernels/tile_test.cc16
-rw-r--r--tensorflow/contrib/lite/kernels/topk_v2_test.cc24
-rw-r--r--tensorflow/contrib/lite/python/tflite_convert.py2
-rw-r--r--tensorflow/contrib/mpi_collectives/BUILD1
-rw-r--r--tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc2
-rw-r--r--tensorflow/contrib/opt/__init__.py1
-rw-r--r--tensorflow/contrib/quantize/python/fold_batch_norms.py14
-rw-r--r--tensorflow/contrib/quantize/python/fold_batch_norms_test.py6
-rw-r--r--tensorflow/contrib/receptive_field/python/util/receptive_field_test.py2
-rw-r--r--tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py401
-rw-r--r--tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt2
-rw-r--r--tensorflow/core/api_def/base_api/api_def_LinSpace.pbtxt6
-rw-r--r--tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt2
-rw-r--r--tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt2
-rw-r--r--tensorflow/core/api_def/base_api/api_def_ReduceJoin.pbtxt2
-rw-r--r--tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt6
-rw-r--r--tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt6
-rw-r--r--tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt6
-rw-r--r--tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt6
-rw-r--r--tensorflow/core/api_def/base_api/api_def_Softmax.pbtxt2
-rw-r--r--tensorflow/core/api_def/base_api/api_def_SparseApplyAdagrad.pbtxt4
-rw-r--r--tensorflow/core/api_def/base_api/api_def_SparseApplyCenteredRMSProp.pbtxt6
-rw-r--r--tensorflow/core/api_def/base_api/api_def_SparseApplyFtrl.pbtxt10
-rw-r--r--tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt4
-rw-r--r--tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt8
-rw-r--r--tensorflow/core/api_def/base_api/api_def_SparseApplyProximalGradientDescent.pbtxt4
-rw-r--r--tensorflow/core/api_def/base_api/api_def_SparseApplyRMSProp.pbtxt6
-rw-r--r--tensorflow/core/api_def/base_api/api_def_SparseSliceGrad.pbtxt40
-rw-r--r--tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt2
-rw-r--r--tensorflow/core/api_def/python_api/api_def_BroadcastTo.pbtxt4
-rw-r--r--tensorflow/core/api_def/python_api/api_def_SparseSliceGrad.pbtxt4
-rw-r--r--tensorflow/core/kernels/BUILD7
-rw-r--r--tensorflow/core/kernels/conv_ops_test.cc4
-rw-r--r--tensorflow/core/kernels/mkl_concat_op.cc6
-rw-r--r--tensorflow/core/kernels/sparse_slice_grad_op.cc126
-rw-r--r--tensorflow/core/lib/db/sqlite_test.cc15
-rw-r--r--tensorflow/core/ops/sparse_ops.cc14
-rw-r--r--tensorflow/core/ops/sparse_ops_test.cc12
-rw-r--r--tensorflow/docs_src/get_started/_index.yaml12
-rw-r--r--tensorflow/docs_src/get_started/leftnav_files6
-rw-r--r--tensorflow/docs_src/get_started/next_steps.md2
-rw-r--r--tensorflow/docs_src/guide/custom_estimators.md8
-rw-r--r--tensorflow/docs_src/guide/keras.md24
-rw-r--r--tensorflow/docs_src/install/install_sources.md22
-rw-r--r--tensorflow/docs_src/mobile/tflite/demo_android.md23
-rw-r--r--tensorflow/docs_src/tutorials/layers.md45
-rw-r--r--tensorflow/go/op/wrappers.go12
-rw-r--r--tensorflow/java/src/gen/cc/source_writer.cc1
-rw-r--r--tensorflow/java/src/main/java/org/tensorflow/Graph.java79
-rw-r--r--tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java153
-rw-r--r--tensorflow/java/src/main/native/graph_jni.cc54
-rw-r--r--tensorflow/java/src/main/native/graph_jni.h9
-rw-r--r--tensorflow/java/src/main/native/session_jni.cc32
-rw-r--r--tensorflow/java/src/main/native/utils_jni.cc53
-rw-r--r--tensorflow/java/src/main/native/utils_jni.h33
-rw-r--r--tensorflow/java/src/test/java/org/tensorflow/GraphTest.java103
-rw-r--r--tensorflow/java/src/test/java/org/tensorflow/SessionTest.java38
-rw-r--r--tensorflow/java/src/test/java/org/tensorflow/TestUtil.java34
-rw-r--r--tensorflow/python/estimator/model_fn.py4
-rw-r--r--tensorflow/python/framework/ops.py30
-rw-r--r--tensorflow/python/framework/ops_test.py9
-rw-r--r--tensorflow/python/grappler/layout_optimizer_test.py4
-rw-r--r--tensorflow/python/kernel_tests/BUILD1
-rw-r--r--tensorflow/python/kernel_tests/init_ops_test.py40
-rw-r--r--tensorflow/python/kernel_tests/shape_ops_test.py23
-rw-r--r--tensorflow/python/kernel_tests/sparse_slice_op_test.py22
-rw-r--r--tensorflow/python/ops/array_grad.py8
-rw-r--r--tensorflow/python/ops/control_flow_ops.py1
-rw-r--r--tensorflow/python/ops/init_ops.py24
-rw-r--r--tensorflow/python/ops/losses/losses_impl.py3
-rw-r--r--tensorflow/python/ops/nn_ops.py3
-rw-r--r--tensorflow/python/ops/sparse_grad.py29
-rw-r--r--tensorflow/stream_executor/BUILD9
-rw-r--r--tensorflow/tools/api/generator/create_python_api.py2
-rw-r--r--tensorflow/tools/api/golden/tensorflow.initializers.variance_scaling.pbtxt2
-rw-r--r--tensorflow/tools/api/golden/tensorflow.keras.initializers.-variance-scaling.pbtxt2
-rw-r--r--tensorflow/tools/api/golden/tensorflow.pbtxt4
-rw-r--r--tensorflow/tools/api/golden/tensorflow.variance_scaling_initializer.pbtxt2
-rw-r--r--tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le1
-rw-r--r--tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le1
-rw-r--r--tensorflow/tools/ci_build/Dockerfile.rbe.cpu4
-rwxr-xr-xtensorflow/tools/ci_build/ci_parameterized_build.sh8
-rwxr-xr-xtensorflow/tools/ci_build/install/install_hdf5_ppc64le.sh30
-rwxr-xr-xtensorflow/tools/ci_build/linux/mkl/build-dev-container.sh53
-rwxr-xr-xtensorflow/tools/ci_build/pi/build_raspberry_pi.sh4
-rwxr-xr-xtensorflow/tools/ci_build/update_version.py2
-rwxr-xr-xtensorflow/tools/docker/Dockerfile.devel-mkl128
-rwxr-xr-xtensorflow/tools/docker/Dockerfile.mkl75
-rwxr-xr-xtensorflow/tools/docker/parameterized_docker_build.sh142
-rw-r--r--tensorflow/tools/pip_package/setup.py12
-rw-r--r--tensorflow/workspace.bzl8
-rw-r--r--third_party/eigen.BUILD6
-rw-r--r--third_party/eigen3/BUILD60
-rw-r--r--third_party/kafka/BUILD5
-rw-r--r--third_party/repo.bzl1
-rw-r--r--third_party/sqlite.BUILD1
-rw-r--r--third_party/toolchains/BUILD22
127 files changed, 2136 insertions, 544 deletions
diff --git a/README.md b/README.md
index 42d7bbc104..05fcb23f7e 100644
--- a/README.md
+++ b/README.md
@@ -96,6 +96,8 @@ The TensorFlow project strives to abide by generally accepted best practices in
| --- | --- | --- |
| **IBM s390x** | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/badge/icon)](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | TBA |
| **IBM ppc64le CPU** | [![Build Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/) | TBA |
+| **IBM ppc64le GPU** | [![Build Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_PPC64LE_GPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_PPC64LE_GPU/) | TBA |
+| **Linux CPU with Intel® MKL-DNN®** | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/) | TBA |
## For more information
diff --git a/RELEASE.md b/RELEASE.md
index 377a8eda37..4b03394427 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,18 +1,38 @@
# Release 1.9.0
## Major Features And Improvements
-* Update tf.keras to the Keras 2.1.6 API.
+* Updated docs for `tf.keras`: New Keras-based [get started](http://tensorflow.org/versions/r1.9/get_started),
+ and [programmers guide page](http://tensorflow.org/versions/r1.9/programmers_guide/keras).
+* Update `tf.keras` to the Keras 2.1.6 API.
+* Added [`tf.keras.layers.CuDNNGRU`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/keras/layers/CuDNNGRU) and [`tf.keras.layers.CuDNNLSTM`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/keras/layers/CuDNNLSTM) layers. [Try it](https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb?linkId=53292082).
+* Adding support of core [feature columns](https://www.tensorflow.org/get_started/feature_columns) and [losses](https://www.tensorflow.org/api_docs/python/tf/losses) to [gradient boosted trees estimators](https://github.com/tensorflow/models/tree/master/official/boosted_trees).
+* The [python interface](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/lite)
+ for the [TFLite Optimizing Converter](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/toco/README.md)
+ has been expanded, and the command line interface (AKA: `toco`, `tflite_convert`) is once again
+ included in the standard `pip` installation.
+* Improved data-loading and text processing with:
+ * [`tf.decode_compressed`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/decode_compressed)
+ * [`tf.string_strip`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/string_strip)
+ * [`tf.strings.regex_full_match`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/strings/regex_full_match)
+* Added experimental support for new pre-made Estimators:
+ * [`tf.contrib.estimator.BaselineEstimator`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/estimator/BaselineEstimator)
+ * [`tf.contrib.estimator.RNNClassifier`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/estimator/RNNClassifier)
+ * [`tf.contrib.estimator.RNNEstimator`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/estimator/RNNEstimator)
+* The [distributions.Bijector](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/distributions/bijectors/Bijector)
+ API supports broadcasting for Bijectors with new API changes.
+
+## Breaking Changes
+ * If you're opening empty variable scopes; replace `variable_scope('', ...)` by
+ `variable_scope(tf.get_variable_scope(), ...)`.
+ * Headers used for building custom ops have been moved from site-packages/external into site-packages/tensorflow/include/external.
+
+## Bug Fixes and Other Changes
+
* `tfe.Network` is deprecated. Please inherit from `tf.keras.Model`.
-* Adding support of core feature columns and losses to gradient boosted trees estimators.
-* The distributions.Bijector API supports broadcasting for Bijectors with new API changes. See [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/distributions/bijectors/Bijector) for more details.
* Layered variable names have changed in the following conditions:
* Using `tf.keras.layers` with custom variable scopes.
- * Using `tf.layers` in a subclassed `tf.keras.Model` class. See [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/layers) for more details
-
-## Breaking Changes
- * If you're opening empty variable scopes; replace `variable_scope`('', ...) by `variable_scope`(`tf.get_variable_scope()`, ...).
-
-## Bug Fixes and Other Changes
+ * Using `tf.layers` in a subclassed `tf.keras.Model` class. See
+ [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/layers) for more details
* `tf.data`:
* The `DatasetBase::DebugString()` method is now `const`.
* Added the `tf.contrib.data.sample_from_datasets()` API for randomly sampling from multiple datasets.
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 0bce474dfa..f362900387 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -438,6 +438,22 @@ filegroup(
data = glob(["docs_src/**/*.md"]),
)
+cc_library(
+ name = "grpc",
+ deps = select({
+ ":linux_s390x": ["@grpc//:grpc_unsecure"],
+ "//conditions:default": ["@grpc"],
+ }),
+)
+
+cc_library(
+ name = "grpc++",
+ deps = select({
+ ":linux_s390x": ["@grpc//:grpc++_unsecure"],
+ "//conditions:default": ["@grpc//:grpc++"],
+ }),
+)
+
# A shared object which includes registration mechanisms for ops and
# kernels. Does not include the implementations of any ops or kernels. Instead,
# the library which loads libtensorflow_framework.so
@@ -587,19 +603,3 @@ py_library(
visibility = ["//visibility:public"],
deps = ["//tensorflow/python:no_contrib"],
)
-
-cc_library(
- name = "grpc",
- deps = select({
- ":linux_s390x": ["@grpc//:grpc_unsecure"],
- "//conditions:default": ["@grpc"],
- }),
-)
-
-cc_library(
- name = "grpc++",
- deps = select({
- ":linux_s390x": ["@grpc//:grpc++_unsecure"],
- "//conditions:default": ["@grpc//:grpc++"],
- }),
-)
diff --git a/tensorflow/cc/gradients/array_grad.cc b/tensorflow/cc/gradients/array_grad.cc
index ff348fadb2..b353accddc 100644
--- a/tensorflow/cc/gradients/array_grad.cc
+++ b/tensorflow/cc/gradients/array_grad.cc
@@ -421,6 +421,58 @@ Status StridedSliceGradHelper(const Scope& scope, const Operation& op,
}
REGISTER_GRADIENT_OP("StridedSlice", StridedSliceGradHelper);
+Status SliceGrad(const Scope& scope, const Operation& op,
+ const std::vector<Output>& grad_inputs,
+ std::vector<Output>* grad_outputs) {
+ // Propagate the incoming gradient along all the selected values,
+ // and zero everywhere else. Use the Pad operator for this.
+ //
+ // First create an Nx2 padding where N is the number of input
+ // dimensions. The first column is the number of prepended zeros
+ // for each dimension, and the second column is the number of
+ // appended zeros.
+ //
+ // The first column is just the begin vector.
+ // The second column is the shape of the input element-wise
+ // subtracted by begin+size
+
+ // Running example:
+ // input.shape = [3, 5, 3]
+ // begin = [1, 2, 1], size = [1, 3, 2]
+ Input input = op.input(0);
+ Input begin = op.input(1);
+ // input_rank = 3
+ auto input_rank = Rank(scope, input);
+ // slice_size = [1, 3, 2]
+ auto slice_size = Shape(scope, op.output(0));
+ // padding_shape = [3, 1]
+ auto padding_shape = Stack(scope, {input_rank, 1});
+ // before_padding = [[1]
+ // [2]
+ // [1]]
+ Input before_padding = Reshape(scope, begin, padding_shape);
+ // after_padding_sizes = shape(input) - slice_size - begin
+ // = [3, 5, 3] - [1, 3, 2] - [1, 2, 1]
+ // = [1, 0, 0]
+ auto after_padding_sizes =
+ Sub(scope, Sub(scope, Shape(scope, input), slice_size), begin);
+ // after_padding = [[1]
+ // [0]
+ // [0]]
+ Input after_padding = Reshape(scope, after_padding_sizes, padding_shape);
+ // paddings = [[1 1]
+ // [2 0]
+ // [1 0]]
+ auto paddings =
+ Concat(scope, {before_padding, after_padding}, Const(scope, 1));
+ grad_outputs->push_back(Pad(scope, grad_inputs[0], paddings));
+ // Nothing propagated for "begin" and "size" inputs
+ grad_outputs->push_back(NoGradient());
+ grad_outputs->push_back(NoGradient());
+ return scope.status();
+}
+REGISTER_GRADIENT_OP("Slice", SliceGrad);
+
} // anonymous namespace
} // namespace ops
} // namespace tensorflow
diff --git a/tensorflow/cc/gradients/array_grad_test.cc b/tensorflow/cc/gradients/array_grad_test.cc
index de3bd0fc9e..d09275b648 100644
--- a/tensorflow/cc/gradients/array_grad_test.cc
+++ b/tensorflow/cc/gradients/array_grad_test.cc
@@ -378,5 +378,12 @@ TEST_F(ArrayGradTest, StridedSliceGrad) {
RunTest(x, x_shape, y, {1, 2, 2, 2});
}
+TEST_F(ArrayGradTest, SliceGrad) {
+ TensorShape x_shape({3, 5, 3});
+ auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape));
+ auto y = Slice(scope_, x, {1, 2, 1}, {1, 3, 2});
+ RunTest(x, x_shape, y, {1, 3, 2});
+}
+
} // namespace
} // namespace tensorflow
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index f68db13428..3479240610 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -128,7 +128,14 @@ cc_library(
"@llvm//:target", # fixdeps: keep
"@llvm//:x86_code_gen", # fixdeps: keep
"@llvm//:x86_disassembler", # fixdeps: keep
- ],
+ ] + select({
+ "//tensorflow:linux_ppc64le": [
+ "@llvm//:powerpc_disassembler",
+ "@llvm//:powerpc_code_gen",
+ ],
+ "//conditions:default": [
+ ],
+ }),
alwayslink = True, # Contains compiler registration
)
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 8974e6867d..5ce44c01b8 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -125,9 +125,9 @@ py_library(
}) + if_not_windows_cuda([
"//tensorflow/contrib/fused_conv:fused_conv_py", # unresolved symbols, need to export more symbols
]) + if_not_windows([
- "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py",
"//tensorflow/contrib/cloud:cloud_py", # depends on bigtable
"//tensorflow/contrib/bigtable", # doesn't compile on Windows
+ "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py",
"//tensorflow/contrib/lite/python:lite", # unix dependency, need to fix code
]),
)
diff --git a/tensorflow/contrib/autograph/converters/control_flow.py b/tensorflow/contrib/autograph/converters/control_flow.py
index 22a671262c..f4a8710627 100644
--- a/tensorflow/contrib/autograph/converters/control_flow.py
+++ b/tensorflow/contrib/autograph/converters/control_flow.py
@@ -47,7 +47,6 @@ class SymbolNamer(object):
class ControlFlowTransformer(converter.Base):
"""Transforms control flow structures like loops and conditionals."""
-
def _create_cond_branch(self, body_name, aliased_orig_names,
aliased_new_names, body, returns):
if aliased_orig_names:
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 4ca7a1b28c..a0a5b0e00c 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -299,17 +299,20 @@ include_directories(
${double_conversion_INCLUDE_DIR}
)
-if(tensorflow_ENABLE_SSL_SUPPORT)
- include(boringssl)
- list(APPEND tensorflow_EXTERNAL_LIBRARIES ${boringssl_STATIC_LIBRARIES})
- list(APPEND tensorflow_EXTERNAL_DEPENDENCIES boringssl)
- include_directories(${boringssl_INCLUDE_DIR})
-endif()
if(tensorflow_ENABLE_GRPC_SUPPORT)
+ if(tensorflow_ENABLE_SSL_SUPPORT)
+ include(boringssl)
+ include_directories(${boringssl_INCLUDE_DIR})
+ endif()
include(grpc)
+ include_directories(${GRPC_INCLUDE_DIRS})
+ # Place boringssl after grpc as grpc depends on boringssl.
list(APPEND tensorflow_EXTERNAL_LIBRARIES ${grpc_STATIC_LIBRARIES})
list(APPEND tensorflow_EXTERNAL_DEPENDENCIES grpc)
- include_directories(${GRPC_INCLUDE_DIRS})
+ if(tensorflow_ENABLE_SSL_SUPPORT)
+ list(APPEND tensorflow_EXTERNAL_LIBRARIES ${boringssl_STATIC_LIBRARIES})
+ list(APPEND tensorflow_EXTERNAL_DEPENDENCIES boringssl)
+ endif()
endif()
if(tensorflow_ENABLE_JEMALLOC_SUPPORT)
include(jemalloc)
diff --git a/tensorflow/contrib/cmake/external/boringssl.cmake b/tensorflow/contrib/cmake/external/boringssl.cmake
index 3c4bb01e24..fbb14b2515 100644
--- a/tensorflow/contrib/cmake/external/boringssl.cmake
+++ b/tensorflow/contrib/cmake/external/boringssl.cmake
@@ -17,7 +17,7 @@ include (ExternalProject)
set(boringssl_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/boringssl/src/boringssl/include)
#set(boringssl_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/boringssl/src)
set(boringssl_URL https://boringssl.googlesource.com/boringssl)
-set(boringssl_TAG ee7aa02)
+set(boringssl_TAG 7f8c553d7f4db0a6ce727f2986d41bf8fe8ec4bf)
set(boringssl_BUILD ${CMAKE_BINARY_DIR}/boringssl/src/boringssl-build)
#set(boringssl_LIBRARIES ${boringssl_BUILD}/obj/so/libboringssl.so)
set(boringssl_STATIC_LIBRARIES
diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake
index 9f02d6cbab..872b016d2b 100644
--- a/tensorflow/contrib/cmake/tf_core_framework.cmake
+++ b/tensorflow/contrib/cmake/tf_core_framework.cmake
@@ -236,15 +236,6 @@ if(WIN32)
list(APPEND tf_core_lib_srcs ${tf_core_platform_windows_srcs})
endif(WIN32)
-if(tensorflow_ENABLE_SSL_SUPPORT)
- # Cloud libraries require boringssl.
- file(GLOB tf_core_platform_cloud_srcs
- "${tensorflow_source_dir}/tensorflow/core/platform/cloud/*.h"
- "${tensorflow_source_dir}/tensorflow/core/platform/cloud/*.cc"
- )
- list(APPEND tf_core_lib_srcs ${tf_core_platform_cloud_srcs})
-endif()
-
if (tensorflow_ENABLE_HDFS_SUPPORT)
list(APPEND tf_core_platform_hdfs_srcs
"${tensorflow_source_dir}/tensorflow/core/platform/hadoop/hadoop_file_system.cc"
diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index 2d76bf530a..844f62649d 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -134,14 +134,13 @@ if(tensorflow_BUILD_CONTRIB_KERNELS)
list(APPEND tf_core_kernels_srcs ${tf_contrib_kernels_srcs})
endif(tensorflow_BUILD_CONTRIB_KERNELS)
-if(NOT tensorflow_ENABLE_SSL_SUPPORT)
- # Cloud libraries require boringssl.
- file(GLOB tf_core_kernels_cloud_srcs
- "${tensorflow_source_dir}/tensorflow/contrib/cloud/kernels/*.h"
- "${tensorflow_source_dir}/tensorflow/contrib/cloud/kernels/*.cc"
- )
+# Cloud libraries require curl and boringssl.
+# Curl is not supported yet anyway so we remove for now.
+file(GLOB tf_core_kernels_cloud_srcs
+ "${tensorflow_source_dir}/tensorflow/contrib/cloud/kernels/*.h"
+ "${tensorflow_source_dir}/tensorflow/contrib/cloud/kernels/*.cc"
+)
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_cloud_srcs})
-endif()
file(GLOB_RECURSE tf_core_kernels_exclude_srcs
"${tensorflow_source_dir}/tensorflow/core/kernels/*test*.h"
diff --git a/tensorflow/contrib/cmake/tf_stream_executor.cmake b/tensorflow/contrib/cmake/tf_stream_executor.cmake
index 2f70e59d54..6d634cb170 100644
--- a/tensorflow/contrib/cmake/tf_stream_executor.cmake
+++ b/tensorflow/contrib/cmake/tf_stream_executor.cmake
@@ -64,8 +64,6 @@ file(GLOB tf_stream_executor_srcs
if (tensorflow_ENABLE_GPU)
file(GLOB tf_stream_executor_gpu_srcs
"${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*.cc"
- "${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.h"
- "${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.cc"
)
if (NOT tensorflow_BUILD_CC_TESTS)
file(GLOB tf_stream_executor_gpu_tests
diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py
index 9594e5132f..c9d86ef4ab 100644
--- a/tensorflow/contrib/estimator/python/estimator/head.py
+++ b/tensorflow/contrib/estimator/python/estimator/head.py
@@ -534,7 +534,8 @@ def multi_label_head(n_classes,
* An integer `SparseTensor` of class indices. The `dense_shape` must be
`[D0, D1, ... DN, ?]` and the values within `[0, n_classes)`.
* If `label_vocabulary` is given, a string `SparseTensor`. The `dense_shape`
- must be `[D0, D1, ... DN, ?]` and the values within `label_vocabulary`.
+ must be `[D0, D1, ... DN, ?]` and the values within `label_vocabulary` or a
+ multi-hot tensor of shape `[D0, D1, ... DN, n_classes]`.
If `weight_column` is specified, weights must be of shape
`[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.
diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py
index b2b57fa06b..7b884402d4 100644
--- a/tensorflow/contrib/estimator/python/estimator/head_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/head_test.py
@@ -568,6 +568,33 @@ class MultiLabelHead(test.TestCase):
expected_loss=expected_loss,
expected_metrics=expected_metrics)
+ def test_eval_with_label_vocabulary_with_multi_hot_input(self):
+ n_classes = 2
+ head = head_lib.multi_label_head(
+ n_classes, label_vocabulary=['class0', 'class1'])
+ logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
+ labels_multi_hot = np.array([[1, 0], [1, 1]], dtype=np.int64)
+ # loss = labels * -log(sigmoid(logits)) +
+ # (1 - labels) * -log(1 - sigmoid(logits))
+ # Sum over examples, divide by batch_size.
+ expected_loss = 0.5 * np.sum(
+ _sigmoid_cross_entropy(labels=labels_multi_hot, logits=logits))
+ keys = metric_keys.MetricKeys
+ expected_metrics = {
+ # Average loss over examples.
+ keys.LOSS_MEAN: expected_loss,
+ # auc and auc_pr cannot be reliably calculated for only 4 samples, but
+ # this assert tests that the algorithm remains consistent.
+ keys.AUC: 0.3333,
+ keys.AUC_PR: 0.7639,
+ }
+ self._test_eval(
+ head=head,
+ logits=logits,
+ labels=labels_multi_hot,
+ expected_loss=expected_loss,
+ expected_metrics=expected_metrics)
+
def test_eval_with_thresholds(self):
n_classes = 2
thresholds = [0.25, 0.5, 0.75]
diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py
index 5b5557bd8f..d1441e1eb2 100644
--- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py
@@ -103,9 +103,20 @@ class GANHead(head._Head): # pylint: disable=protected-access
name: name of the head. If provided, summary and metrics keys will be
suffixed by `"/" + name`.
"""
+
+ if not callable(generator_loss_fn):
+ raise TypeError('generator_loss_fn must be callable.')
+ if not callable(discriminator_loss_fn):
+ raise TypeError('discriminator_loss_fn must be callable.')
+ if not use_loss_summaries in [True, False, None]:
+ raise ValueError('use_loss_summaries must be True, False or None.')
+ if get_hooks_fn is not None and not callable(get_hooks_fn):
+ raise TypeError('get_hooks_fn must be callable.')
+ if name is not None and not isinstance(name, str):
+ raise TypeError('name must be string.')
+
if get_hooks_fn is None:
get_hooks_fn = tfgan_train.get_sequential_train_hooks()
- # TODO(joelshor): Validate inputs.
if use_loss_summaries in [True, False]:
generator_loss_fn = functools.partial(
diff --git a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py
index 2889e93743..9f5fee4542 100644
--- a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py
+++ b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py
@@ -570,7 +570,7 @@ class MutualInformationPenaltyTest(test.TestCase, _PenaltyTest):
'predicted_distributions': self._predicted_distributions,
}
self._expected_loss = 1.61610
- self._expected_op_name = 'mutual_information_loss/mul'
+ self._expected_op_name = 'mutual_information_loss/mul_1'
self._batch_size = 2
diff --git a/tensorflow/contrib/image/kernels/image_ops.cc b/tensorflow/contrib/image/kernels/image_ops.cc
index c2e32da133..022e17d139 100644
--- a/tensorflow/contrib/image/kernels/image_ops.cc
+++ b/tensorflow/contrib/image/kernels/image_ops.cc
@@ -35,6 +35,7 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
template struct FillProjectiveTransform<CPUDevice, uint8>;
template struct FillProjectiveTransform<CPUDevice, int32>;
template struct FillProjectiveTransform<CPUDevice, int64>;
+template struct FillProjectiveTransform<CPUDevice, Eigen::half>;
template struct FillProjectiveTransform<CPUDevice, float>;
template struct FillProjectiveTransform<CPUDevice, double>;
@@ -99,6 +100,7 @@ class ImageProjectiveTransform : public OpKernel {
TF_CALL_uint8(REGISTER);
TF_CALL_int32(REGISTER);
TF_CALL_int64(REGISTER);
+TF_CALL_half(REGISTER);
TF_CALL_float(REGISTER);
TF_CALL_double(REGISTER);
diff --git a/tensorflow/contrib/image/kernels/image_ops.h b/tensorflow/contrib/image/kernels/image_ops.h
index 8408fd6f2e..209aa24548 100644
--- a/tensorflow/contrib/image/kernels/image_ops.h
+++ b/tensorflow/contrib/image/kernels/image_ops.h
@@ -21,6 +21,7 @@ limitations under the License.
#define EIGEN_USE_THREADS
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/platform/types.h"
@@ -110,21 +111,21 @@ class ProjectiveGenerator {
// f(x, y_floor) = (x_ceil - x) / (x_ceil - x_floor) * f(x_floor, y_floor)
// + (x - x_floor) / (x_ceil - x_floor) * f(x_ceil, y_floor)
const float value_yfloor =
- (x_ceil - x) * read_with_fill_value(batch, DenseIndex(y_floor),
- DenseIndex(x_floor), channel,
- fill_value) +
- (x - x_floor) * read_with_fill_value(batch, DenseIndex(y_floor),
- DenseIndex(x_ceil), channel,
- fill_value);
+ (x_ceil - x) * static_cast<float>(read_with_fill_value(
+ batch, DenseIndex(y_floor), DenseIndex(x_floor),
+ channel, fill_value)) +
+ (x - x_floor) * static_cast<float>(read_with_fill_value(
+ batch, DenseIndex(y_floor), DenseIndex(x_ceil),
+ channel, fill_value));
// f(x, y_ceil) = (x_ceil - x) / (x_ceil - x_floor) * f(x_floor, y_ceil)
// + (x - x_floor) / (x_ceil - x_floor) * f(x_ceil, y_ceil)
const float value_yceil =
- (x_ceil - x) * read_with_fill_value(batch, DenseIndex(y_ceil),
- DenseIndex(x_floor), channel,
- fill_value) +
- (x - x_floor) * read_with_fill_value(batch, DenseIndex(y_ceil),
- DenseIndex(x_ceil), channel,
- fill_value);
+ (x_ceil - x) * static_cast<float>(read_with_fill_value(
+ batch, DenseIndex(y_ceil), DenseIndex(x_floor),
+ channel, fill_value)) +
+ (x - x_floor) * static_cast<float>(read_with_fill_value(
+ batch, DenseIndex(y_ceil), DenseIndex(x_ceil),
+ channel, fill_value));
// f(x, y) = (y_ceil - y) / (y_ceil - y_floor) * f(x, y_floor)
// + (y - y_floor) / (y_ceil - y_floor) * f(x, y_ceil)
return T((y_ceil - y) * value_yfloor + (y - y_floor) * value_yceil);
diff --git a/tensorflow/contrib/image/ops/image_ops.cc b/tensorflow/contrib/image/ops/image_ops.cc
index ebdcaea7ab..e59f1bf844 100644
--- a/tensorflow/contrib/image/ops/image_ops.cc
+++ b/tensorflow/contrib/image/ops/image_ops.cc
@@ -29,7 +29,7 @@ using shape_inference::ShapeHandle;
REGISTER_OP("ImageProjectiveTransform")
.Input("images: dtype")
.Input("transforms: float32")
- .Attr("dtype: {uint8, int32, int64, float32, float64}")
+ .Attr("dtype: {uint8, int32, int64, float16, float32, float64}")
.Attr("interpolation: string")
.Output("transformed_images: dtype")
.SetShapeFn([](InferenceContext* c) {
diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py
index 6c9ff858ab..62a22dcf34 100644
--- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py
+++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py
@@ -30,7 +30,8 @@ from tensorflow.python.ops import math_ops
from tensorflow.python.platform import googletest
_DTYPES = set(
- [dtypes.uint8, dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64])
+ [dtypes.uint8, dtypes.int32, dtypes.int64,
+ dtypes.float16, dtypes.float32, dtypes.float64])
class ImageOpsTest(test_util.TensorFlowTestCase):
diff --git a/tensorflow/contrib/image/python/ops/image_ops.py b/tensorflow/contrib/image/python/ops/image_ops.py
index cd984c8054..86b0ffe9a0 100644
--- a/tensorflow/contrib/image/python/ops/image_ops.py
+++ b/tensorflow/contrib/image/python/ops/image_ops.py
@@ -33,7 +33,8 @@ _image_ops_so = loader.load_op_library(
resource_loader.get_path_to_datafile("_image_ops.so"))
_IMAGE_DTYPES = set(
- [dtypes.uint8, dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64])
+ [dtypes.uint8, dtypes.int32, dtypes.int64,
+ dtypes.float16, dtypes.float32, dtypes.float64])
ops.RegisterShape("ImageConnectedComponents")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("ImageProjectiveTransform")(common_shapes.call_cpp_shape_fn)
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index 0e8c89fe3a..c5c7269b1f 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -1356,7 +1356,7 @@ class DropoutTest(test.TestCase):
with self.test_session():
images = np.random.uniform(size=(5, height, width, 3))
output = _layers.dropout(images)
- self.assertEqual(output.op.name, 'Dropout/dropout/mul')
+ self.assertEqual(output.op.name, 'Dropout/dropout_1/mul')
output.get_shape().assert_is_compatible_with(
ops.convert_to_tensor(images).get_shape())
diff --git a/tensorflow/contrib/lite/java/demo/app/build.gradle b/tensorflow/contrib/lite/java/demo/app/build.gradle
index 908549321b..49868c5a75 100644
--- a/tensorflow/contrib/lite/java/demo/app/build.gradle
+++ b/tensorflow/contrib/lite/java/demo/app/build.gradle
@@ -57,3 +57,39 @@ dependencies {
testCompile 'junit:junit:4.12'
}
+
+def modelDownloadUrl = "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip"
+def localCache = "build/intermediates/mobilenet_v1_224_android_quant_2017_11_08.zip"
+def targetFolder = "src/main/assets"
+
+task downloadModel(type: DownloadUrlTask) {
+ doFirst {
+ println "Downloading ${modelDownloadUrl}"
+ }
+ sourceUrl = "${modelDownloadUrl}"
+ target = file("${localCache}")
+}
+
+task unzipModel(type: Copy, dependsOn: 'downloadModel') {
+ doFirst {
+ println "Unzipping ${localCache}"
+ }
+ from zipTree("${localCache}")
+ into "${targetFolder}"
+}
+
+// Ensure the model file is downloaded and extracted before every build
+preBuild.dependsOn unzipModel
+
+class DownloadUrlTask extends DefaultTask {
+ @Input
+ String sourceUrl
+
+ @OutputFile
+ File target
+
+ @TaskAction
+ void download() {
+ ant.get(src: sourceUrl, dest: target)
+ }
+}
diff --git a/tensorflow/contrib/lite/kernels/expand_dims_test.cc b/tensorflow/contrib/lite/kernels/expand_dims_test.cc
index b755e8ce29..50dc860e5a 100644
--- a/tensorflow/contrib/lite/kernels/expand_dims_test.cc
+++ b/tensorflow/contrib/lite/kernels/expand_dims_test.cc
@@ -39,7 +39,7 @@ class ExpandDimsOpModel : public SingleOpModel {
void SetInputFloat(std::initializer_list<float> data) {
PopulateTensor<float>(input_, data);
}
- void SetAxis(int axis) { PopulateTensor<int32>(axis_, {axis}); }
+ void SetAxis(int axis) { PopulateTensor<int32_t>(axis_, {axis}); }
std::vector<float> GetValuesFloat() { return ExtractVector<float>(output_); }
std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
@@ -51,7 +51,7 @@ class ExpandDimsOpModel : public SingleOpModel {
TEST(ExpandDimsOpTest, DifferentAxis) {
ExpandDimsOpModel m({2, 2}, TensorType_FLOAT32);
- const auto values = {-1.f, 1.f, -2.f, 2.f};
+ std::initializer_list<float> values = {-1.f, 1.f, -2.f, 2.f};
m.SetInputFloat(values);
m.SetAxis(0);
m.Invoke();
diff --git a/tensorflow/contrib/lite/kernels/maximum_minimum_test.cc b/tensorflow/contrib/lite/kernels/maximum_minimum_test.cc
index 0752aa1804..fd4d5367c5 100644
--- a/tensorflow/contrib/lite/kernels/maximum_minimum_test.cc
+++ b/tensorflow/contrib/lite/kernels/maximum_minimum_test.cc
@@ -126,10 +126,10 @@ TEST(MaximumOpTest, FloatWithBroadcastTest) {
TEST(MaximumOpTest, Int32WithBroadcastTest) {
std::initializer_list<int32_t> data1 = {1, 0, -1, -2, 3, 11};
std::initializer_list<int32_t> data2 = {2};
- TestModel<int32>(BuiltinOperator_MAXIMUM, {TensorType_INT32, {3, 1, 2}},
+ TestModel<int32_t>(BuiltinOperator_MAXIMUM, {TensorType_INT32, {3, 1, 2}},
{TensorType_INT32, {1}}, {TensorType_INT32, {3, 1, 2}},
data1, data2, {2, 2, 2, 2, 3, 11});
- TestModel<int32>(BuiltinOperator_MINIMUM, {TensorType_INT32, {3, 1, 2}},
+ TestModel<int32_t>(BuiltinOperator_MINIMUM, {TensorType_INT32, {3, 1, 2}},
{TensorType_INT32, {1}}, {TensorType_INT32, {3, 1, 2}},
data1, data2, {1, 0, -1, -2, 2, 2});
}
diff --git a/tensorflow/contrib/lite/kernels/neg_test.cc b/tensorflow/contrib/lite/kernels/neg_test.cc
index 3c95ac8cc2..3d3594c60b 100644
--- a/tensorflow/contrib/lite/kernels/neg_test.cc
+++ b/tensorflow/contrib/lite/kernels/neg_test.cc
@@ -58,9 +58,9 @@ TEST(NegOpModel, NegFloat) {
TEST(NegOpModel, NegInt32) {
NegOpModel m({TensorType_INT32, {2, 3}}, {TensorType_INT32, {2, 3}});
- m.SetInput<int32>({-2, -1, 0, 1, 2, 3});
+ m.SetInput<int32_t>({-2, -1, 0, 1, 2, 3});
m.Invoke();
- EXPECT_THAT(m.GetOutput<int32>(), ElementsAreArray({2, 1, 0, -1, -2, -3}));
+ EXPECT_THAT(m.GetOutput<int32_t>(), ElementsAreArray({2, 1, 0, -1, -2, -3}));
}
TEST(NegOpModel, NegInt64) {
diff --git a/tensorflow/contrib/lite/kernels/select_test.cc b/tensorflow/contrib/lite/kernels/select_test.cc
index cfe24a5fc9..4664b9acb4 100644
--- a/tensorflow/contrib/lite/kernels/select_test.cc
+++ b/tensorflow/contrib/lite/kernels/select_test.cc
@@ -88,11 +88,11 @@ TEST(SelectOpTest, SelectUInt8) {
TensorType_UINT8);
model.PopulateTensor<bool>(model.input1(), {false, true, false, false});
- model.PopulateTensor<uint8>(model.input2(), {1, 2, 3, 4});
- model.PopulateTensor<uint8>(model.input3(), {5, 6, 7, 8});
+ model.PopulateTensor<uint8_t>(model.input2(), {1, 2, 3, 4});
+ model.PopulateTensor<uint8_t>(model.input3(), {5, 6, 7, 8});
model.Invoke();
- EXPECT_THAT(model.GetOutput<uint8>(), ElementsAreArray({5, 2, 7, 8}));
+ EXPECT_THAT(model.GetOutput<uint8_t>(), ElementsAreArray({5, 2, 7, 8}));
EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1, 4}));
}
@@ -101,11 +101,11 @@ TEST(SelectOpTest, SelectInt32) {
TensorType_INT32);
model.PopulateTensor<bool>(model.input1(), {false, true, false, false});
- model.PopulateTensor<int32>(model.input2(), {1, 2, 3, 4});
- model.PopulateTensor<int32>(model.input3(), {5, 6, 7, 8});
+ model.PopulateTensor<int32_t>(model.input2(), {1, 2, 3, 4});
+ model.PopulateTensor<int32_t>(model.input3(), {5, 6, 7, 8});
model.Invoke();
- EXPECT_THAT(model.GetOutput<int32>(), ElementsAreArray({5, 2, 7, 8}));
+ EXPECT_THAT(model.GetOutput<int32_t>(), ElementsAreArray({5, 2, 7, 8}));
EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1, 4}));
}
@@ -113,11 +113,11 @@ TEST(SelectOpTest, RankOneSelectInt32) {
SelectOpModel model({2}, {2, 1, 2, 1}, {2, 1, 2, 1}, TensorType_INT32);
model.PopulateTensor<bool>(model.input1(), {false, true});
- model.PopulateTensor<int32>(model.input2(), {1, 2, 3, 4});
- model.PopulateTensor<int32>(model.input3(), {5, 6, 7, 8});
+ model.PopulateTensor<int32_t>(model.input2(), {1, 2, 3, 4});
+ model.PopulateTensor<int32_t>(model.input3(), {5, 6, 7, 8});
model.Invoke();
- EXPECT_THAT(model.GetOutput<int32>(), ElementsAreArray({5, 6, 3, 4}));
+ EXPECT_THAT(model.GetOutput<int32_t>(), ElementsAreArray({5, 6, 3, 4}));
EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 1, 2, 1}));
}
@@ -125,11 +125,11 @@ TEST(SelectOpTest, RankZeroSelectInt32) {
SelectOpModel model({1}, {1, 2, 2, 1}, {1, 2, 2, 1}, TensorType_INT32);
model.PopulateTensor<bool>(model.input1(), {false});
- model.PopulateTensor<int32>(model.input2(), {1, 2, 3, 4});
- model.PopulateTensor<int32>(model.input3(), {5, 6, 7, 8});
+ model.PopulateTensor<int32_t>(model.input2(), {1, 2, 3, 4});
+ model.PopulateTensor<int32_t>(model.input3(), {5, 6, 7, 8});
model.Invoke();
- EXPECT_THAT(model.GetOutput<int32>(), ElementsAreArray({5, 6, 7, 8}));
+ EXPECT_THAT(model.GetOutput<int32_t>(), ElementsAreArray({5, 6, 7, 8}));
EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 2, 2, 1}));
}
diff --git a/tensorflow/contrib/lite/kernels/strided_slice_test.cc b/tensorflow/contrib/lite/kernels/strided_slice_test.cc
index 716b11d432..c5d4f9affb 100644
--- a/tensorflow/contrib/lite/kernels/strided_slice_test.cc
+++ b/tensorflow/contrib/lite/kernels/strided_slice_test.cc
@@ -21,7 +21,6 @@ limitations under the License.
namespace tflite {
namespace {
-using ::int32;
using ::testing::ElementsAreArray;
template <typename input_type = float,
@@ -50,14 +49,14 @@ class StridedSliceOpModel : public SingleOpModel {
void SetInput(std::initializer_list<input_type> data) {
PopulateTensor<input_type>(input_, data);
}
- void SetBegin(std::initializer_list<int32> data) {
- PopulateTensor<int32>(begin_, data);
+ void SetBegin(std::initializer_list<int32_t> data) {
+ PopulateTensor<int32_t>(begin_, data);
}
- void SetEnd(std::initializer_list<int32> data) {
- PopulateTensor<int32>(end_, data);
+ void SetEnd(std::initializer_list<int32_t> data) {
+ PopulateTensor<int32_t>(end_, data);
}
- void SetStrides(std::initializer_list<int32> data) {
- PopulateTensor<int32>(strides_, data);
+ void SetStrides(std::initializer_list<int32_t> data) {
+ PopulateTensor<int32_t>(strides_, data);
}
std::vector<input_type> GetOutput() {
@@ -566,7 +565,7 @@ TEST(StridedSliceOpTest, RunTwice) {
}
TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis1Uint8) {
- StridedSliceOpModel<uint8, TensorType_UINT8> m({2, 3, 2}, {3}, {3}, {3}, 0, 0,
+ StridedSliceOpModel<uint8_t, TensorType_UINT8> m({2, 3, 2}, {3}, {3}, {3}, 0, 0,
0, 0, 1);
m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
m.SetBegin({0, 0, 0});
diff --git a/tensorflow/contrib/lite/kernels/test_util_test.cc b/tensorflow/contrib/lite/kernels/test_util_test.cc
index 1e10e89061..2365803472 100644
--- a/tensorflow/contrib/lite/kernels/test_util_test.cc
+++ b/tensorflow/contrib/lite/kernels/test_util_test.cc
@@ -22,22 +22,22 @@ using ::testing::ElementsAreArray;
TEST(TestUtilTest, QuantizeVector) {
std::vector<float> data = {-1.0, -0.5, 0.0, 0.5, 1.0, 1000.0};
- auto q_data = Quantize<uint8>(data, /*scale=*/1.0, /*zero_point=*/0);
- std::vector<uint8> expected = {0, 0, 0, 1, 1, 255};
+ auto q_data = Quantize<uint8_t>(data, /*scale=*/1.0, /*zero_point=*/0);
+ std::vector<uint8_t> expected = {0, 0, 0, 1, 1, 255};
EXPECT_THAT(q_data, ElementsAreArray(expected));
}
TEST(TestUtilTest, QuantizeVectorScalingDown) {
std::vector<float> data = {-1.0, -0.5, 0.0, 0.5, 1.0, 1000.0};
- auto q_data = Quantize<uint8>(data, /*scale=*/10.0, /*zero_point=*/0);
- std::vector<uint8> expected = {0, 0, 0, 0, 0, 100};
+ auto q_data = Quantize<uint8_t>(data, /*scale=*/10.0, /*zero_point=*/0);
+ std::vector<uint8_t> expected = {0, 0, 0, 0, 0, 100};
EXPECT_THAT(q_data, ElementsAreArray(expected));
}
TEST(TestUtilTest, QuantizeVectorScalingUp) {
std::vector<float> data = {-1.0, -0.5, 0.0, 0.5, 1.0, 1000.0};
- auto q_data = Quantize<uint8>(data, /*scale=*/0.1, /*zero_point=*/0);
- std::vector<uint8> expected = {0, 0, 0, 5, 10, 255};
+ auto q_data = Quantize<uint8_t>(data, /*scale=*/0.1, /*zero_point=*/0);
+ std::vector<uint8_t> expected = {0, 0, 0, 5, 10, 255};
EXPECT_THAT(q_data, ElementsAreArray(expected));
}
diff --git a/tensorflow/contrib/lite/kernels/tile_test.cc b/tensorflow/contrib/lite/kernels/tile_test.cc
index a134a75d56..4f78c224e5 100644
--- a/tensorflow/contrib/lite/kernels/tile_test.cc
+++ b/tensorflow/contrib/lite/kernels/tile_test.cc
@@ -38,27 +38,27 @@ class TileOpModel : public SingleOpModel {
PopulateTensor<float>(input_, data);
}
- void SetInputUInt8(std::initializer_list<uint8> data) {
- PopulateTensor<uint8>(input_, data);
+ void SetInputUInt8(std::initializer_list<uint8_t> data) {
+ PopulateTensor<uint8_t>(input_, data);
}
- void SetInputInt32(std::initializer_list<int32> data) {
- PopulateTensor<int32>(input_, data);
+ void SetInputInt32(std::initializer_list<int32_t> data) {
+ PopulateTensor<int32_t>(input_, data);
}
void SetInputInt64(std::initializer_list<int64_t> data) {
PopulateTensor<int64_t>(input_, data);
}
- void SetMultipliers(std::initializer_list<int32> data) {
- PopulateTensor<int32>(multipliers_, data);
+ void SetMultipliers(std::initializer_list<int32_t> data) {
+ PopulateTensor<int32_t>(multipliers_, data);
}
std::vector<float> GetOutputFloat() { return ExtractVector<float>(output_); }
- std::vector<uint8> GetOutputUInt8() { return ExtractVector<uint8>(output_); }
+ std::vector<uint8_t> GetOutputUInt8() { return ExtractVector<uint8_t>(output_); }
- std::vector<int32> GetOutputInt32() { return ExtractVector<int32>(output_); }
+ std::vector<int32_t> GetOutputInt32() { return ExtractVector<int32_t>(output_); }
std::vector<int64_t> GetOutputInt64() {
return ExtractVector<int64_t>(output_);
diff --git a/tensorflow/contrib/lite/kernels/topk_v2_test.cc b/tensorflow/contrib/lite/kernels/topk_v2_test.cc
index 212f8acc76..2abb89b617 100644
--- a/tensorflow/contrib/lite/kernels/topk_v2_test.cc
+++ b/tensorflow/contrib/lite/kernels/topk_v2_test.cc
@@ -42,32 +42,32 @@ class TopKV2OpModel : public SingleOpModel {
PopulateTensor<float>(input_, data);
}
- void SetInputUInt8(std::initializer_list<uint8> data) {
- PopulateTensor<uint8>(input_, data);
+ void SetInputUInt8(std::initializer_list<uint8_t> data) {
+ PopulateTensor<uint8_t>(input_, data);
}
- void SetInputInt32(std::initializer_list<int32> data) {
- PopulateTensor<int32>(input_, data);
+ void SetInputInt32(std::initializer_list<int32_t> data) {
+ PopulateTensor<int32_t>(input_, data);
}
void SetInputInt64(std::initializer_list<int64_t> data) {
PopulateTensor<int64_t>(input_, data);
}
- std::vector<int32> GetIndexes() {
- return ExtractVector<int32>(output_indexes_);
+ std::vector<int32_t> GetIndexes() {
+ return ExtractVector<int32_t>(output_indexes_);
}
std::vector<float> GetValuesFloat() {
return ExtractVector<float>(output_values_);
}
- std::vector<uint8> GetValuesUInt8() {
- return ExtractVector<uint8>(output_values_);
+ std::vector<uint8_t> GetValuesUInt8() {
+ return ExtractVector<uint8_t>(output_values_);
}
- std::vector<int32> GetValuesInt32() {
- return ExtractVector<int32>(output_values_);
+ std::vector<int32_t> GetValuesInt32() {
+ return ExtractVector<int32_t>(output_values_);
}
std::vector<int64_t> GetValuesInt64() {
@@ -119,7 +119,7 @@ TEST(TopKV2OpTest, VectorFloat) {
EXPECT_THAT(m.GetValuesFloat(), ElementsAreArray(ArrayFloatNear({0.8, 0.2})));
}
-// Check that uint8 works.
+// Check that uint8_t works.
TEST(TopKV2OpTest, TypeUint8) {
TopKV2OpModel m({2, 3}, TensorType_UINT8, 2);
m.SetInputUInt8({1, 2, 3, 251, 250, 249});
@@ -128,7 +128,7 @@ TEST(TopKV2OpTest, TypeUint8) {
EXPECT_THAT(m.GetValuesUInt8(), ElementsAreArray({3, 2, 251, 250}));
}
-// Check that int32 works.
+// Check that int32_t works.
TEST(TopKV2OpTest, TypeInt32) {
TopKV2OpModel m({2, 3}, TensorType_INT32, 2);
m.SetInputInt32({1, 2, 3, 10251, 10250, 10249});
diff --git a/tensorflow/contrib/lite/python/tflite_convert.py b/tensorflow/contrib/lite/python/tflite_convert.py
index 286d15984f..9bd1f4f76e 100644
--- a/tensorflow/contrib/lite/python/tflite_convert.py
+++ b/tensorflow/contrib/lite/python/tflite_convert.py
@@ -105,7 +105,7 @@ def _convert_model(flags):
input_arrays = converter.get_input_arrays()
std_dev_values = _parse_array(flags.std_dev_values, type_fn=int)
mean_values = _parse_array(flags.mean_values, type_fn=int)
- quant_stats = zip(mean_values, std_dev_values)
+ quant_stats = list(zip(mean_values, std_dev_values))
if ((not flags.input_arrays and len(input_arrays) > 1) or
(len(input_arrays) != len(quant_stats))):
raise ValueError("Mismatching --input_arrays, --std_dev_values, and "
diff --git a/tensorflow/contrib/mpi_collectives/BUILD b/tensorflow/contrib/mpi_collectives/BUILD
index a7be92a35e..ecac06354d 100644
--- a/tensorflow/contrib/mpi_collectives/BUILD
+++ b/tensorflow/contrib/mpi_collectives/BUILD
@@ -52,6 +52,7 @@ tf_custom_op_library(
deps = [
":mpi_defines",
":mpi_message_proto_cc",
+ "//tensorflow/stream_executor:stream_executor_headers_lib",
"//third_party/mpi",
],
)
diff --git a/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc b/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc
index ed22ee667f..e4b0c2c654 100644
--- a/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc
+++ b/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc
@@ -73,7 +73,7 @@ limitations under the License.
*/
template <class T>
-using StatusOr = se::port::StatusOr<T>;
+using StatusOr = stream_executor::port::StatusOr<T>;
using CPUDevice = Eigen::ThreadPoolDevice;
using GPUDevice = Eigen::GpuDevice;
diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index 65777b1323..3e63e99030 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -30,6 +30,7 @@ from tensorflow.contrib.opt.python.training.model_average_optimizer import *
from tensorflow.contrib.opt.python.training.moving_average_optimizer import *
from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import *
from tensorflow.contrib.opt.python.training.nadam_optimizer import *
+from tensorflow.contrib.opt.python.training.weight_decay_optimizers import *
from tensorflow.contrib.opt.python.training.powersign import *
from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import *
from tensorflow.contrib.opt.python.training.weight_decay_optimizers import *
diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py
index 804cd8d72d..e3c4899830 100644
--- a/tensorflow/contrib/quantize/python/fold_batch_norms.py
+++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py
@@ -506,7 +506,7 @@ def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay):
def _IsValidUnfusedBatchNorm(graph, context):
"""Checks that the output of the unfused batch norm has consumers."""
add_shift = graph.get_operation_by_name(
- context + '/BatchNorm/batchnorm/add_1')
+ context + '/BatchNorm/batchnorm_1/add_1')
# Ensure that the output tensor of batch norm has consumers, otherwise this
# is a dangling node and not a match.
return bool(add_shift.outputs[0].consumers())
@@ -599,7 +599,7 @@ def _GetBatchNormParams(graph, context, has_scaling):
op_suffix_mean = '/BatchNorm/moments/Squeeze'
op_suffix_variance = '/BatchNorm/moments/Squeeze_1'
- op_suffix_epsilon = '/BatchNorm/batchnorm/add/y'
+ op_suffix_epsilon = '/BatchNorm/batchnorm_1/add/y'
op_suffix_bn_decay_mean = '/BatchNorm/AssignMovingAvg/decay'
op_suffix_bn_decay_var = '/BatchNorm/AssignMovingAvg_1/decay'
@@ -675,12 +675,12 @@ def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay,
Returns:
A pair of Operations, the first is the original consumer node of the batch
- norm (../BatchNorm/batchnorm/add_1), the second is the consumer node of
+ norm (../BatchNorm/batchnorm_1/add_1), the second is the consumer node of
the folded graph (add_fold).
"""
mul_scale_name = 'mul_1' if has_scaling else 'mul'
mul_scale = graph.get_operation_by_name(context +
- '/BatchNorm/batchnorm/' +
+ '/BatchNorm/batchnorm_1/' +
mul_scale_name)
op_below = mul_scale.inputs[0].op
# Skip over the BatchToSpace operation in the case of atrous convolutions.
@@ -707,7 +707,7 @@ def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay,
]
scale_name = 'mul' if has_scaling else 'Rsqrt'
scale = graph.get_operation_by_name(
- context + '/BatchNorm/batchnorm/' + scale_name)
+ context + '/BatchNorm/batchnorm_1/' + scale_name)
scale = array_ops.reshape(scale.outputs[0], new_shape,
context + '/scale_reshape')
@@ -735,7 +735,7 @@ def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay,
[(1, mul_fold.outputs[0])])
add_shift = graph.get_operation_by_name(
- context + '/BatchNorm/batchnorm/add_1')
+ context + '/BatchNorm/batchnorm_1/add_1')
corrected_output = conv_or_fc_folded.outputs[0]
# Copy the batch to space operation if we have a atrous convolution.
@@ -930,7 +930,7 @@ def _HasScaling(graph, input_to_ops_map, bn):
Returns:
A boolean indicating whether this batch norm layer has scaling enabled.
"""
- rsqrt_op = graph.get_operation_by_name(bn + '/BatchNorm/batchnorm/Rsqrt')
+ rsqrt_op = graph.get_operation_by_name(bn + '/BatchNorm/batchnorm_1/Rsqrt')
rsqrt_consumers = input_to_ops_map.ConsumerOperations(rsqrt_op)
return sum(1 for op in rsqrt_consumers if op.type == 'Mul') == 1
diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py
index 272afcdf07..7c907ffd92 100644
--- a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py
+++ b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py
@@ -600,13 +600,13 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
if has_scaling:
if fused:
return scope + '/BatchNorm_Fold/mul'
- return scope + '/BatchNorm/batchnorm/mul'
- return scope + '/BatchNorm/batchnorm/Rsqrt'
+ return scope + '/BatchNorm/batchnorm_1/mul'
+ return scope + '/BatchNorm/batchnorm_1/Rsqrt'
def _BathNormBiasName(self, scope, fused):
if fused:
return scope + '/BatchNorm_Fold/bias'
- return scope + '/BatchNorm/batchnorm/sub'
+ return scope + '/BatchNorm/batchnorm_1/sub'
def _WeightInit(self, stddev):
"""Returns a truncated normal variable initializer.
diff --git a/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py b/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py
index cf55da2723..a42bbca611 100644
--- a/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py
+++ b/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py
@@ -385,7 +385,7 @@ class ReceptiveFieldTest(test.TestCase):
effective_stride_y, effective_padding_x, effective_padding_y) = (
receptive_field.compute_receptive_field_from_graph_def(
graph_def, input_node, output_node,
- ['Dropout/dropout/random_uniform']))
+ ['Dropout/dropout_1/random_uniform']))
self.assertEqual(receptive_field_x, 3)
self.assertEqual(receptive_field_y, 3)
self.assertEqual(effective_stride_x, 4)
diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py
index 0403b652d7..d9c41f90d0 100644
--- a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py
+++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py
@@ -18,131 +18,330 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+from collections import namedtuple
+import itertools
import warnings
import numpy as np
+import six
from tensorflow.contrib import tensorrt as trt
-from tensorflow.core.protobuf import config_pb2 as cpb2
-from tensorflow.python.framework import constant_op as cop
-from tensorflow.python.framework import dtypes as dtypes
-from tensorflow.python.framework import importer as importer
-from tensorflow.python.framework import ops as ops
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf import rewriter_config_pb2
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import importer
+from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops as aops
-from tensorflow.python.ops import nn as nn
-from tensorflow.python.ops import nn_ops as nn_ops
-from tensorflow.python.platform import googletest
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.platform import test
+INPUT_NAME = "input"
+OUTPUT_NAME = "output"
+INPUT_DIMS = [100, 24, 24, 2]
+MODE_FP32 = "FP32"
+MODE_FP16 = "FP16"
+MODE_INT8 = "INT8"
-class IntegrationTest(test_util.TensorFlowTestCase):
+if six.PY2:
+ to_bytes = lambda s: s
+ to_string = lambda s: s
+else:
+ to_bytes = lambda s: s.encode("utf-8", errors="surrogateescape")
+ to_string = lambda s: s.decode("utf-8")
+
+
+# TODO(aaroey): test graph with different dtypes.
+def GetSingleEngineGraphDef(dtype=dtypes.float32):
+ """Create a graph containing a single segment."""
+ g = ops.Graph()
+ with g.as_default():
+ inp = array_ops.placeholder(
+ dtype=dtype, shape=[None] + INPUT_DIMS[1:], name=INPUT_NAME)
+ with g.device("/GPU:0"):
+ conv_filter = constant_op.constant(
+ [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]],
+ name="weights",
+ dtype=dtype)
+ conv = nn.conv2d(
+ input=inp,
+ filter=conv_filter,
+ strides=[1, 2, 2, 1],
+ padding="SAME",
+ name="conv")
+ bias = constant_op.constant(
+ [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtype)
+ added = nn.bias_add(conv, bias, name="bias_add")
+ relu = nn.relu(added, "relu")
+ identity = array_ops.identity(relu, "identity")
+ pool = nn_ops.max_pool(
+ identity, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool")
+ array_ops.squeeze(pool, name=OUTPUT_NAME)
+ return g.as_graph_def()
+
+
+# TODO(aaroey): test graph with different dtypes.
+def GetMultiEngineGraphDef(dtype=dtypes.float32):
+ """Create a graph containing multiple segments."""
+ g = ops.Graph()
+ with g.as_default():
+ inp = array_ops.placeholder(
+ dtype=dtype, shape=[None] + INPUT_DIMS[1:], name=INPUT_NAME)
+ with g.device("/GPU:0"):
+ conv_filter = constant_op.constant(
+ [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]],
+ name="weights",
+ dtype=dtype)
+ conv = nn.conv2d(
+ input=inp,
+ filter=conv_filter,
+ strides=[1, 2, 2, 1],
+ padding="SAME",
+ name="conv")
+ c1 = constant_op.constant(
+ np.random.randn(INPUT_DIMS[0], 12, 12, 6), dtype=dtype)
+ p = conv * c1
+ c2 = constant_op.constant(
+ np.random.randn(INPUT_DIMS[0], 12, 12, 6), dtype=dtype)
+ q = conv / c2
+
+ edge = math_ops.sin(q)
+ edge /= edge
+ r = edge + edge
+
+ p -= edge
+ q *= edge
+ s = p + q
+ s -= r
+ array_ops.squeeze(s, name=OUTPUT_NAME)
+ return g.as_graph_def()
+
+
+TestGraph = namedtuple("TestGraph",
+ ["gdef", "num_expected_engines", "expected_output_dims"])
+
+TEST_GRAPHS = {
+ "SingleEngineGraph":
+ TestGraph(
+ gdef=GetSingleEngineGraphDef(),
+ num_expected_engines=1,
+ expected_output_dims=(100, 6, 6, 6)),
+ "MultiEngineGraph":
+ TestGraph(
+ gdef=GetMultiEngineGraphDef(),
+ num_expected_engines=2,
+ expected_output_dims=(100, 12, 12, 6)),
+ # TODO(aaroey): add a large complex graph to test.
+}
+
+
+class TfTrtIntegrationTest(test_util.TensorFlowTestCase):
"""Class to test Tensorflow-TensorRT integration."""
def setUp(self):
"""Setup method."""
- super(IntegrationTest, self).setUp()
+ super(TfTrtIntegrationTest, self).setUp()
warnings.simplefilter("always")
- inp_dims = (100, 24, 24, 2)
- self._input = np.random.random_sample(inp_dims)
- self._original_graph = self.get_simple_graph_def()
- self._gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
- self._config = cpb2.ConfigProto(gpu_options=self._gpu_options)
- self._reference = self.run_graph(self._original_graph, self._input)
-
- def get_simple_graph_def(self):
- """Create a simple graph and return its graph_def."""
- g = ops.Graph()
- with g.as_default():
- a = aops.placeholder(
- dtype=dtypes.float32, shape=(None, 24, 24, 2), name="input")
- e = cop.constant(
- [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]],
- name="weights",
- dtype=dtypes.float32)
- conv = nn.conv2d(
- input=a, filter=e, strides=[1, 2, 2, 1], padding="SAME", name="conv")
- b = cop.constant(
- [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtypes.float32)
- t = nn.bias_add(conv, b, name="biasAdd")
- relu = nn.relu(t, "relu")
- idty = aops.identity(relu, "ID")
- v = nn_ops.max_pool(
- idty, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool")
- aops.squeeze(v, name="output")
- return g.as_graph_def()
-
- def run_graph(self, gdef, dumm_inp):
- """Run given graphdef once."""
- ops.reset_default_graph()
+ self._input = np.random.random_sample(INPUT_DIMS)
+
+ def _GetConfigProto(self,
+ use_optimizer,
+ precision_mode=None,
+ is_dynamic_op=None):
+ if use_optimizer:
+ rewriter_cfg = rewriter_config_pb2.RewriterConfig()
+ rewriter_cfg.optimizers.extend(["constfold", "layout"])
+ custom_op = rewriter_cfg.custom_optimizers.add()
+ custom_op.name = "TensorRTOptimizer"
+ custom_op.parameter_map["minimum_segment_size"].i = 3
+ custom_op.parameter_map["max_batch_size"].i = self._input.shape[0]
+ custom_op.parameter_map["is_dynamic_op"].b = is_dynamic_op
+ custom_op.parameter_map["max_workspace_size_bytes"].i = 1 << 25
+ custom_op.parameter_map["precision_mode"].s = to_bytes(precision_mode)
+ graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_cfg)
+ else:
+ graph_options = config_pb2.GraphOptions()
+
+ gpu_options = config_pb2.GPUOptions()
+ if trt.trt_convert.get_linked_tensorrt_version()[0] == 3:
+ gpu_options.per_process_gpu_memory_fraction = 0.50
+
+ config = config_pb2.ConfigProto(
+ gpu_options=gpu_options, graph_options=graph_options)
+ return config
+
+ def _RunGraph(self, graph_key, gdef, input_data, config, num_runs=2):
+ """Run given graphdef multiple times."""
g = ops.Graph()
with g.as_default():
inp, out = importer.import_graph_def(
- graph_def=gdef, return_elements=["input", "output"])
+ graph_def=gdef, return_elements=[INPUT_NAME, OUTPUT_NAME], name="")
inp = inp.outputs[0]
out = out.outputs[0]
with self.test_session(
- graph=g, config=self._config, use_gpu=True, force_gpu=True) as sess:
- val = sess.run(out, {inp: dumm_inp})
+ graph=g, config=config, use_gpu=True, force_gpu=True) as sess:
+ val = None
+ # Defaults to 2 runs to verify result across multiple runs is same.
+ for _ in range(num_runs):
+ new_val = sess.run(out, {inp: input_data})
+ self.assertEquals(TEST_GRAPHS[graph_key].expected_output_dims,
+ new_val.shape)
+ if val is not None:
+ self.assertAllEqual(new_val, val)
+ val = new_val
return val
# Use real data that is representative of the inference dataset
# for calibration. For this test script it is random data.
- def run_calibration(self, gdef, dumm_inp):
- """Run given calibration graph multiple times."""
- ops.reset_default_graph()
- g = ops.Graph()
- with g.as_default():
- inp, out = importer.import_graph_def(
- graph_def=gdef, return_elements=["input", "output"])
- inp = inp.outputs[0]
- out = out.outputs[0]
- # run over real calibration data here, we are mimicking a calibration
- # set of 30 different batches. Use as much calibration data as you want
- with self.test_session(
- graph=g, config=self._config, use_gpu=True, force_gpu=True) as sess:
- for _ in range(30):
- val = sess.run(out, {inp: dumm_inp})
- return val
+ def _RunCalibration(self, graph_key, gdef, input_data, config):
+ """Run calibration on given graph."""
+ return self._RunGraph(graph_key, gdef, input_data, config, 30)
- def get_trt_graph(self, mode):
+ def _GetTrtGraph(self, gdef, precision_mode, is_dynamic_op):
"""Return trt converted graph."""
- if mode in ["FP32", "FP16", "INT8"]:
- return trt.create_inference_graph(
- input_graph_def=self._original_graph,
- outputs=["output"],
- max_batch_size=self._input.shape[0],
- max_workspace_size_bytes=1 << 25,
- precision_mode=mode, # TRT Engine precision "FP32","FP16" or "INT8"
- minimum_segment_size=2 # minimum number of nodes in an engine
- )
- return None
-
- def testFP32(self):
- """Test FP32 conversion. Results should be identical to native case."""
- trt_graph = self.get_trt_graph("FP32")
- result = self.run_graph(trt_graph, self._input)
- self.assertAllEqual(self._reference, result)
- result1 = self.run_graph(trt_graph, self._input)
- self.assertAllEqual(result1, result)
-
- def testFP16(self):
- """Test FP16 conversion. Results may be different from native case."""
- trt_graph = self.get_trt_graph("FP16")
- result = self.run_graph(trt_graph, self._input)
- self.assertAllClose(self._reference, result, rtol=1.e-03)
- result1 = self.run_graph(trt_graph, self._input)
- self.assertAllEqual(result1, result)
-
- def testINT8(self):
- """Test INT8 conversion. Results may be different from native case."""
- calib_graph = self.get_trt_graph("INT8")
- result = self.run_calibration(calib_graph, self._input)
- self.assertAllEqual(self._reference, result)
- int8_graph = trt.calib_graph_to_infer_graph(calib_graph)
- result = self.run_graph(int8_graph, self._input)
- self.assertAllClose(self._reference, result, rtol=1.e-03)
- result1 = self.run_graph(int8_graph, self._input)
- self.assertAllEqual(result1, result)
+ return trt.create_inference_graph(
+ input_graph_def=gdef,
+ outputs=[OUTPUT_NAME],
+ max_batch_size=self._input.shape[0],
+ max_workspace_size_bytes=1 << 25,
+ precision_mode=precision_mode,
+ minimum_segment_size=2,
+ is_dynamic_op=is_dynamic_op)
+
+ def _VerifyGraphDef(self,
+ graph_key,
+ gdef,
+ precision_mode=None,
+ is_calibrated=None,
+ dynamic_engine=None):
+ num_engines = 0
+ for n in gdef.node:
+ if n.op == "TRTEngineOp":
+ num_engines += 1
+ self.assertNotEqual("", n.attr["serialized_segment"].s)
+ self.assertNotEqual("", n.attr["segment_funcdef_name"].s)
+ self.assertEquals(n.attr["precision_mode"].s, precision_mode)
+ self.assertEquals(n.attr["static_engine"].b, not dynamic_engine)
+ if precision_mode == MODE_INT8 and is_calibrated:
+ self.assertNotEqual("", n.attr["calibration_data"].s)
+ else:
+ self.assertEquals("", n.attr["calibration_data"].s)
+ if precision_mode is None:
+ self.assertEquals(num_engines, 0)
+ else:
+ self.assertEquals(num_engines,
+ TEST_GRAPHS[graph_key].num_expected_engines)
+
+ def _RunTest(self, graph_key, use_optimizer, precision_mode,
+ dynamic_infer_engine, dynamic_calib_engine):
+ assert precision_mode in [MODE_FP32, MODE_FP16, MODE_INT8]
+ input_gdef = TEST_GRAPHS[graph_key].gdef
+ self._VerifyGraphDef(graph_key, input_gdef)
+
+ # Get reference result without running trt.
+ config_no_trt = self._GetConfigProto(False)
+ print("Running original graph w/o trt, config:\n%s" % str(config_no_trt))
+ ref_result = self._RunGraph(graph_key, input_gdef, self._input,
+ config_no_trt)
+
+ # Run calibration if necessary.
+ if precision_mode == MODE_INT8:
+
+ calib_config = self._GetConfigProto(use_optimizer, precision_mode,
+ dynamic_calib_engine)
+ print("Running calibration graph, config:\n%s" % str(calib_config))
+ if use_optimizer:
+ self.assertTrue(False)
+ # TODO(aaroey): uncomment this and get infer_gdef when this mode is
+ # supported.
+ # result = self._RunCalibration(graph_key, input_gdef, self._input,
+ # calib_config)
+ else:
+ calib_gdef = self._GetTrtGraph(input_gdef, precision_mode,
+ dynamic_calib_engine)
+ self._VerifyGraphDef(graph_key, calib_gdef, precision_mode, False,
+ dynamic_calib_engine)
+ result = self._RunCalibration(graph_key, calib_gdef, self._input,
+ calib_config)
+ infer_gdef = trt.calib_graph_to_infer_graph(calib_gdef)
+ self._VerifyGraphDef(graph_key, infer_gdef, precision_mode, True,
+ dynamic_calib_engine)
+ self.assertAllClose(ref_result, result, rtol=1.e-03)
+ else:
+ infer_gdef = input_gdef
+
+ # Run inference.
+ infer_config = self._GetConfigProto(use_optimizer, precision_mode,
+ dynamic_infer_engine)
+ print("Running final inference graph, config:\n%s" % str(infer_config))
+ if use_optimizer:
+ result = self._RunGraph(graph_key, infer_gdef, self._input, infer_config)
+ else:
+ trt_infer_gdef = self._GetTrtGraph(infer_gdef, precision_mode,
+ dynamic_infer_engine)
+ self._VerifyGraphDef(graph_key, trt_infer_gdef, precision_mode, True,
+ dynamic_infer_engine)
+ result = self._RunGraph(graph_key, trt_infer_gdef, self._input,
+ infer_config)
+ self.assertAllClose(ref_result, result, rtol=1.e-03)
+
+ def testIdempotence(self):
+ # Test that applying tensorrt optimizer or offline conversion tools multiple
+ # times to the same graph will result in same graph.
+ # TODO(aaroey): implement this.
+ pass
+
+
+def GetTests():
+
+ def _GetTest(g, u, p, i, c):
+
+ def _Test(self):
+ print("Running test with parameters: graph_key=%s, use_optimizer=%s, "
+ "precision_mode=%s, dynamic_infer_engine=%s, "
+ "dynamic_calib_engine=%s" % (g, u, p, i, c))
+ self._RunTest(g, u, p, i, c)
+
+ return _Test
+
+ use_optimizer_options = [False, True]
+ precision_mode_options = [MODE_FP32, MODE_FP16, MODE_INT8]
+ dynamic_infer_engine_options = [False, True]
+ dynamic_calib_engine_options = [False, True]
+ for (graph_key, use_optimizer, precision_mode,
+ dynamic_infer_engine, dynamic_calib_engine) in itertools.product(
+ TEST_GRAPHS, use_optimizer_options, precision_mode_options,
+ dynamic_infer_engine_options, dynamic_calib_engine_options):
+ if precision_mode == MODE_INT8:
+ if not dynamic_calib_engine and dynamic_infer_engine:
+ # TODO(aaroey): test this case, the conversion from static calibration
+ # engine to dynamic inference engine should be a noop.
+ continue
+ if use_optimizer:
+ # TODO(aaroey): if use_optimizer is True we need to get the inference
+ # graphdef using custom python wrapper class, which is not currently
+ # supported yet.
+ continue
+ if not dynamic_calib_engine:
+ # TODO(aaroey): construction of static calibration engine is not
+ # supported yet.
+ continue
+ if dynamic_calib_engine and not dynamic_infer_engine:
+ # TODO(aaroey): construction of static inference engine using dynamic
+ # calibration engine is not supported yet.
+ continue
+ else: # In non int8 mode.
+ if dynamic_calib_engine:
+ # dynamic_calib_engine doesn't affect non-int8 modes, so just let
+ # related tests run once on dynamic_calib_engine=False.
+ continue
+ yield _GetTest(graph_key, use_optimizer, precision_mode,
+ dynamic_infer_engine, dynamic_calib_engine)
if __name__ == "__main__":
- googletest.main()
+ for index, t in enumerate(GetTests()):
+ setattr(TfTrtIntegrationTest, "testTfTRT_" + str(index), t)
+ test.main()
diff --git a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt
index 6cd76ff340..342a1f6b05 100644
--- a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt
@@ -25,7 +25,7 @@ END
(K-1)-dimensional tensor of indices into `params`, where each element defines a
slice of `params`:
- output[i_0, ..., i_{K-2}] = params[indices[i0, ..., i_{K-2}]]
+ output[\\(i_0, ..., i_{K-2}\\)] = params[indices[\\(i_0, ..., i_{K-2}\\)]]
Whereas in @{tf.gather} `indices` defines slices into the first
dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the
diff --git a/tensorflow/core/api_def/base_api/api_def_LinSpace.pbtxt b/tensorflow/core/api_def/base_api/api_def_LinSpace.pbtxt
index 94a4ef574d..f706810662 100644
--- a/tensorflow/core/api_def/base_api/api_def_LinSpace.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_LinSpace.pbtxt
@@ -3,19 +3,19 @@ op {
in_arg {
name: "start"
description: <<END
-First entry in the range.
+0-D tensor. First entry in the range.
END
}
in_arg {
name: "stop"
description: <<END
-Last entry in the range.
+0-D tensor. Last entry in the range.
END
}
in_arg {
name: "num"
description: <<END
-Number of values to generate.
+0-D tensor. Number of values to generate.
END
}
out_arg {
diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt
index 0d680f6531..d7b56aec87 100644
--- a/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt
@@ -18,7 +18,7 @@ END
}
summary: "Computes the matrix exponential of one or more square matrices:"
description: <<END
-exp(A) = \sum_{n=0}^\infty A^n/n!
+\\(exp(A) = \sum_{n=0}^\infty A^n/n!\\)
The exponential is computed using a combination of the scaling and squaring
method and the Pade approximation. Details can be founds in:
diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt
index a6c4d0d400..9e80064d15 100644
--- a/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt
@@ -20,7 +20,7 @@ END
summary: "Computes the matrix logarithm of one or more square matrices:"
description: <<END
-log(exp(A)) = A
+\\(log(exp(A)) = A\\)
This op is only defined for complex matrices. If A is positive-definite and
real, then casting to a complex matrix, taking the logarithm and casting back
diff --git a/tensorflow/core/api_def/base_api/api_def_ReduceJoin.pbtxt b/tensorflow/core/api_def/base_api/api_def_ReduceJoin.pbtxt
index d13866ddaa..b447d09377 100644
--- a/tensorflow/core/api_def/base_api/api_def_ReduceJoin.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_ReduceJoin.pbtxt
@@ -36,7 +36,7 @@ END
summary: "Joins a string Tensor across the given dimensions."
description: <<END
Computes the string join across dimensions in the given string Tensor of shape
-`[d_0, d_1, ..., d_n-1]`. Returns a new Tensor created by joining the input
+`[\\(d_0, d_1, ..., d_{n-1}\\)]`. Returns a new Tensor created by joining the input
strings with the given separator (default: empty string). Negative indices are
counted backwards from the end, with `-1` being equivalent to `n - 1`. If
indices are not specified, joins across all dimensions beginning from `n - 1`
diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt
index b0665ebf0e..a9a7646314 100644
--- a/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt
@@ -42,7 +42,7 @@ within a given variable according to `indices`.
`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
`indices` must be integer tensor, containing indices into `ref`.
-It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`.
The innermost dimension of `indices` (with length `K`) corresponds to
indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
@@ -50,9 +50,7 @@ dimension of `ref`.
`updates` is `Tensor` of rank `Q-1+P-K` with shape:
-```
-[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
-```
+$$[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].$$
For example, say we want to add 4 scattered elements to a rank-1 tensor to 8
elements. In Python, that addition would look like this:
diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt
index e5c64c2b90..35116e5f6a 100644
--- a/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt
@@ -37,7 +37,7 @@ respect to both `input` and `updates`.
`input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
`indices` must be integer tensor, containing indices into `input`.
-It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`.
The innermost dimension of `indices` (with length `K`) corresponds to
indices into elements (if `K = P`) or `(P-K)`-dimensional slices
@@ -45,9 +45,7 @@ indices into elements (if `K = P`) or `(P-K)`-dimensional slices
`updates` is `Tensor` of rank `Q-1+P-K` with shape:
-```
-[d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]].
-```
+$$[d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]].$$
For example, say we want to add 4 scattered elements to a rank-1 tensor to 8
elements. In Python, that addition would look like this:
diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt
index 333db017f5..99e5c4908b 100644
--- a/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt
@@ -42,7 +42,7 @@ within a given variable according to `indices`.
`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
`indices` must be integer tensor, containing indices into `ref`.
-It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`.
The innermost dimension of `indices` (with length `K`) corresponds to
indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
@@ -50,9 +50,7 @@ dimension of `ref`.
`updates` is `Tensor` of rank `Q-1+P-K` with shape:
-```
-[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
-```
+$$[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].$$
For example, say we want to subtract 4 scattered elements from a rank-1 tensor
with 8 elements. In Python, that subtraction would look like this:
diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt
index 33d98262d5..cb57c171b9 100644
--- a/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt
@@ -42,7 +42,7 @@ variable according to `indices`.
`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
`indices` must be integer tensor, containing indices into `ref`.
-It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`.
The innermost dimension of `indices` (with length `K`) corresponds to
indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
@@ -50,9 +50,7 @@ dimension of `ref`.
`updates` is `Tensor` of rank `Q-1+P-K` with shape:
-```
-[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
-```
+$$[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].$$
For example, say we want to update 4 scattered elements to a rank-1 tensor to
8 elements. In Python, that update would look like this:
diff --git a/tensorflow/core/api_def/base_api/api_def_Softmax.pbtxt b/tensorflow/core/api_def/base_api/api_def_Softmax.pbtxt
index 43884824c9..b51b468c3d 100644
--- a/tensorflow/core/api_def/base_api/api_def_Softmax.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Softmax.pbtxt
@@ -16,6 +16,6 @@ END
description: <<END
For each batch `i` and class `j` we have
- softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j]))
+ $$softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j]))$$
END
}
diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyAdagrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyAdagrad.pbtxt
index 1698e2def0..06409d8db2 100644
--- a/tensorflow/core/api_def/base_api/api_def_SparseApplyAdagrad.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyAdagrad.pbtxt
@@ -47,7 +47,7 @@ END
summary: "Update relevant entries in \'*var\' and \'*accum\' according to the adagrad scheme."
description: <<END
That is for rows we have grad for, we update var and accum as follows:
-accum += grad * grad
-var -= lr * grad * (1 / sqrt(accum))
+$$accum += grad * grad$$
+$$var -= lr * grad * (1 / sqrt(accum))$$
END
}
diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyCenteredRMSProp.pbtxt
index 2c6a36bf45..b3f2d3ea62 100644
--- a/tensorflow/core/api_def/base_api/api_def_SparseApplyCenteredRMSProp.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyCenteredRMSProp.pbtxt
@@ -83,8 +83,8 @@ mean_square = decay * mean_square + (1-decay) * gradient ** 2
mean_grad = decay * mean_grad + (1-decay) * gradient
Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
-ms <- rho * ms_{t-1} + (1-rho) * grad * grad
-mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
-var <- var - mom
+$$ms <- rho * ms_{t-1} + (1-rho) * grad * grad$$
+$$mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)$$
+$$var <- var - mom$$
END
}
diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyFtrl.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyFtrl.pbtxt
index 524b5c5a47..9a6b6bca5f 100644
--- a/tensorflow/core/api_def/base_api/api_def_SparseApplyFtrl.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyFtrl.pbtxt
@@ -71,10 +71,10 @@ END
summary: "Update relevant entries in \'*var\' according to the Ftrl-proximal scheme."
description: <<END
That is for rows we have grad for, we update var, accum and linear as follows:
-accum_new = accum + grad * grad
-linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-accum = accum_new
+$$accum_{new} = accum + grad * grad$$
+$$linear += grad + (accum_{new}^{-lr_{power}} - accum^{-lr_{power}}) / lr * var$$
+$$quadratic = 1.0 / (accum_{new}^{lr_{power}} * lr) + 2 * l2$$
+$$var = (sign(linear) * l1 - linear) / quadratic\ if\ |linear| > l1\ else\ 0.0$$
+$$accum = accum_{new}$$
END
}
diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt
index 8d9ac9ea3f..17dbb488de 100644
--- a/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt
@@ -64,7 +64,7 @@ Set use_nesterov = True if you want to use Nesterov momentum.
That is for rows we have grad for, we update var and accum as follows:
-accum = accum * momentum + grad
-var -= lr * accum
+$$accum = accum * momentum + grad$$
+$$var -= lr * accum$$
END
}
diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt
index 80541b91c7..0b24f2ddd1 100644
--- a/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt
@@ -58,9 +58,9 @@ END
summary: "Sparse update entries in \'*var\' and \'*accum\' according to FOBOS algorithm."
description: <<END
That is for rows we have grad for, we update var and accum as follows:
-accum += grad * grad
-prox_v = var
-prox_v -= lr * grad * (1 / sqrt(accum))
-var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
+$$accum += grad * grad$$
+$$prox_v = var$$
+$$prox_v -= lr * grad * (1 / sqrt(accum))$$
+$$var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}$$
END
}
diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalGradientDescent.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalGradientDescent.pbtxt
index 5200e5516d..9dc53860e5 100644
--- a/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalGradientDescent.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalGradientDescent.pbtxt
@@ -52,7 +52,7 @@ END
summary: "Sparse update \'*var\' as FOBOS algorithm with fixed learning rate."
description: <<END
That is for rows we have grad for, we update var as follows:
-prox_v = var - alpha * grad
-var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
+$$prox_v = var - alpha * grad$$
+$$var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}$$
END
}
diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyRMSProp.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyRMSProp.pbtxt
index a4dbd608b8..ee9f57fa9d 100644
--- a/tensorflow/core/api_def/base_api/api_def_SparseApplyRMSProp.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyRMSProp.pbtxt
@@ -71,8 +71,8 @@ and mom will not update in iterations during which the grad is zero.
mean_square = decay * mean_square + (1-decay) * gradient ** 2
Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
-ms <- rho * ms_{t-1} + (1-rho) * grad * grad
-mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
-var <- var - mom
+$$ms <- rho * ms_{t-1} + (1-rho) * grad * grad$$
+$$mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)$$
+$$var <- var - mom$$
END
}
diff --git a/tensorflow/core/api_def/base_api/api_def_SparseSliceGrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseSliceGrad.pbtxt
new file mode 100644
index 0000000000..51af6adcf1
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_SparseSliceGrad.pbtxt
@@ -0,0 +1,40 @@
+op {
+ graph_op_name: "SparseSliceGrad"
+ in_arg {
+ name: "backprop_val_grad"
+ description: <<END
+1-D. The gradient with respect to
+the non-empty values of the sliced `SparseTensor`.
+END
+ }
+ in_arg {
+ name: "input_indices"
+ description: <<END
+2-D. The `indices` of the input `SparseTensor`.
+END
+ }
+ in_arg {
+ name: "input_start"
+ description: <<END
+1-D. Tensor representing the start of the slice.
+END
+ }
+ in_arg {
+ name: "output_indices"
+ description: <<END
+2-D. The `indices` of the sliced `SparseTensor`.
+END
+ }
+ out_arg {
+ name: "val_grad"
+ description: <<END
+1-D. The gradient with respect to the non-empty values of input `SparseTensor`.
+END
+ }
+ summary: "The gradient operator for the SparseSlice op."
+ description: <<END
+This op takes in the upstream gradient w.r.t. non-empty values of
+the sliced `SparseTensor`, and outputs the gradients w.r.t.
+the non-empty values of input `SparseTensor`.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
index eb5d0d1247..9aeabd030d 100644
--- a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
@@ -20,7 +20,7 @@ Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of
segments.
Computes a tensor such that
-`(output[i] = sum_{j...} data[j...]` where the sum is over tuples `j...` such
+\\(output[i] = sum_{j...} data[j...]\\) where the sum is over tuples `j...` such
that `segment_ids[j...] == i`. Unlike `SegmentSum`, `segment_ids`
need not be sorted and need not cover all values in the full
range of valid values.
diff --git a/tensorflow/core/api_def/python_api/api_def_BroadcastTo.pbtxt b/tensorflow/core/api_def/python_api/api_def_BroadcastTo.pbtxt
deleted file mode 100644
index 083eeced81..0000000000
--- a/tensorflow/core/api_def/python_api/api_def_BroadcastTo.pbtxt
+++ /dev/null
@@ -1,4 +0,0 @@
-op {
- graph_op_name: "BroadcastTo"
- visibility: HIDDEN
-}
diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSliceGrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSliceGrad.pbtxt
new file mode 100644
index 0000000000..6ea8df46ec
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_SparseSliceGrad.pbtxt
@@ -0,0 +1,4 @@
+op {
+ graph_op_name: "SparseSliceGrad"
+ visibility: HIDDEN
+}
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 466f601471..07360d594b 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -3941,6 +3941,7 @@ cc_library(
":sparse_reduce_op",
":sparse_reorder_op",
":sparse_reshape_op",
+ ":sparse_slice_grad_op",
":sparse_slice_op",
":sparse_softmax",
":sparse_sparse_binary_op_shared",
@@ -4027,6 +4028,12 @@ tf_kernel_library(
)
tf_kernel_library(
+ name = "sparse_slice_grad_op",
+ prefix = "sparse_slice_grad_op",
+ deps = SPARSE_DEPS,
+)
+
+tf_kernel_library(
name = "sparse_slice_op",
prefix = "sparse_slice_op",
deps = SPARSE_DEPS,
diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc
index 8afe6a2cbd..9acc725ba8 100644
--- a/tensorflow/core/kernels/conv_ops_test.cc
+++ b/tensorflow/core/kernels/conv_ops_test.cc
@@ -221,7 +221,7 @@ class FusedResizePadConvOpTest : public OpsTestBase {
std::vector<Tensor> fused_tensors;
TF_ASSERT_OK(session->Run({}, {"fused_conv"}, {}, &fused_tensors));
- test::ExpectTensorNear<float>(unfused_tensors[0], fused_tensors[0], 1e-5);
+ test::ExpectClose(unfused_tensors[0], fused_tensors[0]);
}
void CompareFusedPadOnlyAndSeparate(int input_width, int input_height,
@@ -269,7 +269,7 @@ class FusedResizePadConvOpTest : public OpsTestBase {
std::vector<Tensor> fused_tensors;
TF_ASSERT_OK(session->Run({}, {"fused_conv"}, {}, &fused_tensors));
- test::ExpectTensorNear<float>(unfused_tensors[0], fused_tensors[0], 1e-5);
+ test::ExpectClose(unfused_tensors[0], fused_tensors[0]);
}
};
diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc
index 31d1b949ef..d054f0d404 100644
--- a/tensorflow/core/kernels/mkl_concat_op.cc
+++ b/tensorflow/core/kernels/mkl_concat_op.cc
@@ -704,14 +704,14 @@ class MklConcatOp : public OpKernel {
if (input_tensors[k].NumElements() == 0)
continue;
- auto src_dims = TFShapeToMklDnnDims(
- mkl_input_shapes[k].GetTfShape());
auto src_md = mkl_input_shapes[k].GetMklLayout();
srcs[k].SetUsrMem(src_md, &input_tensors[k]);
- if (src_md.data.format != mkl_common_format)
+ if (src_md.data.format != mkl_common_format) {
+ memory::dims src_dims(src_md.data.dims, &src_md.data.dims[src_md.data.ndims]);
src_md = memory::desc(src_dims, MklDnnType<T>(),
mkl_common_format);
+ }
srcs_pd.push_back(memory::primitive_desc(src_md, cpu_engine));
}
diff --git a/tensorflow/core/kernels/sparse_slice_grad_op.cc b/tensorflow/core/kernels/sparse_slice_grad_op.cc
new file mode 100644
index 0000000000..90a39ed818
--- /dev/null
+++ b/tensorflow/core/kernels/sparse_slice_grad_op.cc
@@ -0,0 +1,126 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_util.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/util/sparse/sparse_tensor.h"
+
+namespace tensorflow {
+
+template <typename T>
+class SparseSliceGradOp : public OpKernel {
+ public:
+ explicit SparseSliceGradOp(OpKernelConstruction *ctx) : OpKernel(ctx) {}
+
+ void Compute(OpKernelContext *ctx) override {
+ const Tensor *backprop_val_grad, *input_indices, *output_indices, *input_start;
+ OP_REQUIRES_OK(ctx, ctx->input("backprop_val_grad", &backprop_val_grad));
+ OP_REQUIRES_OK(ctx, ctx->input("input_indices", &input_indices));
+ OP_REQUIRES_OK(ctx, ctx->input("input_start", &input_start));
+ OP_REQUIRES_OK(ctx, ctx->input("output_indices", &output_indices));
+
+ OP_REQUIRES(ctx,
+ TensorShapeUtils::IsMatrix(input_indices->shape()) &&
+ TensorShapeUtils::IsMatrix(output_indices->shape()),
+ errors::InvalidArgument(
+ "Input and output indices should be matrices "
+ "but received shapes: ",
+ input_indices->shape().DebugString(), " and ",
+ output_indices->shape().DebugString()));
+ OP_REQUIRES(
+ ctx, TensorShapeUtils::IsVector(backprop_val_grad->shape()),
+ errors::InvalidArgument(
+ "Input backprop_val_grad should be a vector but received shape: ",
+ backprop_val_grad->shape().DebugString()));
+ OP_REQUIRES(
+ ctx,
+ input_indices->dim_size(1) == output_indices->dim_size(1),
+ errors::InvalidArgument("The input and output should have the same "
+ "ndims: got: ", input_indices->dim_size(1), " and ",
+ output_indices->dim_size(1)));
+ OP_REQUIRES(
+ ctx, output_indices->dim_size(0) <= input_indices->dim_size(0),
+ errors::InvalidArgument("# rows of output_indices should be not greater "
+ "than of input_indices, got ",
+ output_indices->dim_size(0), " and ",
+ input_indices->dim_size(0)));
+ OP_REQUIRES(
+ ctx, backprop_val_grad->NumElements() == output_indices->dim_size(0),
+ errors::InvalidArgument("# elements of backprop_val_grad and # rows of "
+ "output_indices should match (#nnz of sum): got ",
+ backprop_val_grad->NumElements(), " and ",
+ output_indices->dim_size(0)));
+ OP_REQUIRES(ctx, TensorShapeUtils::IsVector(input_start->shape()),
+ errors::InvalidArgument(
+ "The input_start should be a vector but received shape ",
+ input_start->shape().DebugString()));
+
+ const int num_dims = input_indices->dim_size(1);
+ OP_REQUIRES(ctx, num_dims == input_start->NumElements(),
+ errors::InvalidArgument(
+ "Expected input_start to be a vector of length ", num_dims,
+ " but got length ", input_start->NumElements()));
+
+ const int64 input_nnz = input_indices->dim_size(0);
+
+ Tensor *val_grad;
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_output(0, TensorShape({input_nnz}), &val_grad));
+
+ T *val_grad_flat = val_grad->flat<T>().data();
+ const T *backprop_val_grad_flat = backprop_val_grad->flat<T>().data();
+ memset(val_grad_flat, 0, sizeof(T) * input_nnz);
+
+ // Fill gradients where input index == output index + input_start.
+ const auto input_indices_mat = input_indices->matrix<int64>();
+ const auto output_indices_mat = output_indices->matrix<int64>();
+ const auto input_start_flat = input_start->flat<int64>();
+ int64 j = 0;
+ for (int64 i = 0; i < input_nnz && j < backprop_val_grad->NumElements();
+ ++i) {
+ bool is_same = true;
+ for (int d = 0; d < num_dims; ++d) {
+ const int64 a = input_indices_mat(i, d);
+ const int64 b = output_indices_mat(j, d);
+ const int64 offset = input_start_flat(d);
+ if (a != b + offset) {
+ is_same = false;
+ break;
+ }
+ }
+ if (is_same) {
+ val_grad_flat[i] = backprop_val_grad_flat[j];
+ ++j;
+ }
+ }
+ OP_REQUIRES(
+ ctx, backprop_val_grad->NumElements() == j,
+ errors::Internal("Elements of backprop_val_grad aren't all propagated. "
+ "Num elements:", backprop_val_grad->NumElements(),
+ ", used: ", j));
+ }
+};
+
+#define REGISTER_KERNELS(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("SparseSliceGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
+ SparseSliceGradOp<type>)
+
+TF_CALL_NUMBER_TYPES(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/db/sqlite_test.cc b/tensorflow/core/lib/db/sqlite_test.cc
index 1e88323d01..1590055960 100644
--- a/tensorflow/core/lib/db/sqlite_test.cc
+++ b/tensorflow/core/lib/db/sqlite_test.cc
@@ -73,6 +73,21 @@ TEST_F(SqliteTest, InsertAndSelectDouble) {
EXPECT_EQ(1, stmt.ColumnInt(1));
}
+#ifdef SQLITE_ENABLE_JSON1  // json_extract() requires the JSON1 extension.
+TEST_F(SqliteTest, Json1Extension) {
+  string s1 = "{\"key\": 42}";
+  string s2 = "{\"key\": \"value\"}";
+  auto stmt = db_->PrepareOrDie("INSERT INTO T (a, b) VALUES (?, ?)");
+  stmt.BindText(1, s1);
+  stmt.BindText(2, s2);
+  TF_ASSERT_OK(stmt.StepAndReset());
+  stmt = db_->PrepareOrDie("SELECT json_extract(a, '$.key'), json_extract(b, '$.key') FROM T");
+  TF_ASSERT_OK(stmt.Step(&is_done_));
+  EXPECT_EQ(42, stmt.ColumnInt(0));  // numeric JSON value comes back as an integer
+  EXPECT_EQ("value", stmt.ColumnString(1));  // string JSON value comes back unquoted
+}
+#endif  // SQLITE_ENABLE_JSON1
+
TEST_F(SqliteTest, NulCharsInString) {
string s; // XXX: Want to write {2, '\0'} but not sure why not.
s.append(static_cast<size_t>(2), '\0');
diff --git a/tensorflow/core/ops/sparse_ops.cc b/tensorflow/core/ops/sparse_ops.cc
index acc8c782ef..bc0cb2095d 100644
--- a/tensorflow/core/ops/sparse_ops.cc
+++ b/tensorflow/core/ops/sparse_ops.cc
@@ -302,6 +302,20 @@ REGISTER_OP("SparseSplit")
return Status::OK();
});
+REGISTER_OP("SparseSliceGrad")
+ .Input("backprop_val_grad: T")
+ .Input("input_indices: int64")
+ .Input("input_start: int64")
+ .Input("output_indices: int64")
+ .Output("val_grad: T")
+ .Attr("T: numbertype")
+ .SetShapeFn([](InferenceContext* c) {
+ ShapeHandle indices;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 2, &indices));
+ c->set_output(0, c->Vector(c->Dim(indices, 0)));
+ return Status::OK();
+ });
+
REGISTER_OP("SparseSlice")
.Input("indices: int64")
.Input("values: T")
diff --git a/tensorflow/core/ops/sparse_ops_test.cc b/tensorflow/core/ops/sparse_ops_test.cc
index 0df3320484..6a9b5ce4d3 100644
--- a/tensorflow/core/ops/sparse_ops_test.cc
+++ b/tensorflow/core/ops/sparse_ops_test.cc
@@ -52,6 +52,18 @@ TEST(SparseOpsTest, SparseAddGrad_ShapeFn) {
INFER_OK(op, "?;[?,?];[?,?];?", "[d1_0];[d2_0]");
}
+TEST(SparseOpsTest, SparseSliceGrad_ShapeFn) {
+ ShapeInferenceTestOp op("SparseSliceGrad");
+
+ // Rank checks.
+ INFER_ERROR("must be rank 2", op, "?;[1];?;?");
+
+ INFER_OK(op, "?;?;?;?", "[?]");
+
+ // input[1].dim(0) determines the output shape.
+ INFER_OK(op, "?;[?,?];?;?", "[d1_0]");
+}
+
TEST(SparseOpsTest, SparseReorder_ShapeFn) {
ShapeInferenceTestOp op("SparseReorder");
diff --git a/tensorflow/docs_src/get_started/_index.yaml b/tensorflow/docs_src/get_started/_index.yaml
index 277fc852fb..4060804892 100644
--- a/tensorflow/docs_src/get_started/_index.yaml
+++ b/tensorflow/docs_src/get_started/_index.yaml
@@ -66,9 +66,7 @@ landing_page:
}
</style>
<div class="devsite-landing-row-item-description">
- <a href="#">
- <h3 class="hide-from-toc">Learn and use ML</h3>
- </a>
+ <h3 class="hide-from-toc">Learn and use ML</h3>
<div class="devsite-landing-row-item-description-content">
<p>
The high-level Keras API provides building blocks to create and
@@ -117,9 +115,7 @@ landing_page:
- items:
- custom_html: >
<div class="devsite-landing-row-item-description" style="border-right: 2px solid #eee;">
- <a href="https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples/notebooks">
- <h3 class="hide-from-toc">Research and experimentation</h3>
- </a>
+ <h3 class="hide-from-toc">Research and experimentation</h3>
<div class="devsite-landing-row-item-description-content">
<p>
Eager execution provides an imperative, define-by-run interface for advanced operations. Write custom layers, forward passes, and training loops with auto‑differentiation. Start with
@@ -170,9 +166,7 @@ landing_page:
</div>
- custom_html: >
<div class="devsite-landing-row-item-description">
- <a href="#">
- <h3 class="hide-from-toc">ML at production scale</h3>
- </a>
+ <h3 class="hide-from-toc">ML at production scale</h3>
<div class="devsite-landing-row-item-description-content">
<p>
Estimators can train large models on multiple machines in a
diff --git a/tensorflow/docs_src/get_started/leftnav_files b/tensorflow/docs_src/get_started/leftnav_files
index 5c400a67f0..99d2b2c3e1 100644
--- a/tensorflow/docs_src/get_started/leftnav_files
+++ b/tensorflow/docs_src/get_started/leftnav_files
@@ -1,7 +1,7 @@
### Learn and use ML
-basic_classification.md
-basic_text_classification.md
-basic_regression.md
+basic_classification.md: Basic classification
+basic_text_classification.md: Text classification
+basic_regression.md: Regression
overfit_and_underfit.md
save_and_restore_models.md
next_steps.md
diff --git a/tensorflow/docs_src/get_started/next_steps.md b/tensorflow/docs_src/get_started/next_steps.md
index 6318a39c6c..01c9f7204a 100644
--- a/tensorflow/docs_src/get_started/next_steps.md
+++ b/tensorflow/docs_src/get_started/next_steps.md
@@ -1,4 +1,4 @@
-# Next Steps
+# Next steps
## Learn more about TensorFlow
diff --git a/tensorflow/docs_src/guide/custom_estimators.md b/tensorflow/docs_src/guide/custom_estimators.md
index fb20b35c12..a63e2bafb3 100644
--- a/tensorflow/docs_src/guide/custom_estimators.md
+++ b/tensorflow/docs_src/guide/custom_estimators.md
@@ -362,10 +362,10 @@ model's loss. This is the
that will be optimized.
We can calculate the loss by calling @{tf.losses.sparse_softmax_cross_entropy}.
-The value returned by this function will be lowest, approximately 0,
-probability of the correct class (at index `label`) is near 1.0. The loss value
-returned is progressively larger as the probability of the correct class
-decreases.
+The value returned by this function will be approximately 0 at lowest,
+when the probability of the correct class (at index `label`) is near 1.0.
+The loss value returned is progressively larger as the probability of the
+correct class decreases.
This function returns the average over the whole batch.
diff --git a/tensorflow/docs_src/guide/keras.md b/tensorflow/docs_src/guide/keras.md
index 83172dab7f..1d846df104 100644
--- a/tensorflow/docs_src/guide/keras.md
+++ b/tensorflow/docs_src/guide/keras.md
@@ -35,7 +35,7 @@ from tensorflow import keras
* The `tf.keras` version in the latest TensorFlow release might not be the same
as the latest `keras` version from PyPI. Check `tf.keras.__version__`.
* When [saving a model's weights](#weights_only), `tf.keras` defaults to the
- [checkpoint format](../get_started/checkpoints.md). Pass `save_format='h5'` to
+ [checkpoint format](./checkpoints.md). Pass `save_format='h5'` to
use HDF5.
## Build a simple model
@@ -221,7 +221,7 @@ To *evaluate* the inference-mode loss and metrics for the data provided:
```python
model.evaluate(x, y, batch_size=32)
-model.evaluate(dataset, steps=30
+model.evaluate(dataset, steps=30)
```
And to *predict* the output of the last layer in inference for the data provided,
@@ -442,7 +442,7 @@ model.load_weights('my_model')
```
By default, this saves the model's weights in the
-[TensorFlow checkpoint](../get_started/checkpoints.md) file format. Weights can
+[TensorFlow checkpoint](./checkpoints.md) file format. Weights can
also be saved to the Keras HDF5 format (the default for the multi-backend
implementation of Keras):
@@ -581,15 +581,6 @@ model.compile(loss='binary_crossentropy', optimizer=optimizer)
model.summary()
```
-Convert the Keras model to a `tf.estimator.Estimator` instance:
-
-```python
-keras_estimator = keras.estimator.model_to_estimator(
- keras_model=model,
- config=config,
- model_dir='/tmp/model_dir')
-```
-
Define an *input pipeline*. The `input_fn` returns a `tf.data.Dataset` object
used to distribute the data across multiple devices—with each device processing
a slice of the input batch.
@@ -615,6 +606,15 @@ strategy = tf.contrib.distribute.MirroredStrategy()
config = tf.estimator.RunConfig(train_distribute=strategy)
```
+Convert the Keras model to a `tf.estimator.Estimator` instance:
+
+```python
+keras_estimator = keras.estimator.model_to_estimator(
+ keras_model=model,
+ config=config,
+ model_dir='/tmp/model_dir')
+```
+
Finally, train the `Estimator` instance by providing the `input_fn` and `steps`
arguments:
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md
index e55520ceaa..a641dc3a6f 100644
--- a/tensorflow/docs_src/install/install_sources.md
+++ b/tensorflow/docs_src/install/install_sources.md
@@ -289,17 +289,27 @@ Note: If you're only interested in building the libraries for the TensorFlow C
or Java APIs, see [Build the C or Java libraries](#BuildCorJava), you do not
need to build the pip package in that case.
-To build a pip package for TensorFlow with CPU-only support,
-you would typically invoke the following command:
+### CPU-only support
+
+To build a pip package for TensorFlow with CPU-only support:
+
+<pre>
+$ bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package
+</pre>
+
+To build a pip package for TensorFlow with CPU-only support for the Intel® MKL-DNN:
<pre>
-$ <b>bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package</b>
+$ bazel build --config=mkl --config=opt //tensorflow/tools/pip_package:build_pip_package
</pre>
-To build a pip package for TensorFlow with GPU support,
-invoke the following command:
+### GPU support
+
+To build a pip package for TensorFlow with GPU support:
-<pre>$ <b>bazel build --config=opt --config=cuda //tensorflow/tools/pip_package:build_pip_package</b> </pre>
+<pre>
+$ bazel build --config=opt --config=cuda //tensorflow/tools/pip_package:build_pip_package
+</pre>
**NOTE on gcc 5 or later:** the binary pip packages available on the
TensorFlow website are built with gcc 4, which uses the older ABI. To
diff --git a/tensorflow/docs_src/mobile/tflite/demo_android.md b/tensorflow/docs_src/mobile/tflite/demo_android.md
index 1980fdeb66..fdf0bcf3c1 100644
--- a/tensorflow/docs_src/mobile/tflite/demo_android.md
+++ b/tensorflow/docs_src/mobile/tflite/demo_android.md
@@ -44,23 +44,22 @@ app:
Android Studio project.
* Install all the Gradle extensions it requests.
-To get a model, either:
-
-* Download the quantized [Mobilenet TensorFlow Lite model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip)
- and unzip and copy `mobilenet_quant_v1_224.tflite` to the assets directory:
- `tensorflow/contrib/lite/java/demo/app/src/main/assets/`.
-* Or, download the floating point [Inception-v3 model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip)
- and unzip and copy `inceptionv3_non_slim_2015.tflite` to the assets
- directory. Change the chosen classifier in
- [Camera2BasicFragment.java](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java)<br>
- from: `classifier = new ImageClassifierQuantizedMobileNet(getActivity());`<br>
- to: `classifier = new ImageClassifierFloatInception(getActivity());`.
+Now you can build and run the demo app.
-Now you can build and run the demo app.
+The build process downloads the quantized [Mobilenet TensorFlow Lite model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip), and unzips it into the assets directory: `tensorflow/contrib/lite/java/demo/app/src/main/assets/`.
Some additional details are available on the
[TF Lite Android App page](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md).
+### Using other models
+
+To use a different model:
+* Download the floating point [Inception-v3 model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip).
+* Unzip and copy `inceptionv3_non_slim_2015.tflite` to the assets directory.
+* Change the chosen classifier in [Camera2BasicFragment.java](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java)<br>
+ from: `classifier = new ImageClassifierQuantizedMobileNet(getActivity());`<br>
+ to: `classifier = new ImageClassifierFloatInception(getActivity());`.
+
## Build TensorFlow Lite and the demo app from source
diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md
index 212e337637..791909f5fd 100644
--- a/tensorflow/docs_src/tutorials/layers.md
+++ b/tensorflow/docs_src/tutorials/layers.md
@@ -470,51 +470,18 @@ as the loss metric. The following code calculates cross entropy when the model
runs in either `TRAIN` or `EVAL` mode:
```python
-onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
-loss = tf.losses.softmax_cross_entropy(
- onehot_labels=onehot_labels, logits=logits)
+loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
```
Let's take a closer look at what's happening above.
-Our `labels` tensor contains a list of predictions for our examples, e.g. `[1,
-9, ...]`. In order to calculate cross-entropy, first we need to convert `labels`
-to the corresponding
-[one-hot encoding](https://www.quora.com/What-is-one-hot-encoding-and-when-is-it-used-in-data-science):
+Our `labels` tensor contains a list of prediction indices for our examples, e.g. `[1,
+9, ...]`. `logits` contains the linear outputs of our last layer.
-```none
-[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
- [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
- ...]
-```
-
-We use the @{tf.one_hot} function
-to perform this conversion. `tf.one_hot()` has two required arguments:
-
-* `indices`. The locations in the one-hot tensor that will have "on
- values"—i.e., the locations of `1` values in the tensor shown above.
-* `depth`. The depth of the one-hot tensor—i.e., the number of target classes.
- Here, the depth is `10`.
+`tf.losses.sparse_softmax_cross_entropy` calculates the softmax cross-entropy
+(also known as categorical cross-entropy or negative log-likelihood) from these
+two inputs in an efficient, numerically stable way.
-The following code creates the one-hot tensor for our labels, `onehot_labels`:
-
-```python
-onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
-```
-
-Because `labels` contains a series of values from 0–9, `indices` is just our
-`labels` tensor, with values cast to integers. The `depth` is `10` because we
-have 10 possible target classes, one for each digit.
-
-Next, we compute cross-entropy of `onehot_labels` and the softmax of the
-predictions from our logits layer. `tf.losses.softmax_cross_entropy()` takes
-`onehot_labels` and `logits` as arguments, performs softmax activation on
-`logits`, calculates cross-entropy, and returns our `loss` as a scalar `Tensor`:
-
-```python
-loss = tf.losses.softmax_cross_entropy(
- onehot_labels=onehot_labels, logits=logits)
-```
### Configure the Training Op
diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index b2dbdafc5f..7f1f0970a6 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -11210,7 +11210,7 @@ func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistorted
// SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value.
//
// value: The cropped area of the image must contain a fraction of the
-// supplied image within in this range.
+// supplied image within this range.
// If not specified, defaults to <f:0.05 f:1 >
func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr {
return func(m optionalAttr) {
@@ -17969,9 +17969,10 @@ func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_val
}
// Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)`
-//
// if < 0, `scale * features` otherwise.
//
+// Assumes weights to have zero mean and variance 1.0 / fan_in.
+//
// See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
func Selu(scope *Scope, features tf.Output) (activations tf.Output) {
if scope.Err() != nil {
@@ -21655,7 +21656,7 @@ func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr {
// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
//
// The `bad_color` argument is the color to use in the generated images for
-// non-finite input values. It is a `unit8` 1-D tensor of length `channels`.
+// non-finite input values. It is a `uint8` 1-D tensor of length `channels`.
// Each element must be in the range `[0, 255]` (It represents the value of a
// pixel in the output image). Non-finite values in the input tensor are
// replaced by this tensor in the output image. The default value is the color
@@ -24048,7 +24049,7 @@ func SampleDistortedBoundingBoxV2AspectRatioRange(value []float32) SampleDistort
// SampleDistortedBoundingBoxV2AreaRange sets the optional area_range attribute to value.
//
// value: The cropped area of the image must contain a fraction of the
-// supplied image within in this range.
+// supplied image within this range.
// If not specified, defaults to <f:0.05 f:1 >
func SampleDistortedBoundingBoxV2AreaRange(value []float32) SampleDistortedBoundingBoxV2Attr {
return func(m optionalAttr) {
@@ -24744,8 +24745,7 @@ type DecodeProtoV2Attr func(optionalAttr)
// If not specified, defaults to "local://"
func DecodeProtoV2DescriptorSource(value string) DecodeProtoV2Attr {
return func(m optionalAttr) {
- m["descriptor_source"] = value
- }
+ m["descriptor_source"] = value }
}
// DecodeProtoV2MessageFormat sets the optional message_format attribute to value.
diff --git a/tensorflow/java/src/gen/cc/source_writer.cc b/tensorflow/java/src/gen/cc/source_writer.cc
index 66401bdba7..8e5fba7e32 100644
--- a/tensorflow/java/src/gen/cc/source_writer.cc
+++ b/tensorflow/java/src/gen/cc/source_writer.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
+#include <string>
#include <algorithm>
#include <list>
#include <string>
diff --git a/tensorflow/java/src/main/java/org/tensorflow/Graph.java b/tensorflow/java/src/main/java/org/tensorflow/Graph.java
index d4fd3db5f7..7d19696749 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/Graph.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/Graph.java
@@ -143,6 +143,82 @@ public final class Graph implements AutoCloseable {
}
}
+ /**
+ * Adds operations to compute the partial derivatives of sum of {@code y}s w.r.t {@code x}s,
+ * i.e., {@code d(y_1 + y_2 + ...)/dx_1, d(y_1 + y_2 + ...)/dx_2...}
+ * <p>
+ * {@code dx} are used as initial gradients (which represent the symbolic partial derivatives of some loss function
+ * {@code L} w.r.t. {@code y}). {@code dx} must be null or have size of {@code y}.
+ * <p>
+ * If {@code dx} is null, the implementation will use dx of {@link org.tensorflow.op.core.OnesLike OnesLike} for all
+ * shapes in {@code y}.
+ *
+ * @param y output of the function to derive
+ * @param x inputs of the function for which partial derivatives are computed
+ * @param dx if not null, the partial derivatives of some loss function {@code L} w.r.t. {@code y}
+ * @return the partial derivatives {@code dy} with the size of {@code x}
+ */
+ public Output<?>[] addGradients(Output<?>[] y, Output<?>[] x, Output<?>[] dx) {
+ Output<?>[] dy = new Output<?>[x.length];
+ final long[] yHandles = new long[y.length];
+ final int[] yIndices = new int[y.length];
+ final long[] xHandles = new long[x.length];
+ final int[] xIndices = new int[x.length];
+ long[] dxHandles = null;
+ int[] dxIndices = null;
+
+ try (Reference ref = ref()) {
+ for (int i = 0; i < y.length; ++i) {
+ yHandles[i] = y[i].op().getUnsafeNativeHandle();
+ yIndices[i] = y[i].index();
+ }
+ for (int i = 0; i < x.length; ++i) {
+ xHandles[i] = x[i].op().getUnsafeNativeHandle();
+ xIndices[i] = x[i].index();
+ }
+ if (dx != null && dx.length > 0) {
+ dxHandles = new long[dx.length];
+ dxIndices = new int[dx.length];
+
+ for (int i = 0; i < dx.length; ++i) {
+ dxHandles[i] = dx[i].op().getUnsafeNativeHandle();
+ dxIndices[i] = dx[i].index();
+ }
+ }
+ // Gradient outputs are returned in two contiguous arrays concatenated into one. The first holds the native handles
+ // of the gradient operations while the second holds the index of their output
+ // e.g. given xHandles = [x0Handle, x1Handle, ...] and xIndices = [x0Index, x1Index, ..], we obtain
+ // dy = [dy0Handle, dy1Handle, ..., dy0Index, dy1Index, ...]
+ long[] dyHandlesAndIndices =
+ addGradients(ref.nativeHandle(), yHandles, yIndices, xHandles, xIndices, dxHandles, dxIndices);
+ int ndy = dyHandlesAndIndices.length >> 1;
+ if (ndy != dy.length) {
+ throw new IllegalStateException(String.valueOf(ndy) + " gradients were added to the graph when " + dy.length
+ + " were expected");
+ }
+ for (int i = 0, j = ndy; i < ndy; ++i, ++j) {
+ Operation op = new Operation(this, dyHandlesAndIndices[i]);
+ dy[i] = new Output<>(op, (int) dyHandlesAndIndices[j]);
+ }
+ }
+ return dy;
+ }
+
+ /**
+ * Adds operations to compute the partial derivatives of sum of {@code y}s w.r.t {@code x}s,
+ * i.e., {@code dy/dx_1, dy/dx_2...}
+ * <p>
+ * This is a simplified version of {@link #addGradients(Output[], Output[], Output[])} where {@code y} is
+ * a single output and {@code dx} is null.
+ *
+ * @param y output of the function to derive
+ * @param x inputs of the function for which partial derivatives are computed
+ * @return the partial derivatives {@code dy} with the size of {@code x}
+ */
+ public Output<?>[] addGradients(Output<?> y, Output<?>[] x) {
+ return addGradients(new Output<?>[]{y}, x, null);
+ }
+
private final Object nativeHandleLock = new Object();
private long nativeHandle;
private int refcount = 0;
@@ -254,6 +330,9 @@ public final class Graph implements AutoCloseable {
private static native byte[] toGraphDef(long handle);
+ private static native long[] addGradients(long handle, long[] inputHandles, int[] inputIndices,
+ long[] outputHandles, int[] outputIndices, long[] gradInputHandles, int[] gradInputIndices);
+
static {
TensorFlow.init();
}
diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java b/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java
new file mode 100644
index 0000000000..f4671c8af9
--- /dev/null
+++ b/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java
@@ -0,0 +1,153 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+package org.tensorflow.op.core;
+
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import org.tensorflow.Operand;
+import org.tensorflow.Output;
+import org.tensorflow.op.Op;
+import org.tensorflow.op.Operands;
+import org.tensorflow.op.Scope;
+import org.tensorflow.op.annotation.Operator;
+
+/**
+ * Adds operations to compute the partial derivatives of sum of {@code y}s w.r.t {@code x}s,
+ * i.e., {@code d(y_1 + y_2 + ...)/dx_1, d(y_1 + y_2 + ...)/dx_2...}
+ * <p>
+ * If {@code Options.dx()} values are set, they are used as the initial symbolic partial derivatives of some loss
+ * function {@code L} w.r.t. {@code y}. {@code Options.dx()} must have the size of {@code y}.
+ * <p>
+ * If {@code Options.dx()} is not set, the implementation will use dx of {@code OnesLike} for all
+ * shapes in {@code y}.
+ * <p>
+ * The partial derivatives are returned in output {@code dy}, with the size of {@code x}.
+ * <p>
+ * Example of usage:
+ * <pre>{@code
+ * Gradients gradients = Gradients.create(scope, Arrays.asList(loss), Arrays.asList(w, b));
+ *
+ * Constant<Float> alpha = ops.constant(1.0f, Float.class);
+ * ApplyGradientDescent.create(scope, w, alpha, gradients.<Float>dy(0));
+ * ApplyGradientDescent.create(scope, b, alpha, gradients.<Float>dy(1));
+ * }</pre>
+ */
+@Operator
+public class Gradients implements Op, Iterable<Operand<?>> {
+
+ /**
+ * Optional attributes for {@link Gradients}
+ */
+ public static class Options {
+
+ /**
+ * @param dx partial derivatives of some loss function {@code L} w.r.t. {@code y}
+ * @return this option builder
+ */
+ public Options dx(Iterable<Operand<?>> dx) {
+ this.dx = dx;
+ return this;
+ }
+
+ private Iterable<Operand<?>> dx;
+
+ private Options() {
+ }
+ }
+
+ /**
+ * Adds gradients computation ops to the graph according to scope.
+ *
+ * @param scope current graph scope
+ * @param y outputs of the function to derive
+ * @param x inputs of the function for which partial derivatives are computed
+ * @param options carries optional attributes values
+ * @return a new instance of {@code Gradients}
+ */
+ public static Gradients create(Scope scope, Iterable<Operand<?>> y, Iterable<Operand<?>> x, Options... options) {
+ Output<?>[] dx = null;
+ if (options != null) {
+ for (Options opts : options) {
+ if (opts.dx != null) {
+ dx = Operands.asOutputs(opts.dx);
+ }
+ }
+ }
+ Output<?>[] gradOutputs = scope.graph().addGradients(Operands.asOutputs(y), Operands.asOutputs(x), dx);
+ return new Gradients(Arrays.asList(gradOutputs));
+ }
+
+ /**
+ * Adds gradients computation ops to the graph according to scope.
+ *
+ * This is a simplified version of {@link #create(Scope, Iterable, Iterable, Options...)} where {@code y} is
+ * a single output.
+ *
+ * @param scope current graph scope
+ * @param y output of the function to derive
+ * @param x inputs of the function for which partial derivatives are computed
+ * @param options carries optional attributes values
+ * @return a new instance of {@code Gradients}
+ */
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ public static Gradients create(Scope scope, Operand<?> y, Iterable<Operand<?>> x, Options... options) {
+ return create(scope, (Iterable) Arrays.asList(y), x, options);
+ }
+
+  /**
+   * @param dx partial derivatives of some loss function {@code L} w.r.t. {@code y}
+   * @return a new {@link Options} carrying {@code dx}; static, so no {@code Gradients} instance is needed
+   */
+  public static Options dx(Iterable<Operand<?>> dx) {
+    return new Options().dx(dx);
+  }
+
+ @Override
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ public Iterator<Operand<?>> iterator() {
+ return (Iterator) dy.iterator();
+ }
+
+ /**
+ * Partial derivatives of {@code y}s w.r.t. {@code x}s, with the size of {@code x}
+ */
+ public List<Output<?>> dy() {
+ return dy;
+ }
+
+ /**
+ * Returns a symbolic handle to one of the gradient operation outputs
+ * <p>
+ * Warning: Does not check that the type of the tensor matches T. It is recommended to call
+ * this method with an explicit type parameter rather than letting it be inferred, e.g. {@code
+ * gradients.<Integer>dy(0)}
+ *
+ * @param <T> The expected element type of the tensors produced by this output.
+ * @param index The index of the output among the gradients added by this operation
+ */
+ @SuppressWarnings("unchecked")
+ public <T> Output<T> dy(int index) {
+ return (Output<T>) dy.get(index);
+ }
+
+ private List<Output<?>> dy;
+
+ private Gradients(List<Output<?>> dy) {
+ this.dy = dy;
+ }
+}
diff --git a/tensorflow/java/src/main/native/graph_jni.cc b/tensorflow/java/src/main/native/graph_jni.cc
index 0fef155275..dac6a345e9 100644
--- a/tensorflow/java/src/main/native/graph_jni.cc
+++ b/tensorflow/java/src/main/native/graph_jni.cc
@@ -16,7 +16,9 @@ limitations under the License.
#include "tensorflow/java/src/main/native/graph_jni.h"
#include <limits>
+#include <memory>
#include "tensorflow/c/c_api.h"
+#include "tensorflow/java/src/main/native/utils_jni.h"
#include "tensorflow/java/src/main/native/exception_jni.h"
namespace {
@@ -130,3 +132,55 @@ Java_org_tensorflow_Graph_toGraphDef(JNIEnv* env, jclass clazz, jlong handle) {
TF_DeleteBuffer(buf);
return ret;
}
+
+// Native implementation of org.tensorflow.Graph.addGradients.
+//
+// Resolves the (operation handle, output index) pairs describing `y`, `x` and,
+// when dx_handles is non-null, `dx`, then calls TF_AddGradients on the graph.
+//
+// Returns a Java long[] of length 2 * nx: the first nx entries are the native
+// operation handles of the gradient outputs, the last nx entries are the
+// matching output indices. Returns nullptr with a Java exception pending if
+// the graph handle is invalid, an argument cannot be resolved, the dx length
+// differs from the y length, TF_AddGradients reports an error, or the result
+// array cannot be allocated.
+JNIEXPORT jlongArray JNICALL
+Java_org_tensorflow_Graph_addGradients(JNIEnv* env, jclass clazz, jlong handle,
+    jlongArray y_handles, jintArray y_indices,
+    jlongArray x_handles, jintArray x_indices,
+    jlongArray dx_handles, jintArray dx_indices) {
+
+  TF_Graph* g = requireHandle(env, handle);
+  if (g == nullptr) return nullptr;
+
+  const jint ny = env->GetArrayLength(y_handles);
+  const jint nx = env->GetArrayLength(x_handles);
+
+  std::unique_ptr<TF_Output[]> y(new TF_Output[ny]);
+  std::unique_ptr<TF_Output[]> x(new TF_Output[nx]);
+  std::unique_ptr<TF_Output[]> dx(nullptr);
+  std::unique_ptr<TF_Output[]> dy(new TF_Output[nx]);
+
+  resolveOutputs(env, "y", y_handles, y_indices, y.get(), ny);
+  resolveOutputs(env, "x", x_handles, x_indices, x.get(), nx);
+  // Stop here if y or x could not be resolved: most JNI functions (including
+  // GetArrayLength below) must not be called while an exception is pending.
+  if (env->ExceptionCheck()) return nullptr;
+
+  if (dx_handles != nullptr) {
+    const jint ndx = env->GetArrayLength(dx_handles);
+    if (ndx != ny) {
+      throwException(env, kIllegalArgumentException,
+                     "expected %d, got %d dx handles", ny, ndx);
+      return nullptr;
+    }
+    dx.reset(new TF_Output[ny]);
+    resolveOutputs(env, "dx", dx_handles, dx_indices, dx.get(), ny);
+    if (env->ExceptionCheck()) return nullptr;
+  }
+
+  TF_Status* status = TF_NewStatus();
+  TF_AddGradients(g, y.get(), ny, x.get(), nx, dx.get(), status, dy.get());
+
+  // throwExceptionIfNotOK returns false (after throwing) when status is not OK.
+  if (!throwExceptionIfNotOK(env, status)) {
+    TF_DeleteStatus(status);
+    return nullptr;
+  }
+  TF_DeleteStatus(status);
+
+  // returned array contains both op handles and output indices, in pair
+  jlongArray dy_handles_and_indices = env->NewLongArray(nx << 1);
+  // NewLongArray returns null when the array cannot be constructed.
+  if (dy_handles_and_indices == nullptr) return nullptr;
+  jlong* dy_elems = env->GetLongArrayElements(dy_handles_and_indices, nullptr);
+  // GetLongArrayElements returns null when the operation fails.
+  if (dy_elems == nullptr) return nullptr;
+  for (int i = 0, j = nx; i < nx; ++i, ++j) {
+    const TF_Output& dy_output = dy[i];
+    dy_elems[i] = reinterpret_cast<jlong>(dy_output.oper);
+    dy_elems[j] = static_cast<jlong>(dy_output.index);
+  }
+  // Mode 0: copy the values back into the Java array and free the buffer.
+  env->ReleaseLongArrayElements(dy_handles_and_indices, dy_elems, 0);
+
+  return dy_handles_and_indices;
+}
diff --git a/tensorflow/java/src/main/native/graph_jni.h b/tensorflow/java/src/main/native/graph_jni.h
index dd2e038332..4f87e8d5a7 100644
--- a/tensorflow/java/src/main/native/graph_jni.h
+++ b/tensorflow/java/src/main/native/graph_jni.h
@@ -73,6 +73,15 @@ JNIEXPORT jbyteArray JNICALL Java_org_tensorflow_Graph_toGraphDef(JNIEnv *,
jclass,
jlong);
+/*
+ * Class: org_tensorflow_Graph
+ * Method: addGradients
+ * Signature: (J[J[I[J[I[J[I)[J
+ */
+JNIEXPORT jlongArray JNICALL Java_org_tensorflow_Graph_addGradients(JNIEnv *,
+ jclass, jlong, jlongArray, jintArray, jlongArray, jintArray, jlongArray,
+ jintArray);
+
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
diff --git a/tensorflow/java/src/main/native/session_jni.cc b/tensorflow/java/src/main/native/session_jni.cc
index 2cd542d3c9..cb54daf137 100644
--- a/tensorflow/java/src/main/native/session_jni.cc
+++ b/tensorflow/java/src/main/native/session_jni.cc
@@ -17,6 +17,7 @@ limitations under the License.
#include <memory>
#include "tensorflow/c/c_api.h"
+#include "tensorflow/java/src/main/native/utils_jni.h"
#include "tensorflow/java/src/main/native/exception_jni.h"
#include "tensorflow/java/src/main/native/session_jni.h"
@@ -55,37 +56,6 @@ void resolveHandles(JNIEnv* env, const char* type, jlongArray src_array,
env->ReleaseLongArrayElements(src_array, src_start, JNI_ABORT);
}
-void resolveOutputs(JNIEnv* env, const char* type, jlongArray src_op,
- jintArray src_index, TF_Output* dst, jint n) {
- if (env->ExceptionCheck()) return;
- jint len = env->GetArrayLength(src_op);
- if (len != n) {
- throwException(env, kIllegalArgumentException,
- "expected %d, got %d %s Operations", n, len, type);
- return;
- }
- len = env->GetArrayLength(src_index);
- if (len != n) {
- throwException(env, kIllegalArgumentException,
- "expected %d, got %d %s Operation output indices", n, len,
- type);
- return;
- }
- jlong* op_handles = env->GetLongArrayElements(src_op, nullptr);
- jint* indices = env->GetIntArrayElements(src_index, nullptr);
- for (int i = 0; i < n; ++i) {
- if (op_handles[i] == 0) {
- throwException(env, kNullPointerException, "invalid %s (#%d of %d)", type,
- i, n);
- break;
- }
- dst[i] = TF_Output{reinterpret_cast<TF_Operation*>(op_handles[i]),
- static_cast<int>(indices[i])};
- }
- env->ReleaseIntArrayElements(src_index, indices, JNI_ABORT);
- env->ReleaseLongArrayElements(src_op, op_handles, JNI_ABORT);
-}
-
void TF_MaybeDeleteBuffer(TF_Buffer* buf) {
if (buf == nullptr) return;
TF_DeleteBuffer(buf);
diff --git a/tensorflow/java/src/main/native/utils_jni.cc b/tensorflow/java/src/main/native/utils_jni.cc
new file mode 100644
index 0000000000..069ac05a1c
--- /dev/null
+++ b/tensorflow/java/src/main/native/utils_jni.cc
@@ -0,0 +1,53 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/java/src/main/native/utils_jni.h"
+
+#include "tensorflow/java/src/main/native/exception_jni.h"
+
+// Converts the parallel Java arrays `src_op` (native TF_Operation handles) and
+// `src_index` (output indices) into `n` TF_Output values written to `dst`.
+//
+// Does nothing if a Java exception is already pending. Throws
+// IllegalArgumentException if either array does not have exactly `n` elements
+// and NullPointerException if an operation handle is 0; in the latter case the
+// entries of `dst` from the offending index onwards are left unwritten.
+// `type` is only used to label the arguments in error messages.
+void resolveOutputs(JNIEnv* env, const char* type, jlongArray src_op,
+                    jintArray src_index, TF_Output* dst, jint n) {
+  if (env->ExceptionCheck()) return;
+  jint len = env->GetArrayLength(src_op);
+  if (len != n) {
+    throwException(env, kIllegalArgumentException,
+                   "expected %d, got %d %s Operations", n, len, type);
+    return;
+  }
+  len = env->GetArrayLength(src_index);
+  if (len != n) {
+    throwException(env, kIllegalArgumentException,
+                   "expected %d, got %d %s Operation output indices", n, len,
+                   type);
+    return;
+  }
+  // Get<Type>ArrayElements returns null when the operation fails (typically
+  // leaving an OutOfMemoryError pending); never dereference that.
+  jlong* op_handles = env->GetLongArrayElements(src_op, nullptr);
+  if (op_handles == nullptr) return;
+  jint* indices = env->GetIntArrayElements(src_index, nullptr);
+  if (indices == nullptr) {
+    env->ReleaseLongArrayElements(src_op, op_handles, JNI_ABORT);
+    return;
+  }
+  for (int i = 0; i < n; ++i) {
+    if (op_handles[i] == 0) {
+      throwException(env, kNullPointerException, "invalid %s (#%d of %d)", type,
+                     i, n);
+      break;
+    }
+    dst[i] = TF_Output{reinterpret_cast<TF_Operation*>(op_handles[i]),
+                       static_cast<int>(indices[i])};
+  }
+  // JNI_ABORT: the arrays were only read, so nothing needs to be copied back.
+  env->ReleaseIntArrayElements(src_index, indices, JNI_ABORT);
+  env->ReleaseLongArrayElements(src_op, op_handles, JNI_ABORT);
+}
+
+
+
+
diff --git a/tensorflow/java/src/main/native/utils_jni.h b/tensorflow/java/src/main/native/utils_jni.h
new file mode 100644
index 0000000000..352298e7de
--- /dev/null
+++ b/tensorflow/java/src/main/native/utils_jni.h
@@ -0,0 +1,33 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_JAVA_UTILS_JNI_H_
+#define TENSORFLOW_JAVA_UTILS_JNI_H_
+
+#include <jni.h>
+
+#include "tensorflow/c/c_api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Converts the parallel Java arrays `src_op` (native TF_Operation handles) and
+// `src_index` (output indices) into `n` TF_Output values written to `dst`.
+// Returns immediately if a Java exception is already pending; throws
+// IllegalArgumentException if either array's length differs from `n` and
+// NullPointerException if an operation handle is 0. `type` is only used to
+// label the arguments in error messages.
+void resolveOutputs(JNIEnv* env, const char* type, jlongArray src_op,
+ jintArray src_index, TF_Output* dst, jint n);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+#endif /* TENSORFLOW_JAVA_UTILS_JNI_H_ */
diff --git a/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java b/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java
index c540299bdc..c2e52c22c6 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertTrue;
import java.util.HashSet;
import java.util.Iterator;
+
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@@ -129,4 +130,106 @@ public class GraphTest {
// expected exception.
}
}
+
+  /** Gradients of a single {@code y} w.r.t. one and then two {@code x}s. */
+  @Test
+  public void addGradientsToGraph() {
+    try (Graph graph = new Graph();
+        Session session = new Session(graph)) {
+
+      // y0 = x1^2, y1 = y0^2 = x1^4, y2 = y0 + x2 = x1^2 + x2
+      Output<Float> x1 = TestUtil.placeholder(graph, "x1", Float.class);
+      Output<Float> x2 = TestUtil.placeholder(graph, "x2", Float.class);
+      Output<Float> y0 = TestUtil.square(graph, "y0", x1);
+      Output<Float> y1 = TestUtil.square(graph, "y1", y0);
+      Output<Float> y2 = TestUtil.addN(graph, y0, x2);
+
+      // dy1/dx1
+      Output<?>[] dy1 = graph.addGradients(y1, toArray(x1));
+      assertNotNull(dy1);
+      assertEquals(1, dy1.length);
+      assertEquals(DataType.FLOAT, dy1[0].dataType());
+
+      // dy2/dx1 and dy2/dx2
+      Output<?>[] dy2 = graph.addGradients(y2, toArray(x1, x2));
+      assertNotNull(dy2);
+      assertEquals(2, dy2.length);
+      for (Output<?> gradient : dy2) {
+        assertEquals(DataType.FLOAT, gradient.dataType());
+      }
+
+      try (Tensor<Float> x1Value = Tensors.create(3.0f);
+          Tensor<Float> x2Value = Tensors.create(2.0f);
+          TestUtil.AutoCloseableList<Tensor<?>> fetched = new TestUtil.AutoCloseableList<>(
+              session.runner()
+                  .feed(x1, x1Value)
+                  .feed(x2, x2Value)
+                  .fetch(dy1[0])
+                  .fetch(dy2[0])
+                  .fetch(dy2[1])
+                  .run())) {
+
+        assertEquals(3, fetched.size());
+        // With x1 = 3: dy1/dx1 = 4 * x1^3, dy2/dx1 = 2 * x1, dy2/dx2 = 1.
+        assertEquals(108.0f, fetched.get(0).floatValue(), 0.0f);
+        assertEquals(6.0f, fetched.get(1).floatValue(), 0.0f);
+        assertEquals(1.0f, fetched.get(2).floatValue(), 0.0f);
+      }
+    }
+  }
+
+  /** Gradients of several {@code y}s w.r.t. one {@code x}, without initial gradients. */
+  @Test
+  public void addGradientSumsToGraph() {
+    try (Graph graph = new Graph();
+        Session session = new Session(graph)) {
+
+      // y0 = x^2 and y1 = y0^2 = x^4
+      Output<Float> x = TestUtil.placeholder(graph, "x", Float.class);
+      Output<Float> y0 = TestUtil.square(graph, "y0", x);
+      Output<Float> y1 = TestUtil.square(graph, "y1", y0);
+
+      Output<?>[] ys = toArray(y0, y1);
+      Output<?>[] xs = toArray(x);
+      Output<?>[] gradients = graph.addGradients(ys, xs, null);
+      assertNotNull(gradients);
+      assertEquals(1, gradients.length);
+      assertEquals(DataType.FLOAT, gradients[0].dataType());
+
+      try (Tensor<Float> xValue = Tensors.create(3.0f);
+          Tensor<?> result = session.runner()
+              .feed(x, xValue)
+              .fetch(gradients[0])
+              .run()
+              .get(0)) {
+
+        // With x = 3: dy0/dx + dy1/dx = 2x + 4x^3 = 6 + 108.
+        assertEquals(114.0f, result.floatValue(), 0.0f);
+      }
+    }
+  }
+
+  /** Chains two gradient computations by feeding the first result as {@code dx} of the second. */
+  @Test
+  public void addGradientsWithInitialValuesToGraph() {
+    try (Graph graph = new Graph();
+        Session session = new Session(graph)) {
+
+      // y0 = x^2 and y1 = y0^2 = x^4
+      Output<Float> x = TestUtil.placeholder(graph, "x", Float.class);
+      Output<Float> y0 = TestUtil.square(graph, "y0", x);
+      Output<Float> y1 = TestUtil.square(graph, "y1", y0);
+
+      // First step: dy1/dy0
+      Output<?>[] outerGradient = graph.addGradients(y1, toArray(y0));
+      assertNotNull(outerGradient);
+      assertEquals(1, outerGradient.length);
+      assertEquals(DataType.FLOAT, outerGradient[0].dataType());
+
+      // Second step: dy0/dx, seeded with the gradient computed above
+      Output<?>[] chainedGradient =
+          graph.addGradients(toArray(y0), toArray(x), toArray(outerGradient[0]));
+      assertNotNull(chainedGradient);
+      assertEquals(1, chainedGradient.length);
+      assertEquals(DataType.FLOAT, chainedGradient[0].dataType());
+
+      try (Tensor<Float> xValue = Tensors.create(3.0f);
+          Tensor<?> result = session.runner()
+              .feed(x, xValue)
+              .fetch(chainedGradient[0])
+              .run()
+              .get(0)) {
+
+        // With x = 3: (dy1/dy0) * (dy0/dx) = 2x^2 * 2x = 18 * 6.
+        assertEquals(108.0f, result.floatValue(), 0.0f);
+      }
+    }
+  }
+
+ // Varargs convenience: returns its arguments as an Output<?>[] so the tests
+ // can build array arguments inline.
+ private static Output<?>[] toArray(Output<?>... outputs) {
+ return outputs;
+ }
}
diff --git a/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java b/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java
index e8cc76c2a6..7d5980bcde 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java
@@ -20,8 +20,6 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
-import java.util.ArrayList;
-import java.util.Collection;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@@ -36,8 +34,8 @@ public class SessionTest {
Session s = new Session(g)) {
TestUtil.transpose_A_times_X(g, new int[][] {{2}, {3}});
try (Tensor<Integer> x = Tensors.create(new int[][] {{5}, {7}});
- AutoCloseableList<Tensor<?>> outputs =
- new AutoCloseableList<Tensor<?>>(s.runner().feed("X", x).fetch("Y").run())) {
+ TestUtil.AutoCloseableList<Tensor<?>> outputs =
+ new TestUtil.AutoCloseableList<Tensor<?>>(s.runner().feed("X", x).fetch("Y").run())) {
assertEquals(1, outputs.size());
final int[][] expected = {{31}};
assertArrayEquals(expected, outputs.get(0).copyTo(new int[1][1]));
@@ -53,8 +51,8 @@ public class SessionTest {
Output<Integer> feed = g.operation("X").output(0);
Output<Integer> fetch = g.operation("Y").output(0);
try (Tensor<Integer> x = Tensors.create(new int[][] {{5}, {7}});
- AutoCloseableList<Tensor<?>> outputs =
- new AutoCloseableList<Tensor<?>>(s.runner().feed(feed, x).fetch(fetch).run())) {
+ TestUtil.AutoCloseableList<Tensor<?>> outputs =
+ new TestUtil.AutoCloseableList<Tensor<?>>(s.runner().feed(feed, x).fetch(fetch).run())) {
assertEquals(1, outputs.size());
final int[][] expected = {{31}};
assertArrayEquals(expected, outputs.get(0).copyTo(new int[1][1]));
@@ -112,7 +110,7 @@ public class SessionTest {
.setOptions(fullTraceRunOptions())
.runAndFetchMetadata();
// Sanity check on outputs.
- AutoCloseableList<Tensor<?>> outputs = new AutoCloseableList<Tensor<?>>(result.outputs);
+ TestUtil.AutoCloseableList<Tensor<?>> outputs = new TestUtil.AutoCloseableList<Tensor<?>>(result.outputs);
assertEquals(1, outputs.size());
final int[][] expected = {{31}};
assertArrayEquals(expected, outputs.get(0).copyTo(new int[1][1]));
@@ -135,8 +133,8 @@ public class SessionTest {
Session s = new Session(g)) {
TestUtil.constant(g, "c1", 2718);
TestUtil.constant(g, "c2", 31415);
- AutoCloseableList<Tensor<?>> outputs =
- new AutoCloseableList<Tensor<?>>(s.runner().fetch("c2").fetch("c1").run());
+ TestUtil.AutoCloseableList<Tensor<?>> outputs =
+ new TestUtil.AutoCloseableList<Tensor<?>>(s.runner().fetch("c2").fetch("c1").run());
assertEquals(2, outputs.size());
assertEquals(31415, outputs.get(0).intValue());
assertEquals(2718, outputs.get(1).intValue());
@@ -164,28 +162,6 @@ public class SessionTest {
Session s = new Session(g, singleThreadConfigProto())) {}
}
- private static final class AutoCloseableList<E extends AutoCloseable> extends ArrayList<E>
- implements AutoCloseable {
- AutoCloseableList(Collection<? extends E> c) {
- super(c);
- }
-
- @Override
- public void close() {
- Exception toThrow = null;
- for (AutoCloseable c : this) {
- try {
- c.close();
- } catch (Exception e) {
- toThrow = e;
- }
- }
- if (toThrow != null) {
- throw new RuntimeException(toThrow);
- }
- }
- }
-
private static byte[] fullTraceRunOptions() {
// Ideally this would use the generated Java sources for protocol buffers
// and end up with something like the snippet below. However, generating
diff --git a/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java b/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java
index c973b5a3d8..4e84886416 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java
@@ -16,9 +16,34 @@ limitations under the License.
package org.tensorflow;
import java.lang.reflect.Array;
+import java.util.ArrayList;
+import java.util.Collection;
/** Static utility functions. */
public class TestUtil {
+
+  /**
+   * An {@link ArrayList} of closeable elements that closes every element when it is itself
+   * closed, so a whole list of resources can be managed by one try-with-resources statement.
+   */
+  public static final class AutoCloseableList<E extends AutoCloseable> extends ArrayList<E>
+      implements AutoCloseable {
+    AutoCloseableList(Collection<? extends E> c) {
+      super(c);
+    }
+
+    /**
+     * Closes every element, even if some of them fail to close.
+     *
+     * @throws RuntimeException wrapping the first exception raised by an element; exceptions
+     *     raised by later elements are attached to it as suppressed exceptions instead of being
+     *     dropped
+     */
+    @Override
+    public void close() {
+      Exception toThrow = null;
+      for (AutoCloseable c : this) {
+        try {
+          c.close();
+        } catch (Exception e) {
+          if (toThrow == null) {
+            toThrow = e;
+          } else if (e != toThrow) {
+            // addSuppressed rejects self-suppression, hence the identity check.
+            toThrow.addSuppressed(e);
+          }
+        }
+      }
+      if (toThrow != null) {
+        throw new RuntimeException(toThrow);
+      }
+    }
+  }
+
public static <T> Output<T> constant(Graph g, String name, Object value) {
try (Tensor<?> t = Tensor.create(value)) {
return g.opBuilder("Const", name)
@@ -36,7 +61,7 @@ public class TestUtil {
.<T>output(0);
}
- public static Output<?> addN(Graph g, Output<?>... inputs) {
+ public static <T> Output<T> addN(Graph g, Output<?>... inputs) {
return g.opBuilder("AddN", "AddN").addInputList(inputs).build().output(0);
}
@@ -58,6 +83,13 @@ public class TestUtil {
.setAttr("num_split", numSplit)
.build();
}
+
+ public static <T> Output<T> square(Graph g, String name, Output<T> value) {
+ return g.opBuilder("Square", name)
+ .addInput(value)
+ .build()
+ .<T>output(0);
+ }
public static void transpose_A_times_X(Graph g, int[][] a) {
Output<Integer> aa = constant(g, "A", a);
diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py
index 009ac9d8fd..a9fd8f8e1a 100644
--- a/tensorflow/python/estimator/model_fn.py
+++ b/tensorflow/python/estimator/model_fn.py
@@ -99,7 +99,7 @@ class EstimatorSpec(
ignored in eval and infer modes. Example:
```python
- def my_model_fn(mode, features, labels):
+ def my_model_fn(features, labels, mode):
predictions = ...
loss = ...
train_op = ...
@@ -114,7 +114,7 @@ class EstimatorSpec(
given mode. Example:
```python
- def my_model_fn(mode, features, labels):
+ def my_model_fn(features, labels, mode):
if (mode == tf.estimator.ModeKeys.TRAIN or
mode == tf.estimator.ModeKeys.EVAL):
loss = ...
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 89afd1d25b..cf0b1e36fb 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -3239,8 +3239,9 @@ class Graph(object):
# the name will still appear in _names_in_use even though the name hasn't
# been used. This is ok, just leave _names_in_use as-is in this case.
# TODO(skyewm): make the C API guarantee no name conflicts.
- if ret.name not in self._names_in_use:
- self._names_in_use[ret.name] = 1
+ name_key = ret.name.lower()
+ if name_key not in self._names_in_use:
+ self._names_in_use[name_key] = 1
self._create_op_helper(ret, compute_device=compute_device)
return ret
@@ -3949,20 +3950,27 @@ class Graph(object):
"""
if self._name_stack:
name = self._name_stack + "/" + name
- i = self._names_in_use.get(name, 0)
- # Increment the number for "name".
+
+ # For the sake of checking for names in use, we treat names as case
+ # insensitive (e.g. foo = Foo).
+ name_key = name.lower()
+ i = self._names_in_use.get(name_key, 0)
+ # Increment the number for "name_key".
if mark_as_used:
- self._names_in_use[name] = i + 1
+ self._names_in_use[name_key] = i + 1
if i > 0:
- base_name = name
- # Make sure the composed name is not already used.
- while name in self._names_in_use:
- name = "%s_%d" % (base_name, i)
+ base_name_key = name_key
+ # Make sure the composed name key is not already used.
+ while name_key in self._names_in_use:
+ name_key = "%s_%d" % (base_name_key, i)
i += 1
- # Mark the composed name as used in case someone wants
+ # Mark the composed name_key as used in case someone wants
# to call unique_name("name_1").
if mark_as_used:
- self._names_in_use[name] = 1
+ self._names_in_use[name_key] = 1
+
+ # Return the new name with the original capitalization of the given name.
+ name = "%s_%d" % (name, i-1)
return name
def get_name_scope(self):
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index c72406e92b..150100d771 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -965,6 +965,15 @@ class NameStackTest(test_util.TensorFlowTestCase):
self.assertEqual("foo_1", g.unique_name("foo"))
self.assertEqual("foo_3", g.unique_name("foo"))
+ def testUniqueNameCaseInsensitivity(self):
+ # Names that differ only in case must be treated as already in use.
+ g = ops.Graph()
+ self.assertEqual("foo", g.unique_name("foo"))
+ # "Foo" clashes with "foo": it gets a suffix but keeps its capitalization.
+ self.assertEqual("Foo_1", g.unique_name("Foo"))
+ with g.name_scope("bar"):
+ self.assertEqual("bar/foo", g.unique_name("foo"))
+ # The scope name "Bar" clashes with "bar" and is uniquified the same way.
+ with g.name_scope("Bar"):
+ self.assertEqual("Bar_1/foo", g.unique_name("foo"))
+
def testInvalidNameRaisesError(self):
g = ops.Graph()
with g.name_scope(""): # Should not raise
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index 2c9f391d01..7d07c77c79 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -1390,7 +1390,7 @@ class LayoutOptimizerTest(test.TestCase):
expected_num_transposes = 3
self.assertEqual(expected_num_transposes, num_transposes)
self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
- self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes)
+ self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes)
self.assertAllClose(output_val_ref, output_val, atol=1e-3)
def testLoopWithVecAnd4D(self):
@@ -1414,7 +1414,7 @@ class LayoutOptimizerTest(test.TestCase):
expected_num_transposes = 2
self.assertEqual(expected_num_transposes, num_transposes)
self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
- self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes)
+ self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes)
self.assertAllClose(output_val_ref, output_val, atol=1e-3)
def testBinaryOpSecondPort(self):
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 5796c874f9..8a6614c837 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -893,6 +893,7 @@ tf_py_test(
"//third_party/py/numpy",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework",
+ "//tensorflow/python:sparse_grad",
"//tensorflow/python:sparse_ops",
],
)
diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py
index 795aa67248..927ca012ae 100644
--- a/tensorflow/python/kernel_tests/init_ops_test.py
+++ b/tensorflow/python/kernel_tests/init_ops_test.py
@@ -364,14 +364,52 @@ class UniformUnitScalingInitializationTest(test.TestCase):
class VarianceScalingInitializationTest(test.TestCase):
+ def testTruncatedNormalDistribution(self):
+ # distribution='truncated_normal' must sample through
+ # random_ops.truncated_normal and give roughly zero mean and a variance of
+ # 1 / shape[0].
+ shape = [100, 100]
+ expect_mean = 0.
+ expect_var = 1. / shape[0]
+ init = init_ops.variance_scaling_initializer(
+ distribution='truncated_normal')
+
+ # wraps= keeps the real sampler in place while recording whether it is called.
+ with self.test_session(use_gpu=True), \
+ test.mock.patch.object(
+ random_ops, 'truncated_normal', wraps=random_ops.truncated_normal) \
+ as mock_truncated_normal:
+ x = init(shape).eval()
+ self.assertTrue(mock_truncated_normal.called)
+
+ self.assertNear(np.mean(x), expect_mean, err=1e-2)
+ self.assertNear(np.var(x), expect_var, err=1e-2)
+
def testNormalDistribution(self):
shape = [100, 100]
expect_mean = 0.
expect_var = 1. / shape[0]
init = init_ops.variance_scaling_initializer(distribution='normal')
- with self.test_session(use_gpu=True):
+ with self.test_session(use_gpu=True), \
+ test.mock.patch.object(
+ random_ops, 'truncated_normal', wraps=random_ops.truncated_normal) \
+ as mock_truncated_normal:
+ x = init(shape).eval()
+ self.assertTrue(mock_truncated_normal.called)
+
+ self.assertNear(np.mean(x), expect_mean, err=1e-2)
+ self.assertNear(np.var(x), expect_var, err=1e-2)
+
+ def testUntruncatedNormalDistribution(self):
+ shape = [100, 100]
+ expect_mean = 0.
+ expect_var = 1. / shape[0]
+ init = init_ops.variance_scaling_initializer(
+ distribution='untruncated_normal')
+
+ with self.test_session(use_gpu=True), \
+ test.mock.patch.object(
+ random_ops, 'random_normal', wraps=random_ops.random_normal) \
+ as mock_random_normal:
x = init(shape).eval()
+ self.assertTrue(mock_random_normal.called)
self.assertNear(np.mean(x), expect_mean, err=1e-2)
self.assertNear(np.var(x), expect_var, err=1e-2)
diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py
index 7368251ab6..34e34d9d1b 100644
--- a/tensorflow/python/kernel_tests/shape_ops_test.py
+++ b/tensorflow/python/kernel_tests/shape_ops_test.py
@@ -642,6 +642,29 @@ class TileTest(test.TestCase):
err = gradient_checker.compute_gradient_error(a, [4, 2], tiled, [4, 4])
self.assertLess(err, 1e-3)
+ def testGradientWithSparseGradWithRank1(self):
+ # Numerically checks the gradient of gather(tile(inputs)) w.r.t. a rank-1
+ # input.
+ # NOTE(review): presumably gather hands Tile's gradient function an
+ # IndexedSlices rather than a dense Tensor - confirm.
+ inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0],
+ dtype=dtypes.float32)
+ # The gathered indices include repeats (2, 2, 2).
+ outputs = array_ops.gather(array_ops.tile(inputs, [3]),
+ [1, 5, 9, 3, 7, 2, 2, 2])
+ with self.test_session():
+ error = gradient_checker.compute_gradient_error(
+ inputs, inputs.get_shape().as_list(),
+ outputs, outputs.get_shape().as_list())
+ self.assertLess(error, 1e-4)
+
+ def testGradientWithSparseGradWithRank3(self):
+ # Same check as the rank-1 variant, with the input reshaped to [4, 1, 1] and
+ # tiled along all three dimensions before gathering along the first one.
+ # NOTE(review): presumably gather hands Tile's gradient function an
+ # IndexedSlices rather than a dense Tensor - confirm.
+ inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0],
+ dtype=dtypes.float32)
+ inputs = array_ops.reshape(inputs, [-1, 1, 1])
+ outputs = array_ops.gather(array_ops.tile(inputs, [3, 4, 2]),
+ [1, 5, 9, 3, 7, 2, 2, 2])
+ with self.test_session():
+ error = gradient_checker.compute_gradient_error(
+ inputs, inputs.get_shape().as_list(),
+ outputs, outputs.get_shape().as_list())
+ self.assertLess(error, 1e-4)
+
def testShapeFunctionEdgeCases(self):
# Unknown multiples shape.
inp = constant_op.constant(0.0, shape=[4, 4, 4, 4])
diff --git a/tensorflow/python/kernel_tests/sparse_slice_op_test.py b/tensorflow/python/kernel_tests/sparse_slice_op_test.py
index da116601f8..97f30daf4a 100644
--- a/tensorflow/python/kernel_tests/sparse_slice_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_slice_op_test.py
@@ -21,13 +21,15 @@ from __future__ import print_function
import numpy as np
from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import sparse_ops
+import tensorflow.python.ops.sparse_grad # pylint: disable=unused-import
from tensorflow.python.platform import test
class SparseSliceOpTest(test.TestCase):
- def _SparseTensor_4x6(self):
+ def _SparseTensor_4x6(self, val_dtype=np.int64):
# [0 | |2 | |4 |5 ]
# [ |11| |13|14| ]
# [20| | |23| |25]
@@ -37,7 +39,7 @@ class SparseSliceOpTest(test.TestCase):
[2, 3], [2, 5], [3, 0], [3, 2], [3, 3], [3, 5]]).astype(
np.int64)
val = np.array([0, 2, 4, 5, 11, 13, 14, 20, 23, 25, 30, 32, 33, 35]).astype(
- np.int64)
+ val_dtype)
shape = np.array([4, 6]).astype(np.int64)
return sparse_tensor.SparseTensor(ind, val, shape)
@@ -244,6 +246,22 @@ class SparseSliceOpTest(test.TestCase):
self.assertAllEqual(sparse_tensor5.values.eval(), [5, 25, 35])
self.assertAllEqual(sparse_tensor5.dense_shape.eval(), [4, 1])
+ def testGradients(self):
+ # Numerically checks the gradient of sparse_slice w.r.t. the values of the
+ # 4x6 input for three column-wise slices. The values are float32 rather than
+ # the helper's default int64.
+ sp_input = self._SparseTensor_4x6(val_dtype=np.float32)
+ # (start, size) pairs; two of the sizes extend past the 4x6 dense shape.
+ start_and_size = [([0, 0], [4, 2]),
+ ([0, 2], [5, 2]),
+ ([0, 4], [5, 3])]
+
+ with self.test_session(use_gpu=False):
+ for start, size in start_and_size:
+ sp_output = sparse_ops.sparse_slice(sp_input, start, size)
+ # The number of non-empty values gives the shapes handed to the checker.
+ nnz_in = len(sp_input.values.eval())
+ nnz_out = len(sp_output.values.eval())
+
+ err = gradient_checker.compute_gradient_error(
+ [sp_input.values], [(nnz_in,)], sp_output.values, (nnz_out,))
+ self.assertLess(err, 1e-3)
+
if __name__ == '__main__':
test.main()
diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py
index 3678bd4c1f..fe459a96b9 100644
--- a/tensorflow/python/ops/array_grad.py
+++ b/tensorflow/python/ops/array_grad.py
@@ -568,7 +568,6 @@ ops.NotDifferentiable("Size")
@ops.RegisterGradient("Tile")
def _TileGrad(op, grad):
"""Sum reduces grad along the tiled dimensions."""
- assert isinstance(grad, ops.Tensor)
input_shape = array_ops.shape(op.inputs[0])
# We interleave multiples and input_shape to get split_shape,
# reshape grad to split_shape, and reduce along all even
@@ -581,6 +580,13 @@ def _TileGrad(op, grad):
split_shape = array_ops.reshape(
array_ops.transpose(array_ops.stack([op.inputs[1], input_shape])), [-1])
axes = math_ops.range(0, array_ops.size(split_shape), 2)
+ # Sum reduces grad along the first dimension for IndexedSlices
+ if isinstance(grad, ops.IndexedSlices):
+ grad = math_ops.unsorted_segment_sum(
+ grad.values,
+ math_ops.mod(grad.indices, input_shape[0]),
+ input_shape[0])
+ split_shape = array_ops.concat([[1], split_shape[1:]], axis=0)
input_grad = math_ops.reduce_sum(array_ops.reshape(grad, split_shape), axes)
# Fix shape inference
if not context.executing_eagerly():
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index c8442b42d5..fc37805c79 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -3135,6 +3135,7 @@ def while_loop(cond,
happen is that the thread updating `x` can never get ahead of the
counter thread because the thread incrementing `x` depends on the value
of the counter.
+
```python
import tensorflow as tf
diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py
index c41e952167..5bfc5ce2a7 100644
--- a/tensorflow/python/ops/init_ops.py
+++ b/tensorflow/python/ops/init_ops.py
@@ -43,7 +43,8 @@ from tensorflow.python.ops import linalg_ops_impl
from tensorflow.python.ops import gen_linalg_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
-from tensorflow.python.util.deprecation import deprecated
+from tensorflow.python.util.deprecation import (
+ deprecated, deprecated_arg_values)
from tensorflow.python.util.tf_export import tf_export
@@ -409,8 +410,10 @@ class UniformUnitScaling(Initializer):
class VarianceScaling(Initializer):
"""Initializer capable of adapting its scale to the shape of weights tensors.
- With `distribution="normal"`, samples are drawn from a truncated normal
- distribution centered on zero, with `stddev = sqrt(scale / n)`
+ With `distribution="truncated_normal" or "untruncated_normal"`,
+ samples are drawn from a truncated/untruncated normal
+ distribution with a mean of zero and a standard deviation (after truncation,
+ if used) `stddev = sqrt(scale / n)`
where n is:
- number of input units in the weight tensor, if mode = "fan_in"
- number of output units, if mode = "fan_out"
@@ -433,10 +436,14 @@ class VarianceScaling(Initializer):
"distribution" arguments.
"""
+ @deprecated_arg_values(
+ None,
+ "`normal` is a deprecated alias for `truncated_normal`",
+ distribution="normal")
def __init__(self,
scale=1.0,
mode="fan_in",
- distribution="normal",
+ distribution="truncated_normal",
seed=None,
dtype=dtypes.float32):
if scale <= 0.:
@@ -444,7 +451,8 @@ class VarianceScaling(Initializer):
if mode not in {"fan_in", "fan_out", "fan_avg"}:
raise ValueError("Invalid `mode` argument:", mode)
distribution = distribution.lower()
- if distribution not in {"normal", "uniform"}:
+ if distribution not in {"normal", "uniform",
+ "truncated_normal", "untruncated_normal"}:
raise ValueError("Invalid `distribution` argument:", distribution)
self.scale = scale
self.mode = mode
@@ -466,11 +474,15 @@ class VarianceScaling(Initializer):
scale /= max(1., fan_out)
else:
scale /= max(1., (fan_in + fan_out) / 2.)
- if self.distribution == "normal":
+ if self.distribution == "normal" or self.distribution == "truncated_normal":
# constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
stddev = math.sqrt(scale) / .87962566103423978
return random_ops.truncated_normal(
shape, 0.0, stddev, dtype, seed=self.seed)
+ elif self.distribution == "untruncated_normal":
+ stddev = math.sqrt(scale)
+ return random_ops.random_normal(
+ shape, 0.0, stddev, dtype, seed=self.seed)
else:
limit = math.sqrt(3.0 * scale)
return random_ops.random_uniform(
diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index 9ba91772f5..66633c8b12 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -878,7 +878,8 @@ def sparse_softmax_cross_entropy(
exception when this op is run on CPU, and return `NaN` for corresponding
loss and gradient rows on GPU.
logits: Unscaled log probabilities of shape
- `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or `float64`.
+ `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32` or
+ `float64`.
weights: Coefficients for the loss. This must be scalar or broadcastable to
`labels` (i.e. same rank and each dimension is either 1 or the same).
scope: the scope for the operations performed in computing the loss.
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 5a3b669c28..41d54a6c2f 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -2009,7 +2009,8 @@ def sparse_softmax_cross_entropy_with_logits(
exception when this op is run on CPU, and return `NaN` for corresponding
loss and gradient rows on GPU.
logits: Unscaled log probabilities of shape
- `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or `float64`.
+ `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32`, or
+ `float64`.
name: A name for the operation (optional).
Returns:
diff --git a/tensorflow/python/ops/sparse_grad.py b/tensorflow/python/ops/sparse_grad.py
index 97353d6c74..1223b290ff 100644
--- a/tensorflow/python/ops/sparse_grad.py
+++ b/tensorflow/python/ops/sparse_grad.py
@@ -116,6 +116,35 @@ def _SparseReduceSumGrad(op, out_grad):
None, None)
+@ops.RegisterGradient("SparseSlice")
+def _SparseSliceGrad(op, *grads):
+  """Gradient function registered for the SparseSlice op.
+
+  Propagates the upstream gradient of the slice's non-empty values back to
+  the non-empty values of the `SparseTensor` that was sliced.
+
+  Args:
+    op: the SparseSlice op
+    *grads: the incoming gradients, one element per output of `op`; only the
+      second element (the gradient w.r.t. the output values) is used.
+
+  Returns:
+    A 5-tuple with one entry per input of SparseSlice, in the order
+    (indices, values, shape, start, size). Only the `values` entry is a
+    tensor; the other four entries are None.
+  """
+  val_grad = gen_sparse_ops.sparse_slice_grad(
+      grads[1],        # upstream gradient w.r.t. the sliced values
+      op.inputs[0],    # indices of the input SparseTensor
+      op.inputs[3],    # start of the slice
+      op.outputs[0])   # indices of the sliced SparseTensor
+  # The gradient has one entry per input value, so reuse the static shape of
+  # the `values` input.
+  val_grad.set_shape(op.inputs[1].get_shape())
+  return None, val_grad, None, None, None
+
+
+
@ops.RegisterGradient("SparseTensorDenseMatMul")
def _SparseTensorDenseMatMulGrad(op, grad):
"""Gradients for the dense tensor in the SparseTensorDenseMatMul op.
diff --git a/tensorflow/stream_executor/BUILD b/tensorflow/stream_executor/BUILD
index 21295abed1..e742f8e8d5 100644
--- a/tensorflow/stream_executor/BUILD
+++ b/tensorflow/stream_executor/BUILD
@@ -2,6 +2,7 @@ licenses(["restricted"])
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured")
load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static")
+load("//tensorflow:tensorflow.bzl", "cc_header_only_library")
STREAM_EXECUTOR_HEADERS = glob([
"*.h",
@@ -51,6 +52,14 @@ cc_library(
] + if_static([":stream_executor_impl"]),
)
+cc_header_only_library(
+ name = "stream_executor_headers_lib",
+ visibility = ["//visibility:public"],
+ deps = [
+ ":stream_executor",
+ ],
+)
+
cc_library(
name = "cuda_platform",
srcs = if_cuda_is_configured(
diff --git a/tensorflow/tools/api/generator/create_python_api.py b/tensorflow/tools/api/generator/create_python_api.py
index 671b7e387e..48d7dcd09e 100644
--- a/tensorflow/tools/api/generator/create_python_api.py
+++ b/tensorflow/tools/api/generator/create_python_api.py
@@ -180,7 +180,7 @@ def get_api_init_text(package, api_name):
for module in list(sys.modules.values()):
# Only look at tensorflow modules.
if (not module or not hasattr(module, '__name__') or
- package not in module.__name__):
+ module.__name__ is None or package not in module.__name__):
continue
# Do not generate __init__.py files for contrib modules for now.
if '.contrib.' in module.__name__ or module.__name__.endswith('.contrib'):
diff --git a/tensorflow/tools/api/golden/tensorflow.initializers.variance_scaling.pbtxt b/tensorflow/tools/api/golden/tensorflow.initializers.variance_scaling.pbtxt
index a6b6e5eceb..86340913e2 100644
--- a/tensorflow/tools/api/golden/tensorflow.initializers.variance_scaling.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.initializers.variance_scaling.pbtxt
@@ -5,7 +5,7 @@ tf_class {
is_instance: "<type \'object\'>"
member_method {
name: "__init__"
- argspec: "args=[\'self\', \'scale\', \'mode\', \'distribution\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'fan_in\', \'normal\', \'None\', \"<dtype: \'float32\'>\"], "
+ argspec: "args=[\'self\', \'scale\', \'mode\', \'distribution\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'fan_in\', \'truncated_normal\', \'None\', \"<dtype: \'float32\'>\"], "
}
member_method {
name: "from_config"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.-variance-scaling.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.-variance-scaling.pbtxt
index 32a6f6ee88..03f4064b9e 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.initializers.-variance-scaling.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.-variance-scaling.pbtxt
@@ -5,7 +5,7 @@ tf_class {
is_instance: "<type \'object\'>"
member_method {
name: "__init__"
- argspec: "args=[\'self\', \'scale\', \'mode\', \'distribution\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'fan_in\', \'normal\', \'None\', \"<dtype: \'float32\'>\"], "
+ argspec: "args=[\'self\', \'scale\', \'mode\', \'distribution\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'fan_in\', \'truncated_normal\', \'None\', \"<dtype: \'float32\'>\"], "
}
member_method {
name: "from_config"
diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt
index 5470164a5b..9ec20f0955 100644
--- a/tensorflow/tools/api/golden/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.pbtxt
@@ -817,6 +817,10 @@ tf_module {
argspec: "args=[\'shape_x\', \'shape_y\'], varargs=None, keywords=None, defaults=None"
}
member_method {
+ name: "broadcast_to"
+ argspec: "args=[\'input\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+ }
+ member_method {
name: "case"
argspec: "args=[\'pred_fn_pairs\', \'default\', \'exclusive\', \'strict\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'False\', \'case\'], "
}
diff --git a/tensorflow/tools/api/golden/tensorflow.variance_scaling_initializer.pbtxt b/tensorflow/tools/api/golden/tensorflow.variance_scaling_initializer.pbtxt
index a58398d645..09d7bc03b4 100644
--- a/tensorflow/tools/api/golden/tensorflow.variance_scaling_initializer.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.variance_scaling_initializer.pbtxt
@@ -5,7 +5,7 @@ tf_class {
is_instance: "<type \'object\'>"
member_method {
name: "__init__"
- argspec: "args=[\'self\', \'scale\', \'mode\', \'distribution\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'fan_in\', \'normal\', \'None\', \"<dtype: \'float32\'>\"], "
+ argspec: "args=[\'self\', \'scale\', \'mode\', \'distribution\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'fan_in\', \'truncated_normal\', \'None\', \"<dtype: \'float32\'>\"], "
}
member_method {
name: "from_config"
diff --git a/tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le b/tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le
index f496ac59b6..e879c34bbd 100644
--- a/tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le
+++ b/tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le
@@ -8,6 +8,7 @@ RUN /install/install_bootstrap_deb_packages.sh
RUN add-apt-repository -y ppa:openjdk-r/ppa
RUN /install/install_deb_packages.sh
RUN apt-get update && apt-get install -y libopenblas-dev
+RUN /install/install_hdf5_ppc64le.sh
RUN /install/install_pip_packages.sh
RUN /install/install_bazel_from_source.sh
RUN /install/install_proto3.sh
diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le b/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le
index 3eddc56550..8967138747 100644
--- a/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le
+++ b/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le
@@ -14,6 +14,7 @@ RUN /install/install_bootstrap_deb_packages.sh
RUN add-apt-repository -y ppa:openjdk-r/ppa
RUN /install/install_deb_packages.sh
RUN apt-get update && apt-get install -y libopenblas-dev
+RUN /install/install_hdf5_ppc64le.sh
RUN /install/install_pip_packages.sh
RUN /install/install_bazel_from_source.sh
RUN /install/install_golang_ppc64le.sh
diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cpu b/tensorflow/tools/ci_build/Dockerfile.rbe.cpu
index 3bc52b9ed6..7e5860aeec 100644
--- a/tensorflow/tools/ci_build/Dockerfile.rbe.cpu
+++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cpu
@@ -1,4 +1,4 @@
-FROM launcher.gcr.io/google/rbe-debian8:r327695
+FROM launcher.gcr.io/google/rbe-ubuntu16-04:r327695
LABEL maintainer="Yu Yi <yiyu@google.com>"
# Copy install scripts
@@ -9,6 +9,6 @@ ENV CC /usr/local/bin/clang
ENV CXX /usr/local/bin/clang++
ENV AR /usr/bin/ar
-# Run pip install script for RBE Debian8 container.
+# Run pip install script for RBE Ubuntu 16-04 container.
RUN /install/install_pip_packages_remote.sh
RUN /install/install_pip_packages.sh
diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index b56b9308b3..08e2c3edd2 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -59,6 +59,9 @@
# TF_BUILD_BAZEL_CLEAN:
# Will perform "bazel clean", if and only if this variable
# is set to any non-empty and non-0 value
+# TF_BAZEL_BUILD_ONLY:
+# If it is set to any non-empty value that is not "0", Bazel
+# will only build specified targets
# TF_GPU_COUNT:
# Run this many parallel tests for serial builds.
# For now, only can be edited for PIP builds.
@@ -410,6 +413,11 @@ fi
# this flag, and it only affects a few tests.
EXTRA_ARGS="${EXTRA_ARGS} --distinct_host_configuration=false"
+if [[ ! -z "${TF_BAZEL_BUILD_ONLY}" ]] &&
+ [[ "${TF_BAZEL_BUILD_ONLY}" != "0" ]];then
+ BAZEL_CMD=${BAZEL_BUILD_ONLY_CMD}
+fi
+
# Process PIP install-test option
if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] ||
[[ ${TF_BUILD_IS_PIP} == "both" ]]; then
diff --git a/tensorflow/tools/ci_build/install/install_hdf5_ppc64le.sh b/tensorflow/tools/ci_build/install/install_hdf5_ppc64le.sh
new file mode 100755
index 0000000000..4989d986b8
--- /dev/null
+++ b/tensorflow/tools/ci_build/install/install_hdf5_ppc64le.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+
+#This is required because pypi doesn't have a pre-built h5py binary for ppc64le
+#It has to be compiled from source during the install
+apt-get update
+apt-get install -y libhdf5-dev
+
+#h5py is not expecting the shared libraries to have _serial in the name.
+ln -s /usr/lib/powerpc64le-linux-gnu/libhdf5_serial.so /usr/lib/powerpc64le-linux-gnu/libhdf5.so
+ln -s /usr/lib/powerpc64le-linux-gnu/libhdf5_serial_hl.so /usr/lib/powerpc64le-linux-gnu/libhdf5_hl.so
+
+#pip is not installed yet, so use easy_install
+#CPATH is the location of hdf5.h
+CPATH=/usr/include/hdf5/serial/ easy_install -U h5py
+CPATH=/usr/include/hdf5/serial/ easy_install3 -U h5py
diff --git a/tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh b/tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh
new file mode 100755
index 0000000000..ad22ebe4eb
--- /dev/null
+++ b/tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# Build a whl and container with Intel(R) MKL support
+# Usage: build-dev-container.sh
+
+# Helper function to traverse directories up until given file is found.
+function upsearch () {
+  # Walk from the current directory towards / and print the first directory
+  # that contains an entry named "$1". Prints nothing if / is reached first.
+  if [ "$PWD" = / ]; then
+    return
+  fi
+  if [ -e "$1" ] && echo "$PWD"; then
+    return
+  fi
+  cd .. && upsearch "$1"
+}
+
+# Set up WORKSPACE.
+WORKSPACE="${WORKSPACE:-$(upsearch WORKSPACE)}"
+
+TF_DOCKER_BUILD_DEVEL_BRANCH=${TF_DOCKER_BUILD_DEVEL_BRANCH:-master}
+TF_DOCKER_BUILD_IMAGE_NAME=${TF_DOCKER_BUILD_IMAGE_NAME:-intel-mkl/tensorflow}
+TF_DOCKER_BUILD_VERSION=${TF_DOCKER_BUILD_VERSION:-nightly}
+
+echo "TF_DOCKER_BUILD_DEVEL_BRANCH=${TF_DOCKER_BUILD_DEVEL_BRANCH}"
+echo "TF_DOCKER_BUILD_IMAGE_NAME=${TF_DOCKER_BUILD_IMAGE_NAME}"
+echo "TF_DOCKER_BUILD_VERSION=${TF_DOCKER_BUILD_VERSION}"
+
+# build the python 2 container and whl
+TF_DOCKER_BUILD_TYPE="MKL" \
+ TF_DOCKER_BUILD_IS_DEVEL="YES" \
+ TF_DOCKER_BUILD_DEVEL_BRANCH="${TF_DOCKER_BUILD_DEVEL_BRANCH}" \
+ TF_DOCKER_BUILD_IMAGE_NAME="${TF_DOCKER_BUILD_IMAGE_NAME}" \
+ TF_DOCKER_BUILD_VERSION="${TF_DOCKER_BUILD_VERSION}" \
+ ${WORKSPACE}/tensorflow/tools/docker/parameterized_docker_build.sh
+
+# build the python 3 container and whl
+TF_DOCKER_BUILD_TYPE="MKL" \
+ TF_DOCKER_BUILD_IS_DEVEL="YES" \
+ TF_DOCKER_BUILD_DEVEL_BRANCH="${TF_DOCKER_BUILD_DEVEL_BRANCH}" \
+ TF_DOCKER_BUILD_IMAGE_NAME="${TF_DOCKER_BUILD_IMAGE_NAME}" \
+ TF_DOCKER_BUILD_VERSION="${TF_DOCKER_BUILD_VERSION}" \
+ TF_DOCKER_BUILD_PYTHON_VERSION="PYTHON3" \
+ ${WORKSPACE}/tensorflow/tools/docker/parameterized_docker_build.sh
+
diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
index b8bce57c87..3d27e84b81 100755
--- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
+++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
@@ -65,6 +65,10 @@ OPENBLAS_SRC_PATH=/tmp/openblas_src/
sudo rm -rf ${OPENBLAS_SRC_PATH}
git clone https://github.com/xianyi/OpenBLAS ${OPENBLAS_SRC_PATH}
cd ${OPENBLAS_SRC_PATH}
+# The commit after this introduced Fortran compile issues. In theory they should
+# be solvable using NOFORTRAN=1 on the make command, but my initial tries didn't
+# work, so pinning to the last known good version.
+git checkout 5a6a2bed9aff0ba8a18651d5514d029c8cae336a
# If this path is changed, you'll also need to update
# cxx_builtin_include_directory in third_party/toolchains/cpus/arm/CROSSTOOL.tpl
OPENBLAS_INSTALL_PATH=/tmp/openblas_install/
diff --git a/tensorflow/tools/ci_build/update_version.py b/tensorflow/tools/ci_build/update_version.py
index 00bfcfd49b..642dde36a7 100755
--- a/tensorflow/tools/ci_build/update_version.py
+++ b/tensorflow/tools/ci_build/update_version.py
@@ -37,7 +37,7 @@ SETUP_PY = "%s/tools/pip_package/setup.py" % TF_SRC_DIR
README_MD = "./README.md"
DEVEL_DOCKERFILE = "%s/tools/docker/Dockerfile.devel" % TF_SRC_DIR
GPU_DEVEL_DOCKERFILE = "%s/tools/docker/Dockerfile.devel-gpu" % TF_SRC_DIR
-CPU_MKL_DEVEL_DOCKERFILE = "%s/tools/docker/Dockerfile.devel-cpu-mkl" % TF_SRC_DIR
+CPU_MKL_DEVEL_DOCKERFILE = "%s/tools/docker/Dockerfile.devel-mkl" % TF_SRC_DIR
RELEVANT_FILES = [TF_SRC_DIR,
VERSION_H,
SETUP_PY,
diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl b/tensorflow/tools/docker/Dockerfile.devel-mkl
new file mode 100755
index 0000000000..6dca0e393f
--- /dev/null
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl
@@ -0,0 +1,128 @@
+FROM ubuntu:16.04
+
+LABEL maintainer="Clayne Robison <clayne.b.robison@intel.com>"
+
+# These parameters can be overridden by parameterized_docker_build.sh
+ARG TF_BUILD_VERSION=r1.9
+ARG PYTHON="python"
+ARG PYTHON3_DEV=""
+ARG WHL_DIR="/tmp/pip"
+ARG PIP="pip"
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ build-essential \
+ curl \
+ git \
+ libcurl3-dev \
+ libfreetype6-dev \
+ libhdf5-serial-dev \
+ libpng12-dev \
+ libzmq3-dev \
+ pkg-config \
+ python-dev \
+ ${PYTHON3_DEV} \
+ rsync \
+ software-properties-common \
+ unzip \
+ zip \
+ zlib1g-dev \
+ openjdk-8-jdk \
+ openjdk-8-jre-headless \
+ && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py && \
+ ${PYTHON} get-pip.py && \
+ rm get-pip.py
+
+RUN ${PIP} --no-cache-dir install \
+ Pillow \
+ h5py \
+ ipykernel \
+ jupyter \
+ matplotlib \
+ mock \
+ numpy \
+ scipy \
+ sklearn \
+ pandas \
+ && \
+ ${PYTHON} -m ipykernel.kernelspec
+
+RUN if [ "${PYTHON}" = "python3" ]; then \
+ ln -s -f /usr/bin/python3 /usr/bin/python; \
+ fi
+
+# Set up our notebook config.
+COPY jupyter_notebook_config.py /root/.jupyter/
+
+# Jupyter has issues with being run directly:
+# https://github.com/ipython/ipython/issues/7062
+# We just add a little wrapper script.
+COPY run_jupyter.sh /
+
+# Set up Bazel.
+
+# Running bazel inside a `docker build` command causes trouble, cf:
+# https://github.com/bazelbuild/bazel/issues/134
+# The easiest solution is to set up a bazelrc file forcing --batch.
+RUN echo "startup --batch" >>/etc/bazel.bazelrc
+# Similarly, we need to workaround sandboxing issues:
+# https://github.com/bazelbuild/bazel/issues/418
+RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
+ >>/etc/bazel.bazelrc
+# Install the bazel release pinned by BAZEL_VERSION.
+ENV BAZEL_VERSION 0.11.0
+WORKDIR /
+RUN mkdir /bazel && \
+ cd /bazel && \
+ curl -H "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36" -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \
+ curl -H "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36" -fSsL -o /bazel/LICENSE.txt https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE && \
+ chmod +x bazel-*.sh && \
+ ./bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \
+ cd / && \
+ rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
+
+# Use /tensorflow as the working directory for the source checkout and build.
+WORKDIR /tensorflow
+
+# Download and build TensorFlow.
+# Enable checking out both tags and branches
+RUN export TAG_PREFIX="v" && \
+ echo ${TF_BUILD_VERSION} | grep -q ^${TAG_PREFIX}; \
+ if [ $? -eq 0 ]; then \
+ git clone --depth=1 https://github.com/tensorflow/tensorflow.git . && \
+ git fetch --tags && \
+ git checkout ${TF_BUILD_VERSION}; \
+ else \
+ git clone --depth=1 --branch=${TF_BUILD_VERSION} https://github.com/tensorflow/tensorflow.git . ; \
+ fi
+
+RUN yes "" | ${PYTHON} configure.py
+
+ENV CI_BUILD_PYTHON ${PYTHON}
+
+# Set bazel build parameters in .bazelrc in parameterized_docker_build.sh
+# Use --copt=-march values to get optimized builds appropriate for the hardware
+# platform of your choice.
+# For ivy-bridge or sandy-bridge
+# --copt=-march="avx" \
+# For haswell, broadwell, or skylake
+# --copt=-march="avx2" \
+COPY .bazelrc /root/.bazelrc
+
+RUN tensorflow/tools/ci_build/builds/configured CPU \
+ bazel --bazelrc=/root/.bazelrc build -c opt \
+ tensorflow/tools/pip_package:build_pip_package && \
+ bazel-bin/tensorflow/tools/pip_package/build_pip_package "${WHL_DIR}" && \
+ ${PIP} --no-cache-dir install --upgrade "${WHL_DIR}"/tensorflow-*.whl && \
+ rm -rf /root/.cache
+# Clean up Bazel cache when done.
+
+# TensorBoard
+EXPOSE 6006
+# IPython
+EXPOSE 8888
+
+WORKDIR /root
diff --git a/tensorflow/tools/docker/Dockerfile.mkl b/tensorflow/tools/docker/Dockerfile.mkl
new file mode 100755
index 0000000000..139395d491
--- /dev/null
+++ b/tensorflow/tools/docker/Dockerfile.mkl
@@ -0,0 +1,75 @@
+FROM ubuntu:16.04
+
+LABEL maintainer="Clayne Robison <clayne.b.robison@intel.com>"
+
+# This parameter MUST be set by parameterized_docker_build.sh
+ARG TF_WHL_URL
+
+# Optional parameters
+ARG TF_BUILD_VERSION=r1.9
+ARG PYTHON="python"
+ARG PYTHON_DEV="python-dev"
+ARG PIP="pip"
+
+# Pick up some TF dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ build-essential \
+ curl \
+ libfreetype6-dev \
+ libhdf5-serial-dev \
+ libpng12-dev \
+ libzmq3-dev \
+ pkg-config \
+ python \
+ ${PYTHON_DEV} \
+ rsync \
+ software-properties-common \
+ unzip \
+ && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+# Bootstrap pip with the interpreter selected by the PYTHON build arg, so that
+# the ${PIP} command used below exists for that interpreter (e.g. pip3 when
+# PYTHON=python3). -f makes curl fail on an HTTP error instead of saving the
+# error page as get-pip.py.
+RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py && \
+    ${PYTHON} get-pip.py && \
+    rm get-pip.py
+
+# Install the Python packages, then register the Jupyter kernel using the same
+# interpreter the packages were installed for.
+RUN ${PIP} --no-cache-dir install \
+        Pillow \
+        h5py \
+        ipykernel \
+        jupyter \
+        matplotlib \
+        numpy \
+        pandas \
+        scipy \
+        sklearn \
+        && \
+    ${PYTHON} -m ipykernel.kernelspec
+
+COPY ${TF_WHL_URL} /
+RUN ${PIP} install --no-cache-dir --force-reinstall /${TF_WHL_URL} && \
+ rm -rf /${TF_WHL_URL}
+
+RUN if [ "${PYTHON}" = "python3" ]; then \
+ ln -s -f /usr/bin/python3 /usr/bin/python; \
+ fi
+
+# Set up our notebook config.
+COPY jupyter_notebook_config.py /root/.jupyter/
+
+# Copy sample notebooks.
+COPY notebooks /notebooks
+
+# Jupyter has issues with being run directly:
+# https://github.com/ipython/ipython/issues/7062
+# We just add a little wrapper script.
+COPY run_jupyter.sh /
+
+# TensorBoard
+EXPOSE 6006
+# IPython
+EXPOSE 8888
+
+WORKDIR "/notebooks"
+
+CMD ["/run_jupyter.sh", "--allow-root"]
diff --git a/tensorflow/tools/docker/parameterized_docker_build.sh b/tensorflow/tools/docker/parameterized_docker_build.sh
index 05de25f2cb..4681c5fd61 100755
--- a/tensorflow/tools/docker/parameterized_docker_build.sh
+++ b/tensorflow/tools/docker/parameterized_docker_build.sh
@@ -19,8 +19,8 @@
# parameterized_docker_build.sh
#
# The script obeys the following environment variables:
-# TF_DOCKER_BUILD_TYPE: (CPU | GPU)
-# CPU or GPU image
+# TF_DOCKER_BUILD_TYPE: (CPU | GPU | MKL)
+# CPU, GPU, or MKL image
#
# TF_DOCKER_BUILD_IS_DEVEL: (NO | YES)
# Is this developer image
@@ -87,6 +87,15 @@
# TF_DOCKER_BUILD_OPTIONS
# (Optional)
# Specifies the desired build options. Defaults to OPT.
+#
+# TF_DOCKER_BUILD_ARGS
+# (Optional)
+# A list (array) of docker build args. Will be passed to docker build
+# command as list of --build-arg parameters.
+#
+# TF_BAZEL_BUILD_OPTIONS
+# (Optional)
+# Bazel compiler flags to be passed to the bazelrc file
# Script directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
@@ -116,6 +125,8 @@ echo " TF_DOCKER_BUILD_IMAGE_NAME=${TF_DOCKER_BUILD_IMAGE_NAME}"
echo " TF_DOCKER_BUILD_VERSION=${TF_DOCKER_BUILD_VERSION}"
echo " TF_DOCKER_BUILD_PORT=${TF_DOCKER_BUILD_PORT}"
echo " TF_DOCKER_BUILD_PUSH_CMD=${TF_DOCKER_BUILD_PUSH_CMD}"
+echo " TF_DOCKER_BUILD_ARGS=${TF_DOCKER_BUILD_ARGS[@]:-()}"
+echo " TF_BAZEL_BUILD_OPTIONS=${TF_BAZEL_BUILD_OPTIONS}"
CONTAINER_PORT=${TF_DOCKER_BUILD_PORT:-8888}
@@ -149,6 +160,15 @@ fi
if [[ ${TF_DOCKER_BUILD_TYPE} == "cpu" ]]; then
DOCKER_BINARY="docker"
+elif [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then
+ DOCKER_BINARY="docker"
+ FINAL_TAG="${FINAL_TAG}-mkl"
+ if [[ ${ORIG_DOCKERFILE} == *"."* ]]; then
+ # The Dockerfile name already contains a dot, so append the suffix with "-"
+ ORIG_DOCKERFILE="${ORIG_DOCKERFILE}-mkl"
+ else
+ ORIG_DOCKERFILE="${ORIG_DOCKERFILE}.mkl"
+ fi
elif [[ ${TF_DOCKER_BUILD_TYPE} == "gpu" ]]; then
DOCKER_BINARY="nvidia-docker"
@@ -203,6 +223,10 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then
export TF_BUILD_OPTIONS=${TF_DOCKER_BUILD_OPTIONS}
export TF_BUILD_IS_PIP="PIP"
+ if [[ "${TF_DOCKER_BUILD_TYPE}" == "mkl" ]]; then
+ die "FAIL: Non-development MKL builds require a pre-built pip whl."
+ fi
+
if [[ "${TF_DOCKER_BUILD_TYPE}" == "gpu" ]]; then
export TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS=\
"${TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS} -e TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2"
@@ -255,25 +279,39 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then
# Use string replacement to put the correct file name into the Dockerfile
PIP_WHL=$(basename "${PIP_WHL}")
- # Modify the non-devel Dockerfile to point to the correct pip whl file
- # location
- sed -e "/# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #/,"\
+ if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then
+ TF_DOCKER_BUILD_ARGS+=("--build-arg TF_WHL_URL=${PIP_WHL}" )
+ cp "${ORIG_DOCKERFILE}" "${DOCKERFILE}"
+ else
+ # Modify the non-devel Dockerfile to point to the correct pip whl file
+ # location
+ sed -e "/# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #/,"\
"/# --- ~ DO NOT EDIT OR DELETE BETWEEN THE LINES --- #/c"\
"COPY ${PIP_WHL} /\n"\
"RUN pip --no-cache-dir install /${PIP_WHL}" "${ORIG_DOCKERFILE}" \
- > "${DOCKERFILE}"
+ > "${DOCKERFILE}"
+ fi
echo "Using local pip wheel from: ${TF_DOCKER_BUILD_CENTRAL_PIP}"
echo
-
else
echo "Downloading pip wheel from: ${TF_DOCKER_BUILD_CENTRAL_PIP}"
- echo
-
- # Modify the non-devel Dockerfile to point to the correct pip whl URL.
- sed -e "/# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #/,"\
+ if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then
+ pushd "${TMP_DIR}/"
+ curl -O ${TF_DOCKER_BUILD_CENTRAL_PIP}
+ popd
+ PIP_WHL_PATH=`find ${TMP_DIR} -name "*.whl"`
+ PIP_WHL=$(basename "${PIP_WHL_PATH}")
+ echo "PIP_WHL= ${PIP_WHL}"
+ echo
+ TF_DOCKER_BUILD_ARGS+=("--build-arg TF_WHL_URL=${PIP_WHL}")
+ cp "${ORIG_DOCKERFILE}" "${DOCKERFILE}"
+ else
+ # Modify the non-devel Dockerfile to point to the correct pip whl URL.
+ sed -e "/# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #/,"\
"/# --- ~ DO NOT EDIT OR DELETE BETWEEN THE LINES --- #/c"\
"RUN pip --no-cache-dir install ${TF_DOCKER_BUILD_CENTRAL_PIP}" "${ORIG_DOCKERFILE}" \
- > "${DOCKERFILE}"
+ > "${DOCKERFILE}"
+ fi
fi
echo "Modified Dockerfile at: ${DOCKERFILE}"
@@ -281,36 +319,66 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then
# Modify python/pip version if necessary.
if [[ "${TF_DOCKER_BUILD_PYTHON_VERSION}" == "python3" ]]; then
- if sed -i -e 's/python /python3 /g' "${DOCKERFILE}" && \
- sed -i -e 's/python-dev/python3-dev/g' "${DOCKERFILE}" && \
- sed -i -e 's/pip /pip3 /g' "${DOCKERFILE}" && \
- sed -i -e 's^# RUN ln -s -f /usr/bin/python3 /usr/bin/python#^RUN ln -s -f /usr/bin/python3 /usr/bin/python^' "${DOCKERFILE}"
- then
- echo "Modified Dockerfile for python version "\
-"${TF_DOCKER_BUILD_PYTHON_VERSION} at: ${DOCKERFILE}"
+ if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then
+ TF_DOCKER_BUILD_ARGS+=("--build-arg PYTHON=${TF_DOCKER_BUILD_PYTHON_VERSION}")
+ TF_DOCKER_BUILD_ARGS+=("--build-arg PYTHON_DEV=python3-dev")
+ TF_DOCKER_BUILD_ARGS+=("--build-arg PIP=pip3")
+ cp "${ORIG_DOCKERFILE}" "${DOCKERFILE}"
else
- die "FAILED to modify ${DOCKERFILE} for python3"
+ if sed -i -e 's/python /python3 /g' "${DOCKERFILE}" && \
+ sed -i -e 's/python-dev/python3-dev/g' "${DOCKERFILE}" && \
+ sed -i -e 's/pip /pip3 /g' "${DOCKERFILE}" && \
+ sed -i -e 's^# RUN ln -s -f /usr/bin/python3 /usr/bin/python#^RUN ln -s -f /usr/bin/python3 /usr/bin/python^' "${DOCKERFILE}"
+ then
+ echo "Modified Dockerfile for python version "\
+ "${TF_DOCKER_BUILD_PYTHON_VERSION} at: ${DOCKERFILE}"
+ else
+ die "FAILED to modify ${DOCKERFILE} for python3"
+ fi
fi
fi
-else
+else # TF_DOCKER_BUILD_IS_DEVEL == 'yes'
DOCKERFILE="${TMP_DIR}/Dockerfile"
- # Modify the devel Dockerfile to specify the git branch
- sed "s/^RUN git clone --branch=.* --depth=1/RUN git clone --branch=${TF_DOCKER_BUILD_DEVEL_BRANCH} --depth=1/" \
- "${ORIG_DOCKERFILE}" > "${DOCKERFILE}"
+ # Set up Dockerfile ARGS for mkl build
+ if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then
+ if [[ -z "${TF_BAZEL_BUILD_OPTIONS// }" ]]; then
+ TF_BAZEL_BUILD_OPTIONS=("--config=mkl --copt=-mavx --cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0")
+ else
+ TF_BAZEL_BUILD_OPTIONS="${TF_BAZEL_BUILD_OPTIONS}"
+ fi
+ TF_DOCKER_BUILD_ARGS+=("--build-arg TF_BUILD_VERSION=${TF_DOCKER_BUILD_DEVEL_BRANCH}")
+ echo "TF_DOCKER_BUILD_ARGS=${TF_DOCKER_BUILD_ARGS[@]}"
+
+ # Pass the build options to bazel using the user-specific .bazelrc file
+ echo "build ${TF_BAZEL_BUILD_OPTIONS}" >> ${TMP_DIR}/.bazelrc
+ cp "${ORIG_DOCKERFILE}" "${DOCKERFILE}"
+ else
+ # Modify the devel Dockerfile to specify the git branch
+ sed "s/^RUN git clone --branch=.* --depth=1/RUN git clone --branch=${TF_DOCKER_BUILD_DEVEL_BRANCH} --depth=1/" \
+ "${ORIG_DOCKERFILE}" > "${DOCKERFILE}"
+ fi
# Modify python/pip version if necessary.
if [[ "${TF_DOCKER_BUILD_PYTHON_VERSION}" == "python3" ]]; then
- if sed -i -e 's/python-dev/python-dev python3-dev/g' "${DOCKERFILE}" && \
- sed -i -e 's/python /python3 /g' "${DOCKERFILE}" && \
- sed -i -e 's^/tmp/pip^/tmp/pip3^g' "${DOCKERFILE}" && \
- sed -i -e 's/pip /pip3 /g' "${DOCKERFILE}" && \
- sed -i -e 's/ENV CI_BUILD_PYTHON python/ENV CI_BUILD_PYTHON python3/g' "${DOCKERFILE}" && \
- sed -i -e 's^# RUN ln -s -f /usr/bin/python3 /usr/bin/python#^RUN ln -s -f /usr/bin/python3 /usr/bin/python^' "${DOCKERFILE}"
- then
- echo "Modified Dockerfile further for python version ${TF_DOCKER_BUILD_PYTHON_VERSION} at: ${DOCKERFILE}"
+ if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then
+ TF_DOCKER_BUILD_ARGS+=("--build-arg PYTHON=${TF_DOCKER_BUILD_PYTHON_VERSION}")
+ TF_DOCKER_BUILD_ARGS+=("--build-arg PYTHON3_DEV=python3-dev")
+ TF_DOCKER_BUILD_ARGS+=("--build-arg WHL_DIR=/tmp/pip3")
+ TF_DOCKER_BUILD_ARGS+=("--build-arg PIP=pip3")
+ cp "${ORIG_DOCKERFILE}" "${DOCKERFILE}"
else
- die "FAILED to modify ${DOCKERFILE} for python3"
+ if sed -i -e 's/python-dev/python-dev python3-dev/g' "${DOCKERFILE}" && \
+ sed -i -e 's/python /python3 /g' "${DOCKERFILE}" && \
+ sed -i -e 's^/tmp/pip^/tmp/pip3^g' "${DOCKERFILE}" && \
+ sed -i -e 's/pip /pip3 /g' "${DOCKERFILE}" && \
+ sed -i -e 's/ENV CI_BUILD_PYTHON python/ENV CI_BUILD_PYTHON python3/g' "${DOCKERFILE}" && \
+ sed -i -e 's^# RUN ln -s -f /usr/bin/python3 /usr/bin/python#^RUN ln -s -f /usr/bin/python3 /usr/bin/python^' "${DOCKERFILE}"
+ then
+ echo "Modified Dockerfile further for python version ${TF_DOCKER_BUILD_PYTHON_VERSION} at: ${DOCKERFILE}"
+ else
+ die "FAILED to modify ${DOCKERFILE} for python3"
+ fi
fi
fi
fi
@@ -319,8 +387,11 @@ fi
# Intermediate image name with tag
IMG="${USER}/tensorflow:${FINAL_TAG}"
echo "Building docker image with image name and tag: ${IMG}"
+echo "TF_DOCKER_BUILD_ARGS=${TF_DOCKER_BUILD_ARGS[@]}"
+CMD="${DOCKER_BINARY} build ${TF_DOCKER_BUILD_ARGS[@]} --no-cache --pull -t ${IMG} -f ${DOCKERFILE} ${TMP_DIR}"
+echo "CMD=${CMD}"
+${CMD}
-"${DOCKER_BINARY}" build --no-cache --pull -t "${IMG}" -f "${DOCKERFILE}" "${TMP_DIR}"
if [[ $? == "0" ]]; then
echo "${DOCKER_BINARY} build of ${IMG} succeeded"
else
@@ -340,7 +411,7 @@ fi
DOCKER_RUN_LOG="${TMP_DIR}/docker_run.log"
echo ""
echo "Running docker container from image ${IMG}..."
-echo " (Log file is at: ${DOCKER_RUN_LOG}"
+echo " Log file is at: ${DOCKER_RUN_LOG}"
echo ""
if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then
@@ -386,7 +457,6 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then
# Stop the running docker container
sleep 1
"${DOCKER_BINARY}" stop --time=0 ${CONTAINER_ID}
-
fi
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 55cd4f37c6..c630ca04b8 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -53,7 +53,7 @@ REQUIRED_PACKAGES = [
'gast >= 0.2.0',
'numpy >= 1.13.3',
'six >= 1.10.0',
- 'protobuf >= 3.4.0',
+ 'protobuf >= 3.6.0',
'setuptools <= 39.1.0',
'tensorboard >= 1.8.0, < 1.9.0',
'termcolor >= 1.1.0',
@@ -170,8 +170,9 @@ class InstallHeaders(Command):
# symlink within the directory hierarchy.
# NOTE(keveman): Figure out how to customize bdist_wheel package so
# we can do the symlink.
- if 'external/eigen_archive/' in install_dir:
- extra_dir = install_dir.replace('external/eigen_archive', '')
+ if 'tensorflow/include/external/eigen_archive/' in install_dir:
+ extra_dir = install_dir.replace(
+ 'tensorflow/include/external/eigen_archive', '')
if not os.path.exists(extra_dir):
self.mkpath(extra_dir)
self.copy_file(header, extra_dir)
@@ -204,13 +205,12 @@ def find_files(pattern, root):
yield os.path.join(dirpath, filename)
-matches = ['../' + x for x in find_files('*', 'external') if '.py' not in x]
-
so_lib_paths = [
i for i in os.listdir('.')
if os.path.isdir(i) and fnmatch.fnmatch(i, '_solib_*')
]
+matches = []
for path in so_lib_paths:
matches.extend(
['../' + x for x in find_files('*', path) if '.py' not in x]
@@ -225,7 +225,7 @@ headers = (list(find_files('*.h', 'tensorflow/core')) +
list(find_files('*.h', 'tensorflow/stream_executor')) +
list(find_files('*.h', 'google/protobuf_archive/src')) +
list(find_files('*', 'third_party/eigen3')) +
- list(find_files('*', 'external/eigen_archive')))
+ list(find_files('*', 'tensorflow/include/external/eigen_archive')))
setup(
name=project_name,
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 4015c0d5a4..5372a585aa 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -559,11 +559,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
tf_http_archive(
name = "kafka",
urls = [
- "https://mirror.bazel.build/github.com/edenhill/librdkafka/archive/v0.11.1.tar.gz",
- "https://github.com/edenhill/librdkafka/archive/v0.11.1.tar.gz",
+ "https://mirror.bazel.build/github.com/edenhill/librdkafka/archive/v0.11.4.tar.gz",
+ "https://github.com/edenhill/librdkafka/archive/v0.11.4.tar.gz",
],
- sha256 = "dd035d57c8f19b0b612dd6eefe6e5eebad76f506e302cccb7c2066f25a83585e",
- strip_prefix = "librdkafka-0.11.1",
+ sha256 = "9d8f1eb7b0e29e9ab1168347c939cb7ae5dff00a39cef99e7ef033fd8f92737c",
+ strip_prefix = "librdkafka-0.11.4",
build_file = clean_dep("//third_party:kafka/BUILD"),
patch_file = clean_dep("//third_party/kafka:config.patch"),
)
diff --git a/third_party/eigen.BUILD b/third_party/eigen.BUILD
index e54c1a4501..759f8a9be9 100644
--- a/third_party/eigen.BUILD
+++ b/third_party/eigen.BUILD
@@ -69,3 +69,9 @@ cc_library(
includes = ["."],
visibility = ["//visibility:public"],
)
+
+filegroup(
+ name = "eigen_header_files",
+ srcs = EIGEN_MPL2_HEADER_FILES,
+ visibility = ["//visibility:public"],
+)
diff --git a/third_party/eigen3/BUILD b/third_party/eigen3/BUILD
index f661093bc9..203991b50f 100644
--- a/third_party/eigen3/BUILD
+++ b/third_party/eigen3/BUILD
@@ -17,21 +17,23 @@ load("//tensorflow:tensorflow.bzl", "if_mkl")
# INTEL_MKL end
load("//tensorflow:tensorflow.bzl", "if_mkl")
+EIGEN3_THIRD_PARTY_HEADERS = [
+ "Eigen/Core",
+ "Eigen/LU",
+ "Eigen/Cholesky",
+ "Eigen/Eigenvalues",
+ "Eigen/QR",
+ "Eigen/SVD",
+ "unsupported/Eigen/MatrixFunctions",
+ "unsupported/Eigen/SpecialFunctions",
+ "unsupported/Eigen/CXX11/ThreadPool",
+ "unsupported/Eigen/CXX11/Tensor",
+ "unsupported/Eigen/CXX11/FixedPoint",
+] + glob(["unsupported/Eigen/CXX11/src/FixedPoint/*.h"])
+
cc_library(
name = "eigen3",
- hdrs = glob(["unsupported/Eigen/CXX11/src/FixedPoint/*.h"]) + [
- "Eigen/Core",
- "Eigen/LU",
- "Eigen/Cholesky",
- "Eigen/Eigenvalues",
- "Eigen/QR",
- "Eigen/SVD",
- "unsupported/Eigen/MatrixFunctions",
- "unsupported/Eigen/SpecialFunctions",
- "unsupported/Eigen/CXX11/ThreadPool",
- "unsupported/Eigen/CXX11/Tensor",
- "unsupported/Eigen/CXX11/FixedPoint",
- ],
+ hdrs = EIGEN3_THIRD_PARTY_HEADERS,
includes = if_mkl(["./mkl_include"]),
visibility = ["//visibility:public"],
deps = [
@@ -48,3 +50,35 @@ filegroup(
),
visibility = ["//tensorflow:__subpackages__"],
)
+
+filegroup(
+ name = "eigen_third_party_header_files",
+ srcs = EIGEN3_THIRD_PARTY_HEADERS,
+ visibility = ["//visibility:public"],
+)
+
+genrule(
+ name = "install_eigen_headers",
+ srcs = [
+ "@eigen_archive//:eigen_header_files",
+ ":eigen_third_party_header_files",
+ ],
+ outs = ["include"],
+ cmd = """
+ mkdir $@
+ for f in $(locations @eigen_archive//:eigen_header_files) ; do
+ d="$${f%/*}"
+ d="$${d#*external/eigen_archive/}"
+
+ mkdir -p "$@/$${d}"
+ cp "$${f}" "$@/$${d}/"
+ done
+
+ for f in $(locations :eigen_third_party_header_files) ; do
+ d="$${f%/*}"
+
+ mkdir -p "$@/$${d}"
+ cp "$${f}" "$@/$${d}/"
+ done
+ """,
+)
diff --git a/third_party/kafka/BUILD b/third_party/kafka/BUILD
index a839ca717e..75792b0d87 100644
--- a/third_party/kafka/BUILD
+++ b/third_party/kafka/BUILD
@@ -60,6 +60,8 @@ cc_library(
"src/rdkafka_event.h",
"src/rdkafka_feature.c",
"src/rdkafka_feature.h",
+ "src/rdkafka_header.c",
+ "src/rdkafka_header.h",
"src/rdkafka_int.h",
"src/rdkafka_interceptor.c",
"src/rdkafka_interceptor.h",
@@ -93,7 +95,6 @@ cc_library(
"src/rdkafka_sasl_int.h",
"src/rdkafka_sasl_plain.c",
"src/rdkafka_subscription.c",
- "src/rdkafka_subscription.h",
"src/rdkafka_timer.c",
"src/rdkafka_timer.h",
"src/rdkafka_topic.c",
@@ -105,6 +106,8 @@ cc_library(
"src/rdlist.h",
"src/rdlog.c",
"src/rdlog.h",
+ "src/rdmurmur2.c",
+ "src/rdmurmur2.h",
"src/rdports.c",
"src/rdports.h",
"src/rdposix.h",
diff --git a/third_party/repo.bzl b/third_party/repo.bzl
index cb67d3e961..9cee1fcc4b 100644
--- a/third_party/repo.bzl
+++ b/third_party/repo.bzl
@@ -16,7 +16,6 @@
_SINGLE_URL_WHITELIST = depset([
"arm_compiler",
- "ortools_archive",
])
def _is_windows(ctx):
diff --git a/third_party/sqlite.BUILD b/third_party/sqlite.BUILD
index 6da7953589..2876f305f1 100644
--- a/third_party/sqlite.BUILD
+++ b/third_party/sqlite.BUILD
@@ -5,6 +5,7 @@ licenses(["unencumbered"]) # Public Domain
SQLITE_COPTS = [
"-Os",
+ "-DSQLITE_ENABLE_JSON1",
"-DHAVE_DECL_STRERROR_R=1",
"-DHAVE_STDINT_H=1",
"-DHAVE_INTTYPES_H=1",
diff --git a/third_party/toolchains/BUILD b/third_party/toolchains/BUILD
new file mode 100644
index 0000000000..fc3183a754
--- /dev/null
+++ b/third_party/toolchains/BUILD
@@ -0,0 +1,22 @@
+licenses(["restricted"])
+
+package(default_visibility = ["//visibility:public"])
+
+# Platform for use with remote execution with
+# custom container based off RBE Ubuntu16_04
+# http://gcr.io/cloud-marketplace/google/rbe-ubuntu16-04
+# Built with //tensorflow/tools/ci_build/Dockerfile.rbe.cpu
+platform(
+ name = "rbe_ubuntu16_04-tf",
+ constraint_values = [
+ "@bazel_tools//platforms:x86_64",
+ "@bazel_tools//platforms:linux",
+ "@bazel_tools//tools/cpp:clang",
+ "@bazel_toolchains//constraints:xenial",
+ ],
+ remote_execution_properties = """
+ properties: {
+ name: "container-image"
+ value:"docker://gcr.io/asci-toolchain/nosla-ubuntu16_04-tf@sha256:800a7b68cabef15419695c188ed33ed70adf678c2371b97b236f3ae26c38274d"
+ }""",
+)