aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--.gitignore3
-rw-r--r--RELEASE.md9
-rw-r--r--WORKSPACE7
-rwxr-xr-xconfigure65
-rw-r--r--tensorflow/BUILD30
-rw-r--r--tensorflow/compiler/xla/service/allocation_tracker.cc3
-rw-r--r--tensorflow/compiler/xla/service/generic_transfer_manager.cc4
-rw-r--r--tensorflow/compiler/xla/service/service.cc16
-rw-r--r--tensorflow/contrib/android/BUILD6
-rw-r--r--tensorflow/contrib/cmake/CMakeLists.txt4
-rw-r--r--tensorflow/contrib/cmake/README.md2
-rw-r--r--tensorflow/contrib/cmake/tf_cc_ops.cmake40
-rw-r--r--tensorflow/contrib/cmake/tf_core_framework.cmake7
-rw-r--r--tensorflow/contrib/cmake/tf_core_kernels.cmake6
-rwxr-xr-x[-rw-r--r--]tensorflow/contrib/cmake/tf_python.cmake107
-rw-r--r--tensorflow/contrib/cmake/tf_tests.cmake7
-rw-r--r--tensorflow/contrib/cmake/tf_tools.cmake19
-rw-r--r--tensorflow/contrib/cmake/tools/create_def_file.py134
-rw-r--r--tensorflow/contrib/framework/python/framework/checkpoint_utils.py7
-rw-r--r--tensorflow/contrib/framework/python/framework/checkpoint_utils_test.py23
-rw-r--r--tensorflow/contrib/framework/python/ops/arg_scope.py14
-rw-r--r--tensorflow/contrib/layers/__init__.py1
-rw-r--r--tensorflow/contrib/layers/python/layers/layers.py10
-rw-r--r--tensorflow/contrib/learn/__init__.py1
-rw-r--r--tensorflow/contrib/learn/python/learn/README.md17
-rw-r--r--tensorflow/contrib/learn/python/learn/estimators/estimator.py2
-rw-r--r--tensorflow/contrib/learn/python/learn/estimators/linear.py23
-rw-r--r--tensorflow/contrib/learn/python/learn/tests/dataframe/tensorflow_dataframe_test.py103
-rw-r--r--tensorflow/contrib/metrics/python/ops/metric_ops.py2
-rw-r--r--tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py15
-rw-r--r--tensorflow/contrib/rnn/python/ops/lstm_ops.py2
-rw-r--r--tensorflow/contrib/seq2seq/python/ops/helper.py2
-rw-r--r--tensorflow/contrib/seq2seq/python/ops/loss.py3
-rw-r--r--tensorflow/contrib/slim/README.md6
-rw-r--r--tensorflow/contrib/util/loader.py27
-rw-r--r--tensorflow/core/BUILD101
-rw-r--r--tensorflow/core/common_runtime/mkl_cpu_allocator.h120
-rw-r--r--tensorflow/core/common_runtime/threadpool_device.cc9
-rw-r--r--tensorflow/core/framework/allocator.cc5
-rw-r--r--tensorflow/core/framework/allocator_registry.cc66
-rw-r--r--tensorflow/core/framework/allocator_registry.h77
-rw-r--r--tensorflow/core/framework/type_index.h4
-rw-r--r--tensorflow/core/framework/types.h6
-rw-r--r--tensorflow/core/graph/mkl_layout_pass.cc548
-rw-r--r--tensorflow/core/graph/mkl_layout_pass.h36
-rw-r--r--tensorflow/core/graph/mkl_layout_pass_test.cc199
-rw-r--r--tensorflow/core/graph/mkl_optimizer_merge.cc124
-rw-r--r--tensorflow/core/graph/mkl_optimizer_merge.h6
-rw-r--r--tensorflow/core/graph/mkl_optimizer_merge_test.cc135
-rw-r--r--tensorflow/core/graph/mkl_tfconversion_pass.cc271
-rw-r--r--tensorflow/core/graph/mkl_tfconversion_pass.h36
-rw-r--r--tensorflow/core/graph/mkl_tfconversion_pass_test.cc243
-rw-r--r--tensorflow/core/kernels/BUILD63
-rw-r--r--tensorflow/core/kernels/adjust_hue_op.cc43
-rw-r--r--tensorflow/core/kernels/adjust_hue_op.h42
-rw-r--r--tensorflow/core/kernels/adjust_hue_op_gpu.cu.cc141
-rw-r--r--tensorflow/core/kernels/eigen_pooling.h1
-rw-r--r--tensorflow/core/kernels/fixed_length_record_reader_op.cc4
-rw-r--r--tensorflow/core/kernels/mkl_conv_ops.cc457
-rw-r--r--tensorflow/core/kernels/mkl_tfconv_op.cc135
-rw-r--r--tensorflow/core/kernels/mkl_transpose_op.cc67
-rw-r--r--tensorflow/core/kernels/pooling_ops_common.cc2
-rw-r--r--tensorflow/core/kernels/resize_nearest_neighbor_op.cc4
-rw-r--r--tensorflow/core/kernels/resize_op_benchmark_test.cc (renamed from tensorflow/core/kernels/resize_nearest_neighbor_op_benchmark_test.cc)28
-rw-r--r--tensorflow/core/kernels/transpose_op.cc15
-rw-r--r--tensorflow/core/kernels/transpose_op.h11
-rw-r--r--tensorflow/core/ops/nn_ops.cc41
-rw-r--r--tensorflow/core/ops/ops.pbtxt53
-rw-r--r--tensorflow/core/platform/default/build_config.bzl74
-rw-r--r--tensorflow/core/platform/default/build_config_root.bzl18
-rw-r--r--tensorflow/core/platform/hadoop/hadoop_file_system.cc15
-rw-r--r--tensorflow/core/platform/macros.h11
-rw-r--r--tensorflow/core/platform/windows/cpu_info.h3
-rw-r--r--tensorflow/core/platform/windows/intrinsics_port.h4
-rw-r--r--tensorflow/core/platform/windows/windows_file_system.cc8
-rw-r--r--tensorflow/core/util/mkl_util.h296
-rw-r--r--tensorflow/docs_src/extend/adding_an_op.md16
-rw-r--r--tensorflow/docs_src/get_started/get_started.md2
-rw-r--r--tensorflow/docs_src/get_started/mnist/mechanics.md8
-rw-r--r--tensorflow/docs_src/programmers_guide/faq.md4
-rw-r--r--tensorflow/docs_src/programmers_guide/variables.md5
-rw-r--r--tensorflow/docs_src/tutorials/linear.md2
-rw-r--r--tensorflow/docs_src/tutorials/using_gpu.md10
-rw-r--r--tensorflow/docs_src/tutorials/wide.md6
-rw-r--r--tensorflow/docs_src/tutorials/wide_and_deep.md2
-rw-r--r--tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java2
-rw-r--r--tensorflow/examples/learn/README.md2
-rw-r--r--tensorflow/examples/learn/boston.py9
-rw-r--r--tensorflow/examples/learn/iris.py4
-rw-r--r--tensorflow/examples/learn/text_classification.py3
-rw-r--r--tensorflow/examples/tutorials/deepdream/deepdream.ipynb2
-rw-r--r--tensorflow/examples/tutorials/word2vec/word2vec_basic.py6
-rw-r--r--tensorflow/go/genop/generate.sh11
-rw-r--r--tensorflow/java/README.md6
-rw-r--r--tensorflow/java/src/main/java/org/tensorflow/SavedModelBundle.java3
-rw-r--r--tensorflow/java/src/main/java/org/tensorflow/Tensor.java3
-rw-r--r--tensorflow/java/src/main/java/org/tensorflow/package-info.java4
-rw-r--r--tensorflow/python/client/session.py6
-rw-r--r--tensorflow/python/debug/BUILD6
-rw-r--r--tensorflow/python/kernel_tests/tensordot_op_test.py16
-rw-r--r--tensorflow/python/layers/pooling.py4
-rw-r--r--tensorflow/python/ops/control_flow_ops.py4
-rw-r--r--tensorflow/python/ops/image_ops_impl.py138
-rw-r--r--tensorflow/python/ops/image_ops_test.py26
-rw-r--r--tensorflow/python/ops/math_ops.py36
-rw-r--r--tensorflow/python/ops/metrics_impl.py2
-rw-r--r--tensorflow/python/ops/nn_ops.py32
-rw-r--r--tensorflow/python/ops/rnn_cell_impl.py26
-rw-r--r--tensorflow/python/platform/tf_logging.py1
-rw-r--r--tensorflow/tensorboard/README.md2
-rw-r--r--tensorflow/tensorboard/defs.bzl2
-rw-r--r--tensorflow/tensorflow.bzl3
-rw-r--r--tensorflow/tools/benchmark/BUILD1
-rw-r--r--tensorflow/tools/benchmark/benchmark_model.cc5
-rw-r--r--tensorflow/tools/ci_build/Dockerfile.android7
-rw-r--r--tensorflow/tools/ci_build/Dockerfile.cmake3
-rw-r--r--tensorflow/tools/ci_build/Dockerfile.cpu7
-rw-r--r--tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu5
-rw-r--r--tensorflow/tools/ci_build/Dockerfile.gpu12
-rw-r--r--tensorflow/tools/ci_build/Dockerfile.hadoop7
-rw-r--r--tensorflow/tools/ci_build/Dockerfile.tensorboard2
-rw-r--r--tensorflow/tools/ci_build/README.md12
-rwxr-xr-xtensorflow/tools/ci_build/builds/run_pip_tests.sh24
-rwxr-xr-xtensorflow/tools/ci_build/ci_parameterized_build.sh33
-rwxr-xr-xtensorflow/tools/ci_build/ci_sanity.sh2
-rwxr-xr-xtensorflow/tools/ci_build/install/install_deb_packages.sh6
-rwxr-xr-xtensorflow/tools/ci_build/install/install_pip_packages.sh38
-rwxr-xr-xtensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh91
-rw-r--r--tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh3
-rw-r--r--tensorflow/tools/compatibility/README.md3
-rw-r--r--tensorflow/tools/compatibility/tf_upgrade.py25
-rw-r--r--tensorflow/tools/docker/Dockerfile.devel5
-rw-r--r--tensorflow/tools/docker/Dockerfile.devel-gpu5
-rwxr-xr-xtensorflow/tools/git/gen_git_source.py6
-rw-r--r--tensorflow/tools/graph_transforms/summarize_graph_main.cc5
-rw-r--r--tensorflow/tools/pip_package/BUILD18
-rw-r--r--tensorflow/tools/pip_package/MANIFEST.in2
-rw-r--r--tensorflow/workspace.bzl4
-rw-r--r--third_party/gpus/cuda_configure.bzl11
-rwxr-xr-xthird_party/sycl/crosstool/computecpp.tpl2
-rwxr-xr-xutil/python/python_config.sh2
141 files changed, 4407 insertions, 602 deletions
diff --git a/.gitignore b/.gitignore
index 07dd151380..01f06be1a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
.DS_Store
.ipynb_checkpoints
node_modules
+/.bazelrc
/bazel-*
/third_party/py/numpy/numpy_include
/tools/bazel.rc
@@ -13,4 +14,4 @@ node_modules
*.pyc
__pycache__
*.swp
-.vscode/ \ No newline at end of file
+.vscode/
diff --git a/RELEASE.md b/RELEASE.md
index b223f51730..5f261a4543 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,3 +1,10 @@
+# Release 1.0.1
+
+## Bug Fixes and Other Changes
+* Change GraphConstructor to not increase the version when importing, but instead take the min of all versions.
+* Google Cloud Storage fixes.
+* Removed `tf.core` and `tf.python` modules from the API. These were never intended to be exposed. Please use the same objects through top-level `tf` module instead.
+
# Release 1.0.0
## Major Features and Improvements
@@ -88,6 +95,8 @@ To help you upgrade your existing TensorFlow Python code to match the API change
from the tensorflow::ops namespace to tensorflow.
* Change arg order for `{softmax,sparse_softmax,sigmoid}_cross_entropy_with_logits` to be (labels, predictions), and force use of named args.
* tf.nn.rnn_cell.* and most functions in tf.nn.rnn.* (with the exception of dynamic_rnn and raw_rnn) are temporarily in tf.contrib.rnn. They will be moved back into core for TF 1.1.
+* `tf.nn.sampled_softmax_loss` and `tf.nn.nce_loss` have both changed their API such that you need to switch the `inputs, labels` parameters to `labels, inputs`.
+* The `shape` keyword argument of the `SparseTensor` constructor was renamed to `dense_shape` between TensorFlow 0.12 and TensorFlow 1.0.
## Bug Fixes and Other Changes
* Numerous C++ API updates.
diff --git a/WORKSPACE b/WORKSPACE
index 72fa0d8949..6ec1a7df3e 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -14,12 +14,7 @@ load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories")
closure_repositories()
-load("//tensorflow:workspace.bzl", "check_version", "tf_workspace")
-
-# We must check the bazel version before trying to parse any other BUILD files,
-# in case the parsing of those build files depends on the bazel version we
-# require here.
-check_version("0.4.2")
+load("//tensorflow:workspace.bzl", "tf_workspace")
# Uncomment and update the paths in these entries to build the Android demo.
#android_sdk_repository(
diff --git a/configure b/configure
index 05daa23d70..081db20d75 100755
--- a/configure
+++ b/configure
@@ -8,6 +8,9 @@ pushd `dirname $0` > /dev/null
SOURCE_BASE_DIR=`pwd -P`
popd > /dev/null
+# This file contains customized config settings.
+touch .bazelrc
+
PLATFORM="$(uname -s | tr 'A-Z' 'a-z')"
function is_linux() {
@@ -36,15 +39,11 @@ function is_windows() {
}
function bazel_clean_and_fetch() {
- # bazel clean --expunge currently doesn't work on Windows
- # TODO(pcloudy): Re-enable it after bazel clean --expunge is fixed.
- if ! is_windows; then
- bazel clean --expunge
- fi
if [ -z "$TF_BAZEL_TARGETS" ]; then
- TF_BAZEL_TARGETS="//tensorflow/... -//tensorflow/contrib/nccl/... -//tensorflow/examples/android/..."
+ bazel fetch "//tensorflow/... -//tensorflow/contrib/nccl/... -//tensorflow/examples/android/..."
+ else
+ bazel fetch $TF_BAZEL_TARGETS
fi
- bazel fetch "$TF_BAZEL_TARGETS"
}
function sed_hyphen_i() {
@@ -102,8 +101,8 @@ if false; then # Disable building with MKL for now
if [ "$TF_NEED_MKL" == "1" ]; then # TF_NEED_MKL
DST=`dirname $0`
- ARCHIVE_BASENAME=mklml_lnx_2017.0.2.20170110.tgz
- GITHUB_RELEASE_TAG=v0.3
+ ARCHIVE_BASENAME=mklml_lnx_2017.0.2.20170209.tgz
+ GITHUB_RELEASE_TAG=v0.5
MKLURL="https://github.com/01org/mkl-dnn/releases/download/$GITHUB_RELEASE_TAG/$ARCHIVE_BASENAME"
if ! [ -e "$DST/third_party/mkl/$ARCHIVE_BASENAME" ]; then
wget --no-check-certificate -P $DST/third_party/mkl/ $MKLURL
@@ -182,13 +181,12 @@ else
TF_NEED_JEMALLOC=0
fi
-if [ "$TF_NEED_JEMALLOC" == "1" ]; then
- sed_hyphen_i -e "s/WITH_JEMALLOC = False/WITH_JEMALLOC = True/" tensorflow/core/platform/default/build_config.bzl
-else
- sed_hyphen_i -e "s/WITH_JEMALLOC = True/WITH_JEMALLOC = False/" tensorflow/core/platform/default/build_config.bzl
+sed_hyphen_i -e "/with_jemalloc/d" .bazelrc
+if [[ "$TF_NEED_JEMALLOC" == "1" ]]; then
+ echo 'build --define with_jemalloc=true' >>.bazelrc
fi
-while [ "$TF_NEED_GCP" == "" ]; do
+while [[ "$TF_NEED_GCP" == "" ]]; do
read -p "Do you wish to build TensorFlow with "\
"Google Cloud Platform support? [y/N] " INPUT
case $INPUT in
@@ -202,23 +200,12 @@ while [ "$TF_NEED_GCP" == "" ]; do
esac
done
-if [ "$TF_NEED_GCP" == "1" ]; then
- ## Verify that libcurl header files are available.
- # Only check Linux, since on MacOS the header files are installed with XCode.
- if is_linux && [[ ! -f "/usr/include/curl/curl.h" ]]; then
- echo "ERROR: It appears that the development version of libcurl is not "\
-"available. Please install the libcurl3-dev package."
- exit 1
- fi
-
- # Update Bazel build configuration.
- sed_hyphen_i -e "s/WITH_GCP_SUPPORT = False/WITH_GCP_SUPPORT = True/" tensorflow/core/platform/default/build_config.bzl
-else
- # Update Bazel build configuration.
- sed_hyphen_i -e "s/WITH_GCP_SUPPORT = True/WITH_GCP_SUPPORT = False/" tensorflow/core/platform/default/build_config.bzl
+sed_hyphen_i -e "/with_gcp_support/d" .bazelrc
+if [[ "$TF_NEED_GCP" == "1" ]]; then
+ echo 'build --define with_gcp_support=true' >>.bazelrc
fi
-while [ "$TF_NEED_HDFS" == "" ]; do
+while [[ "$TF_NEED_HDFS" == "" ]]; do
read -p "Do you wish to build TensorFlow with "\
"Hadoop File System support? [y/N] " INPUT
case $INPUT in
@@ -232,16 +219,13 @@ while [ "$TF_NEED_HDFS" == "" ]; do
esac
done
-if [ "$TF_NEED_HDFS" == "1" ]; then
- # Update Bazel build configuration.
- sed_hyphen_i -e "s/WITH_HDFS_SUPPORT = False/WITH_HDFS_SUPPORT = True/" tensorflow/core/platform/default/build_config.bzl
-else
- # Update Bazel build configuration.
- sed_hyphen_i -e "s/WITH_HDFS_SUPPORT = True/WITH_HDFS_SUPPORT = False/" tensorflow/core/platform/default/build_config.bzl
+sed_hyphen_i -e "/with_hdfs_support/d" .bazelrc
+if [[ "$TF_NEED_HDFS" == "1" ]]; then
+ echo 'build --define with_hdfs_support=true' >>.bazelrc
fi
## Enable XLA.
-while [ "$TF_ENABLE_XLA" == "" ]; do
+while [[ "$TF_ENABLE_XLA" == "" ]]; do
read -p "Do you wish to build TensorFlow with the XLA just-in-time compiler (experimental)? [y/N] " INPUT
case $INPUT in
[Yy]* ) echo "XLA JIT support will be enabled for TensorFlow"; TF_ENABLE_XLA=1;;
@@ -251,12 +235,9 @@ while [ "$TF_ENABLE_XLA" == "" ]; do
esac
done
-if [ "$TF_ENABLE_XLA" == "1" ]; then
- # Update Bazel build configuration.
- sed_hyphen_i -e "s/^WITH_XLA_SUPPORT = [FT].*/WITH_XLA_SUPPORT = True/" tensorflow/core/platform/default/build_config_root.bzl
-else
- # Update Bazel build configuration.
- sed_hyphen_i -e "s/^WITH_XLA_SUPPORT = [FT].*/WITH_XLA_SUPPORT = False/" tensorflow/core/platform/default/build_config_root.bzl
+sed_hyphen_i -e "/with_xla_support/d" .bazelrc
+if [[ "$TF_ENABLE_XLA" == "1" ]]; then
+ echo 'build --define with_xla_support=true' >>.bazelrc
fi
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index a2e74f40c3..1956cb0763 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -110,6 +110,34 @@ config_setting(
visibility = ["//visibility:public"],
)
+# TODO(jhseu): Enable on platforms other than Linux.
+config_setting(
+ name = "with_jemalloc",
+ values = {
+ "cpu": "k8",
+ "define": "with_jemalloc=true",
+ },
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "with_gcp_support",
+ values = {"define": "with_gcp_support=true"},
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "with_hdfs_support",
+ values = {"define": "with_hdfs_support=true"},
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "with_xla_support",
+ values = {"define": "with_xla_support=true"},
+ visibility = ["//visibility:public"],
+)
+
package_group(
name = "internal",
packages = ["//tensorflow/..."],
@@ -321,6 +349,8 @@ cc_binary(
deps = [
"//tensorflow/c:c_api",
"//tensorflow/cc:cc_ops",
+ "//tensorflow/cc:client_session",
+ "//tensorflow/cc:scope",
"//tensorflow/core:tensorflow",
],
)
diff --git a/tensorflow/compiler/xla/service/allocation_tracker.cc b/tensorflow/compiler/xla/service/allocation_tracker.cc
index 998ca7d21f..8f169cd036 100644
--- a/tensorflow/compiler/xla/service/allocation_tracker.cc
+++ b/tensorflow/compiler/xla/service/allocation_tracker.cc
@@ -138,7 +138,8 @@ tensorflow::Status AllocationTracker::DeallocateShape(
TF_RET_CHECK(ShapeUtil::TupleElementCount(shape) == elements.size())
<< "tuple has unexpected number of elements: " << elements.size()
<< " != " << ShapeUtil::TupleElementCount(shape);
- for (int i = 0; i < elements.size(); ++i) {
+ for (std::vector<se::DeviceMemoryBase>::size_type i = 0;
+ i < elements.size(); ++i) {
VLOG(2) << "recursing onto the tuple elements";
TF_RETURN_IF_ERROR(DeallocateShape(backend, device_ordinal, &elements[i],
shape.tuple_shapes(i),
diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
index aa512f242a..715d3f33bc 100644
--- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
@@ -118,10 +118,10 @@ GenericTransferManager::ShallowCopyTupleFromDevice(
// Create a DeviceMemoryBase from each void* pointer.
std::vector<se::DeviceMemoryBase> destination;
- for (int i = 0; i < element_pointers.size(); ++i) {
+ for (std::vector<void*>::size_type i = 0; i < element_pointers.size(); ++i) {
if (element_pointers[i] == nullptr &&
!ShapeUtil::HasZeroElements(shape.tuple_shapes(i))) {
- return FailedPrecondition("tuple contains nullptr at element %d", i);
+ return FailedPrecondition("tuple contains nullptr at element %lu", i);
}
int64 buffer_size = ShapeUtil::ByteSizeOf(shape.tuple_shapes(i),
/*pointer_size=*/sizeof(void*));
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index d88315e747..60593afb8c 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -256,7 +256,8 @@ StatusOr<std::vector<const Allocation*>> Service::ResolveAndValidateArguments(
tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments,
const Backend* backend, int device_ordinal) {
std::vector<const Allocation*> allocations;
- for (int i = 0; i < arguments.size(); ++i) {
+ for (tensorflow::gtl::ArraySlice<const GlobalDataHandle*>::size_type i = 0;
+ i < arguments.size(); ++i) {
auto allocation_status = allocation_tracker_.Resolve(*arguments[i]);
if (!allocation_status.ok()) {
return Status(allocation_status.status().code(),
@@ -269,7 +270,7 @@ StatusOr<std::vector<const Allocation*>> Service::ResolveAndValidateArguments(
if (allocation->backend() != backend ||
allocation->device_ordinal() != device_ordinal) {
return InvalidArgument(
- "argument %d is on device %s but computation will be executed "
+ "argument %lu is on device %s but computation will be executed "
"on device %s",
i,
allocation->backend()
@@ -295,13 +296,14 @@ StatusOr<std::unique_ptr<HloModuleConfig>> Service::CreateModuleConfig(
program_shape.parameters_size(), arguments.size());
}
- for (int i = 0; i < arguments.size(); ++i) {
+ for (tensorflow::gtl::ArraySlice<const Allocation*>::size_type i = 0;
+ i < arguments.size(); ++i) {
// Verify that shape of arguments matches the shape of the arguments in the
// ProgramShape.
if (!ShapeUtil::Compatible(arguments[i]->shape(),
program_shape.parameters(i))) {
return InvalidArgument(
- "computation expects parameter %d to have shape %s, given shape %s",
+ "computation expects parameter %lu to have shape %s, given shape %s",
i, ShapeUtil::HumanString(program_shape.parameters(i)).c_str(),
ShapeUtil::HumanString(arguments[i]->shape()).c_str());
}
@@ -383,7 +385,8 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> Service::BuildExecutables(
hlo_dumper, std::move(executors)));
if (!other_directory_path.empty()) {
- for (int64 i = 0; i < versioned_handles.size(); ++i) {
+ for (std::vector<VersionedComputationHandle>::size_type i = 0;
+ i < versioned_handles.size(); ++i) {
executables[i]->set_session_module(std::move(session_modules[i]));
}
}
@@ -523,7 +526,8 @@ Service::ExecuteParallelAndRegisterResult(
// Asynchronously launch all executables.
std::vector<GlobalDataHandle> result_handles;
- for (int64 i = 0; i < executables.size(); i++) {
+ for (tensorflow::gtl::ArraySlice<Executable*>::size_type i = 0;
+ i < executables.size(); i++) {
TF_ASSIGN_OR_RETURN(
perftools::gputools::DeviceMemoryBase result,
executables[i]->ExecuteAsyncOnStream(&run_options[i], arguments[i]));
diff --git a/tensorflow/contrib/android/BUILD b/tensorflow/contrib/android/BUILD
index acd82dc21e..952f24f34b 100644
--- a/tensorflow/contrib/android/BUILD
+++ b/tensorflow/contrib/android/BUILD
@@ -72,13 +72,17 @@ LINKER_SCRIPT = "//tensorflow/contrib/android:jni/version_script.lds"
cc_binary(
name = "libtensorflow_inference.so",
srcs = [],
- copts = tf_copts(),
+ copts = tf_copts() + [
+ "-ffunction-sections",
+ "-fdata-sections",
+ ],
linkopts = if_android([
"-landroid",
"-llog",
"-lm",
"-z defs",
"-s",
+ "-Wl,--gc-sections",
"-Wl,--version-script", # This line must be directly followed by LINKER_SCRIPT.
LINKER_SCRIPT,
]),
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 043a69f264..3c8dc869af 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -56,9 +56,10 @@ mark_as_advanced(DOWNLOAD_LOCATION)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_definitions(-DEIGEN_AVOID_STL_ARRAY)
if(WIN32)
- add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC -D__VERSION__=\"MSVC\")
+ add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC)
add_definitions(-DWIN32 -DOS_WIN -D_MBCS -DWIN64 -DWIN32_LEAN_AND_MEAN -DNOGDI -DPLATFORM_WINDOWS)
add_definitions(-DTENSORFLOW_USE_EIGEN_THREADPOOL -DEIGEN_HAS_C99_MATH -D_ITERATOR_DEBUG_LEVEL=0)
+ add_definitions(-DTF_COMPILE_LIBRARY)
add_definitions(-DNDEBUG /O2) # Equivalent of -c opt in Bazel.
add_definitions(/bigobj /nologo /EHsc /GF /FC /MP /Gm-)
# Suppress warnings to reduce build log size.
@@ -190,6 +191,7 @@ if (tensorflow_ENABLE_GPU)
${CUDA_TOOLKIT_TARGET_DIR}/include/cuda.h ${CUDA_TOOLKIT_TARGET_DIR}/include/cuComplex.h
${CUDA_TOOLKIT_TARGET_DIR}/include/cublas_v2.h ${CUDNN_HOME}/include/cudnn.h
${CUDA_TOOLKIT_TARGET_DIR}/include/cufft.h ${CUDA_TOOLKIT_TARGET_DIR}/include/curand.h
+ ${CUDA_TOOLKIT_TARGET_DIR}/include/cuda_runtime_api.h
DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include
)
include_directories(${tensorflow_source_dir}/third_party/gpus)
diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index 8e7f43b511..2641d5292d 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -13,7 +13,7 @@ Linux.
Current Status
--------------
-CMake can be used to build TensorFlow on Windows. See the [getting started documentation](https://www.tensorflow.org/get_started/os_setup.html#pip-installation-on-windows)
+CMake can be used to build TensorFlow on Windows. See the [getting started documentation](https://www.tensorflow.org/install/install_windows)
for instructions on how to install a pre-built TensorFlow package on Windows.
### Current known limitations
diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake
index bca700aca2..936196dd20 100644
--- a/tensorflow/contrib/cmake/tf_cc_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake
@@ -120,3 +120,43 @@ list(REMOVE_ITEM tf_cc_srcs ${tf_cc_test_srcs})
add_library(tf_cc OBJECT ${tf_cc_srcs})
add_dependencies(tf_cc tf_cc_framework tf_cc_ops)
+
+set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib")
+add_custom_target(tf_extension_ops)
+
+function(AddUserOps)
+ cmake_parse_arguments(_AT "" "" "TARGET;SOURCES;GPUSOURCES;DEPENDS;DISTCOPY" ${ARGN})
+ if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES)
+ # if gpu build is enabled and we have gpu specific code,
+ # hint to cmake that this needs to go to nvcc
+ set (gpu_source ${_AT_GPUSOURCES})
+ set (gpu_lib "${_AT_TARGET}_gpu")
+ set_source_files_properties(${gpu_source} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
+ cuda_compile(gpu_lib ${gpu_source})
+ endif()
+ # create shared library from source and cuda obj
+ add_library(${_AT_TARGET} SHARED ${_AT_SOURCES} ${gpu_lib})
+ target_link_libraries(${_AT_TARGET} ${pywrap_tensorflow_lib})
+ if(WIN32)
+ if (tensorflow_ENABLE_GPU AND _AT_GPUSOURCES)
+ # some ops call out to cuda directly; need to link libs for the cuda dlls
+ target_link_libraries(${_AT_TARGET} ${CUDA_LIBRARIES})
+ endif()
+ if (_AT_DISTCOPY)
+ add_custom_command(TARGET ${_AT_TARGET} POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:${_AT_TARGET}> ${_AT_DISTCOPY}/)
+ endif()
+ endif()
+ if (_AT_DEPENDS)
+ add_dependencies(${_AT_TARGET} ${_AT_DEPENDS})
+ endif()
+ # make sure TF_COMPILE_LIBRARY is not defined for this target
+ get_target_property(target_compile_flags ${_AT_TARGET} COMPILE_FLAGS)
+ if(target_compile_flags STREQUAL "target_compile_flags-NOTFOUND")
+ set(target_compile_flags "/UTF_COMPILE_LIBRARY")
+ else()
+ set(target_compile_flags "${target_compile_flags} /UTF_COMPILE_LIBRARY")
+ endif()
+ set_target_properties(${_AT_TARGET} PROPERTIES COMPILE_FLAGS ${target_compile_flags})
+ add_dependencies(tf_extension_ops ${_AT_TARGET})
+endfunction(AddUserOps)
diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake
index 691dee9ef0..3787ac4c81 100644
--- a/tensorflow/contrib/cmake/tf_core_framework.cmake
+++ b/tensorflow/contrib/cmake/tf_core_framework.cmake
@@ -199,7 +199,6 @@ add_custom_command(OUTPUT
COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py
--raw_generate ${VERSION_INFO_CC}
DEPENDS __force_rebuild)
-
set(tf_version_srcs ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc)
########################################################
@@ -238,3 +237,9 @@ add_dependencies(tf_core_framework
tf_core_lib
proto_text
)
+
+if(WIN32)
+ # Cmake > 3.6 will quote this as -D"__VERSION__=\"MSVC\"" which nvcc fails on.
+ # Instead of defining this globally, limit it to tf_core_framework where it's used.
+ target_compile_definitions(tf_core_framework PRIVATE __VERSION__="MSVC")
+endif()
diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index dd28817b54..33384eed48 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -93,6 +93,12 @@ if(WIN32)
"${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
+ # not in tensorflow.dll - comes from .so
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.cc"
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops.cc"
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops.cc"
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/gru_ops.cc"
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/lstm_ops.cc"
)
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs})
endif(WIN32)
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 2c21154217..2ecc08f421 100644..100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -623,12 +623,7 @@ add_custom_command(
COMMENT "Running SWIG to generate Python wrappers"
VERBATIM )
-# pywrap_tensorflow_internal is a shared library containing all of the
-# TensorFlow runtime and the standard ops and kernels. These are installed into
-# tf_python/tensorflow/python/.
-# TODO(mrry): Refactor this to expose a framework library that
-# facilitates `tf.load_op_library()`.
-add_library(pywrap_tensorflow_internal SHARED
+set (pywrap_tensorflow_internal_src
"${tensorflow_source_dir}/tensorflow/python/client/tf_session_helper.h"
"${tensorflow_source_dir}/tensorflow/python/client/tf_session_helper.cc"
"${tensorflow_source_dir}/tensorflow/python/framework/cpp_shape_inference.h"
@@ -652,6 +647,55 @@ add_library(pywrap_tensorflow_internal SHARED
"${tensorflow_source_dir}/tensorflow/c/tf_status_helper.cc"
"${tensorflow_source_dir}/tensorflow/c/tf_status_helper.h"
"${CMAKE_CURRENT_BINARY_DIR}/pywrap_tensorflow_internal.cc"
+)
+
+if(WIN32)
+ # Windows: build a static library with the same objects as tensorflow.dll.
+ # This can be used to build for a standalone exe and also helps us to
+ # find all symbols that need to be exported from the dll which is needed
+ # to provide the tensorflow c/c++ api in tensorflow.dll.
+ # From the static library we create the def file with all symbols that need to
+ # be exported from tensorflow.dll. Because there is a limit of 64K symbols
+ # that can be exported, we filter the symbols with a python script to the namespaces
+ # we need.
+ #
+ add_library(pywrap_tensorflow_internal_static STATIC
+ ${pywrap_tensorflow_internal_src}
+ $<TARGET_OBJECTS:tf_core_lib>
+ $<TARGET_OBJECTS:tf_core_cpu>
+ $<TARGET_OBJECTS:tf_core_framework>
+ $<TARGET_OBJECTS:tf_core_ops>
+ $<TARGET_OBJECTS:tf_core_direct_session>
+ $<TARGET_OBJECTS:tf_tools_transform_graph_lib>
+ $<$<BOOL:${tensorflow_ENABLE_GRPC_SUPPORT}>:$<TARGET_OBJECTS:tf_core_distributed_runtime>>
+ $<TARGET_OBJECTS:tf_core_kernels>
+ $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
+ $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
+ )
+ target_include_directories(pywrap_tensorflow_internal_static PUBLIC
+ ${PYTHON_INCLUDE_DIR}
+ ${NUMPY_INCLUDE_DIR}
+ )
+ target_link_libraries(pywrap_tensorflow_internal_static
+ tf_protos_cc
+ tf_python_protos_cc
+ )
+ set(pywrap_tensorflow_deffile "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow.def")
+ set_source_files_properties(${pywrap_tensorflow_deffile} PROPERTIES GENERATED TRUE)
+
+ add_custom_command(TARGET pywrap_tensorflow_internal_static POST_BUILD
+ COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/tools/create_def_file.py
+ --input $<TARGET_FILE:pywrap_tensorflow_internal_static>
+ --output ${pywrap_tensorflow_deffile}
+ )
+endif(WIN32)
+
+
+# pywrap_tensorflow_internal is a shared library containing all of the
+# TensorFlow runtime and the standard ops and kernels. These are installed into
+# tf_python/tensorflow/python/.
+add_library(pywrap_tensorflow_internal SHARED
+ ${pywrap_tensorflow_internal_src}
$<TARGET_OBJECTS:tf_core_lib>
$<TARGET_OBJECTS:tf_core_cpu>
$<TARGET_OBJECTS:tf_core_framework>
@@ -662,7 +706,13 @@ add_library(pywrap_tensorflow_internal SHARED
$<TARGET_OBJECTS:tf_core_kernels>
$<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_core_kernels_cpu_only>>
$<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
+ ${pywrap_tensorflow_deffile}
)
+
+if(WIN32)
+ add_dependencies(pywrap_tensorflow_internal pywrap_tensorflow_internal_static)
+endif(WIN32)
+
target_include_directories(pywrap_tensorflow_internal PUBLIC
${PYTHON_INCLUDE_DIR}
${NUMPY_INCLUDE_DIR}
@@ -675,6 +725,44 @@ target_link_libraries(pywrap_tensorflow_internal
${PYTHON_LIBRARIES}
)
+if(WIN32)
+ # include contrib/rnn as .so
+ #
+ # The GRU and LSTM ops/kernels are declared as two separate targets
+ # (_gru_ops and _lstm_ops) through AddUserOps, which is defined outside
+ # this section.
+ # NOTE(review): presumably AddUserOps builds each target as a loadable
+ # library and copies it to the DISTCOPY directory - confirm against its
+ # definition.
+ # blas_gemm.cc/.h appear in both source lists below, so they are part of
+ # both targets.
+ set(tf_gru_srcs
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.cc"
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.h"
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops.cc"
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops.h"
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/gru_ops.cc"
+ )
+ # CUDA sources for the GRU kernels, passed separately as GPUSOURCES.
+ set(tf_gru_gpu_srcs
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops_gpu.cu.cc"
+ )
+
+ set(tf_lstm_srcs
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.cc"
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.h"
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops.cc"
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops.h"
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/lstm_ops.cc"
+ )
+ # CUDA sources for the LSTM kernels, passed separately as GPUSOURCES.
+ set(tf_lstm_gpu_srcs
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc"
+ )
+
+ # Both targets depend on pywrap_tensorflow_internal and tf_python_ops and
+ # use the contrib/rnn/python/ops/ directory of the tf_python tree as their
+ # DISTCOPY destination.
+ AddUserOps(TARGET _gru_ops
+ SOURCES "${tf_gru_srcs}"
+ GPUSOURCES ${tf_gru_gpu_srcs}
+ DEPENDS pywrap_tensorflow_internal tf_python_ops
+ DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rnn/python/ops/)
+
+ AddUserOps(TARGET _lstm_ops
+ SOURCES "${tf_lstm_srcs}"
+ GPUSOURCES ${tf_lstm_gpu_srcs}
+ DEPENDS pywrap_tensorflow_internal tf_python_ops
+ DISTCOPY ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rnn/python/ops/)
+endif(WIN32)
+
############################################################
# Build a PIP package containing the TensorFlow runtime.
############################################################
@@ -684,14 +772,17 @@ add_dependencies(tf_python_build_pip_package
tensorboard_copy_dependencies
tf_python_copy_scripts_to_destination
tf_python_touchup_modules
- tf_python_ops)
+ tf_python_ops
+ tf_extension_ops)
add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/tools/pip_package/setup.py
${CMAKE_CURRENT_BINARY_DIR}/tf_python/)
if(WIN32)
add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.dll
- ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.pyd)
+ ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.pyd
+ COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib
+ ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/)
else()
add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 711b5c49f4..449a762a9a 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -115,7 +115,14 @@ if (tensorflow_BUILD_PYTHON_TESTS)
#
# include all test
+ if (WIN32)
+ file(GLOB_RECURSE tf_test_rnn_src_py
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/python/kernel_tests/*_test.py"
+ )
+ endif()
+
file(GLOB_RECURSE tf_test_src_py
+ ${tf_test_rnn_src_py}
"${tensorflow_source_dir}/tensorflow/python/debug/cli/*_test.py"
"${tensorflow_source_dir}/tensorflow/python/debug/lib/*_test.py"
"${tensorflow_source_dir}/tensorflow/python/debug/wrappers/*_test.py"
diff --git a/tensorflow/contrib/cmake/tf_tools.cmake b/tensorflow/contrib/cmake/tf_tools.cmake
index 2aaa9ed53e..5151fdb444 100644
--- a/tensorflow/contrib/cmake/tf_tools.cmake
+++ b/tensorflow/contrib/cmake/tf_tools.cmake
@@ -106,3 +106,22 @@ target_link_libraries(${compare_graphs} PUBLIC
${tf_core_gpu_kernels_lib}
${tensorflow_EXTERNAL_LIBRARIES}
)
+
+# benchmark_model: executable built from the two sources under
+# tensorflow/tools/benchmark, with the core TensorFlow object libraries
+# listed below compiled directly into the binary.
+set(benchmark_model "benchmark_model")
+
+add_executable(${benchmark_model}
+ "${tensorflow_source_dir}/tensorflow/tools/benchmark/benchmark_model.cc"
+ "${tensorflow_source_dir}/tensorflow/tools/benchmark/benchmark_model_main.cc"
+ $<TARGET_OBJECTS:tf_core_lib>
+ $<TARGET_OBJECTS:tf_core_cpu>
+ $<TARGET_OBJECTS:tf_core_framework>
+ $<TARGET_OBJECTS:tf_core_ops>
+ $<TARGET_OBJECTS:tf_core_direct_session>
+ $<TARGET_OBJECTS:tf_core_kernels>
+)
+
+# Same link line as the compare_graphs target above: the generated protos,
+# the GPU kernel library variable and the external libraries.
+target_link_libraries(${benchmark_model} PUBLIC
+ tf_protos_cc
+ ${tf_core_gpu_kernels_lib}
+ ${tensorflow_EXTERNAL_LIBRARIES}
+)
diff --git a/tensorflow/contrib/cmake/tools/create_def_file.py b/tensorflow/contrib/cmake/tools/create_def_file.py
new file mode 100644
index 0000000000..950c8f79bc
--- /dev/null
+++ b/tensorflow/contrib/cmake/tools/create_def_file.py
@@ -0,0 +1,134 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""
+create_def_file.py - tool to create a windows def file to export
+symbols from tensorflow.dll to enable tf.load_op_library().
+Because the linker allows only 64K symbols to be exported per dll
+we filter the symbols down to the essentials. The regular expressions
+we use for this are specific to tensorflow.
+
+TODO: this works fine but there is an issue with exporting
+'const char * const' and importing it from a user_ops. The problem is
+on the importing end and using __declspec(dllimport) works around it.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import io
+import os
+import re
+import sys
+import tempfile
+from subprocess import Popen, PIPE
+
+# External tools we use that come with visual studio sdk and
+# we assume that the caller has the correct PATH to the sdk
+UNDNAME = "undname.exe"
+DUMPBIN = "dumpbin.exe"
+
+# Exclude if matched
+EXCLUDE_RE = re.compile(r"deleting destructor|::internal::")
+
+# Include if matched before exclude
+INCLUDEPRE_RE = re.compile(r"tensorflow::internal::LogMessage|" +
+ r"tensorflow::internal::CheckOpMessageBuilder")
+
+# Include if matched after exclude
+INCLUDE_RE = re.compile(r"^(TF_\w*)$|" +
+ r"tensorflow::|" +
+ r"functor::|" +
+ r"perftools::gputools")
+
+
+def get_args():
+  """Return the parsed command line; --input and --output are both required."""
+  cli = argparse.ArgumentParser()
+  for flag, description in (("--input", "input library"),
+                            ("--output", "output deffile")):
+    cli.add_argument(flag, help=description, required=True)
+  return cli.parse_args()
+
+
+def main():
+  """Write a .def file exporting the filtered symbols of the input library.
+
+  dumpbin lists the symbols of --input, undname undecorates them, and every
+  decorated name whose undecorated form passes the INCLUDEPRE_RE / EXCLUDE_RE /
+  INCLUDE_RE filters is written to --output.
+
+  Returns:
+    0 on success, otherwise the non-zero exit code of dumpbin or undname.
+  """
+  args = get_args()
+
+  # Pipe dumpbin to extract all linkable symbols from a lib.
+  # Good symbols are collected in candidates and also written to
+  # a temp file.
+  candidates = []
+  tmpfile = tempfile.NamedTemporaryFile(mode="w", delete=False)
+  try:
+    proc = Popen([DUMPBIN, "/nologo", "/linkermember:1", args.input],
+                 stdout=PIPE)
+    for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"):
+      cols = line.split()
+      if len(cols) < 2:
+        continue
+      sym = cols[1]
+      tmpfile.file.write(sym + "\n")
+      candidates.append(sym)
+    # Close now so the symbols are flushed before undname opens the file.
+    tmpfile.file.close()
+    exit_code = proc.wait()
+    if exit_code != 0:
+      print("{} failed, exit={}".format(DUMPBIN, exit_code))
+      return exit_code
+
+    # Run the symbols through undname to get their undecorated name
+    # so we can filter on something readable.
+    with open(args.output, "w") as def_fp:
+      # track dupes
+      taken = set()
+
+      # Header for the def file. The DLL is shipped as
+      # _pywrap_tensorflow_internal.pyd in the python wheel, so hint that name.
+      def_fp.write("LIBRARY _pywrap_tensorflow_internal.pyd\n")
+      def_fp.write("EXPORTS\n")
+      def_fp.write("\t ??1OpDef@tensorflow@@UEAA@XZ\n")
+
+      # Each symbol returned by undname matches the same position in
+      # candidates. We compare on undname but use the decorated name from
+      # candidates.
+      dupes = 0
+      proc = Popen([UNDNAME, tmpfile.name], stdout=PIPE)
+      for idx, line in enumerate(
+          io.TextIOWrapper(proc.stdout, encoding="utf-8")):
+        decorated = candidates[idx]
+        if decorated in taken:
+          # Symbol is already in output, done.
+          dupes += 1
+          continue
+
+        # INCLUDEPRE_RE is checked first so that it can keep symbols which
+        # EXCLUDE_RE would otherwise drop.
+        if not INCLUDEPRE_RE.search(line):
+          if EXCLUDE_RE.search(line):
+            continue
+          if not INCLUDE_RE.search(line):
+            continue
+
+        def_fp.write("\t" + decorated + "\n")
+        taken.add(decorated)
+    exit_code = proc.wait()
+    if exit_code != 0:
+      print("{} failed, exit={}".format(UNDNAME, exit_code))
+      return exit_code
+  finally:
+    # The file was created with delete=False, so remove it on every exit
+    # path (tool failure, exception, success). Close first because an open
+    # file cannot be unlinked on Windows.
+    tmpfile.close()
+    os.unlink(tmpfile.name)
+
+  print("symbols={}, taken={}, dupes={}"
+        .format(len(candidates), len(taken), dupes))
+  return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/tensorflow/contrib/framework/python/framework/checkpoint_utils.py b/tensorflow/contrib/framework/python/framework/checkpoint_utils.py
index 4cd3efafa0..5d078236ac 100644
--- a/tensorflow/contrib/framework/python/framework/checkpoint_utils.py
+++ b/tensorflow/contrib/framework/python/framework/checkpoint_utils.py
@@ -280,10 +280,11 @@ def init_from_checkpoint(checkpoint_dir, assignment_map):
for var_name in scope_variables:
# Lookup name with specified prefix and suffix from current variable.
# If tensor_name given is '/' (root), don't use it for full name.
+ full_tensor_name = var_name[len(scopes):]
+ if current_var_or_name != "/":
+ full_tensor_name = full_tensor_name[1:]
if tensor_name_in_ckpt != "/":
- full_tensor_name = tensor_name_in_ckpt + var_name[len(scopes) + 1:]
- else:
- full_tensor_name = var_name[len(scopes) + 1:]
+ full_tensor_name = tensor_name_in_ckpt + full_tensor_name
if full_tensor_name not in variable_map:
raise ValueError(
"Tensor %s (%s in %s) is not found in %s checkpoint" % (
diff --git a/tensorflow/contrib/framework/python/framework/checkpoint_utils_test.py b/tensorflow/contrib/framework/python/framework/checkpoint_utils_test.py
index 09eecb56dc..51ca5ec125 100644
--- a/tensorflow/contrib/framework/python/framework/checkpoint_utils_test.py
+++ b/tensorflow/contrib/framework/python/framework/checkpoint_utils_test.py
@@ -168,6 +168,29 @@ class CheckpointsTest(test.TestCase):
self.assertAllEqual(my3.eval(session), v3)
self.assertAllEqual(my4.eval(session), v4)
+ def testInitToRootCheckpoint(self):
+   """Checks that the root-to-root mapping {"/": "/"} restores all variables."""
+   checkpoint_dir = self.get_temp_dir()
+   with self.test_session() as session:
+     saved_values = _create_checkpoints(session, checkpoint_dir)
+
+   # New graph and session.
+   with ops.Graph().as_default() as g:
+     with self.test_session(graph=g) as session:
+       restored = [
+           variable_scope.get_variable("var1", [1, 10]),
+           variable_scope.get_variable("var2", [10, 10]),
+           variable_scope.get_variable("var3", [100, 100]),
+       ]
+       with variable_scope.variable_scope("useful_scope"):
+         restored.append(variable_scope.get_variable("var4", [9, 9]))
+
+       checkpoint_utils.init_from_checkpoint(checkpoint_dir, {"/": "/"})
+
+       session.run(variables.global_variables_initializer())
+       # Variables are compared with the checkpointed values in creation
+       # order (var1, var2, var3, useful_scope/var4).
+       for variable, saved in zip(restored, saved_values):
+         self.assertAllEqual(variable.eval(session), saved)
+
def testInitFromPartitionVar(self):
checkpoint_dir = self.get_temp_dir()
with self.test_session() as session:
diff --git a/tensorflow/contrib/framework/python/ops/arg_scope.py b/tensorflow/contrib/framework/python/ops/arg_scope.py
index b7ec9ba936..ad84cd681a 100644
--- a/tensorflow/contrib/framework/python/ops/arg_scope.py
+++ b/tensorflow/contrib/framework/python/ops/arg_scope.py
@@ -30,11 +30,15 @@
net = layers.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
net = layers.conv2d(net, 256, [5, 5], scope='conv2')
```
- The first call to conv2d will use predefined args:
- layers.conv2d(inputs, 64, [11, 11], 4, padding='VALID', ..., scope='conv1')
+ The first call to conv2d will behave as follows:
+ layers.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
+ initializer=layers.variance_scaling_initializer(),
+ regularizer=layers.l2_regularizer(0.05), scope='conv1')
- The second call to conv2d will overwrite padding:
- layers.conv2d(inputs, 256, [5, 5], padding='SAME', ..., scope='conv2')
+ The second call to conv2d will also use the arg_scope's default for padding:
+ layers.conv2d(inputs, 256, [5, 5], padding='SAME',
+ initializer=layers.variance_scaling_initializer(),
+ regularizer=layers.l2_regularizer(0.05), scope='conv2')
Example of how to reuse an arg_scope:
@@ -49,7 +53,7 @@
net = layers.conv2d(net, 256, [5, 5], scope='conv2')
```
- Example of how to use tf.contrib.framework.add_arg_scope:
+ Example of how to use tf.contrib.framework.add_arg_scope to enable your function to be called within an arg_scope later:
@tf.contrib.framework.add_arg_scope
def conv2d(*args, **kwargs)
diff --git a/tensorflow/contrib/layers/__init__.py b/tensorflow/contrib/layers/__init__.py
index e746107e36..6ba8f7e8ae 100644
--- a/tensorflow/contrib/layers/__init__.py
+++ b/tensorflow/contrib/layers/__init__.py
@@ -40,6 +40,7 @@ See the @{$python/contrib.layers} guide.
@@softmax
@@stack
@@unit_norm
+@@bow_encoder
@@embed_sequence
@@apply_regularization
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index adbbcea02f..07be8e9990 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -160,9 +160,8 @@ def _fused_batch_norm(
they need to be added as a dependency to the `train_op`, example:
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
- if update_ops:
- updates = tf.group(*update_ops)
- total_loss = control_flow_ops.with_dependencies([updates], total_loss)
+ with tf.control_dependencies(update_ops):
+ train_op = optimizer.minimize(loss)
One can set updates_collections=None to force the updates in place, but that
can have speed penalty, especially in distributed settings.
@@ -393,9 +392,8 @@ def batch_norm(inputs,
they need to be added as a dependency to the `train_op`, example:
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
- if update_ops:
- updates = tf.group(*update_ops)
- total_loss = control_flow_ops.with_dependencies([updates], total_loss)
+ with tf.control_dependencies(update_ops):
+ train_op = optimizer.minimize(loss)
One can set updates_collections=None to force the updates in place, but that
can have speed penalty, especially in distributed settings.
diff --git a/tensorflow/contrib/learn/__init__.py b/tensorflow/contrib/learn/__init__.py
index 85cef3d8db..bd56066b1b 100644
--- a/tensorflow/contrib/learn/__init__.py
+++ b/tensorflow/contrib/learn/__init__.py
@@ -33,6 +33,7 @@ See the @{$python/contrib.learn} guide.
@@DNNLinearCombinedRegressor
@@DNNLinearCombinedEstimator
@@DNNLinearCombinedClassifier
+@@DynamicRnnEstimator
@@LinearClassifier
@@LinearEstimator
@@LinearRegressor
diff --git a/tensorflow/contrib/learn/python/learn/README.md b/tensorflow/contrib/learn/python/learn/README.md
index f412c83a97..0aae178e9a 100644
--- a/tensorflow/contrib/learn/python/learn/README.md
+++ b/tensorflow/contrib/learn/python/learn/README.md
@@ -20,18 +20,17 @@ Optionally you can install [scikit-learn](http://scikit-learn.org/stable/) and [
### Tutorials
-- [TF Learn Quickstart](../../../../g3doc/tutorials/tflearn/index.md). Build,
+- [TF Learn Quickstart](https://www.tensorflow.org/get_started/tflearn). Build,
train, and evaluate a neural network with just a few lines of code.
-- [Input Functions](../../../../g3doc/tutorials/input_fn/index.md). Learn how
+- [Input Functions](https://www.tensorflow.org/get_started/input_fn). Learn how
to create input functions to feed data into your models.
-- [Linear Model](../../../../g3doc/tutorials/wide/index.md). Learn the basics
+- [Linear Model](https://www.tensorflow.org/tutorials/wide). Learn the basics
of building linear models.
-- [Wide and Deep
- Learning](../../../../g3doc/tutorials/wide_and_deep/index.md). Jointly train
- a linear model and a deep neural network.
-- [Logging and Monitoring](../../../../g3doc/tutorials/monitors/index.md). Use
- the Monitor API to audit training of a neural network.
-- [Custom Estimators](../../../../g3doc/tutorials/estimators/index.md). Learn
+- [Wide and Deep Learning](https://www.tensorflow.org/tutorials/wide_and_deep).
+ Jointly train a linear model and a deep neural network.
+- [Logging and Monitoring](https://www.tensorflow.org/get_started/monitors).
+ Use the Monitor API to audit training of a neural network.
+- [Custom Estimators](https://www.tensorflow.org/extend/estimators). Learn
how to create a custom estimator.
- More coming soon.
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
index 6d591c42c6..7a95296945 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
@@ -1108,7 +1108,7 @@ class Estimator(BaseEstimator):
if isinstance(model_fn_results, model_fn_lib.ModelFnOps):
return model_fn_results
- # Here model_fn_ops should be a tuple with 3 elements.
+ # Here model_fn_results should be a tuple with 3 elements.
if len(model_fn_results) != 3:
raise ValueError('Unrecognized value returned by model_fn, '
'please return ModelFnOps.')
diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py
index faf78a3675..d7f1017a46 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/linear.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py
@@ -149,21 +149,16 @@ def _linear_model_fn(features, labels, mode, params, config=None):
values=tuple(six.itervalues(features)),
partitioner=partitioner) as scope:
if joint_weights:
- logits, _, _ = (
- layers.joint_weighted_sum_from_feature_columns(
- columns_to_tensors=features,
- feature_columns=feature_columns,
- num_outputs=head.logits_dimension,
- weight_collections=[parent_scope],
- scope=scope))
+ layer_fn = layers.joint_weighted_sum_from_feature_columns
else:
- logits, _, _ = (
- layers.weighted_sum_from_feature_columns(
- columns_to_tensors=features,
- feature_columns=feature_columns,
- num_outputs=head.logits_dimension,
- weight_collections=[parent_scope],
- scope=scope))
+ layer_fn = layers.weighted_sum_from_feature_columns
+
+ logits, _, _ = layer_fn(
+ columns_to_tensors=features,
+ feature_columns=feature_columns,
+ num_outputs=head.logits_dimension,
+ weight_collections=[parent_scope],
+ scope=scope)
def _train_op_fn(loss):
global_step = contrib_variables.get_global_step()
diff --git a/tensorflow/contrib/learn/python/learn/tests/dataframe/tensorflow_dataframe_test.py b/tensorflow/contrib/learn/python/learn/tests/dataframe/tensorflow_dataframe_test.py
index c164a12b1d..09f19ad274 100644
--- a/tensorflow/contrib/learn/python/learn/tests/dataframe/tensorflow_dataframe_test.py
+++ b/tensorflow/contrib/learn/python/learn/tests/dataframe/tensorflow_dataframe_test.py
@@ -63,57 +63,54 @@ def _assert_df_equals_dict(expected_df, actual_dict):
actual_dict[col]))
-def _make_test_csv():
- f = tempfile.NamedTemporaryFile(
- dir=test.get_temp_dir(), delete=False, mode="w")
- w = csv.writer(f)
- w.writerow(["int", "float", "bool", "string"])
- for _ in range(100):
- intvalue = np.random.randint(-10, 10)
- floatvalue = np.random.rand()
- boolvalue = int(np.random.rand() > 0.3)
- stringvalue = "S: %.4f" % np.random.rand()
-
- row = [intvalue, floatvalue, boolvalue, stringvalue]
- w.writerow(row)
- f.close()
- return f.name
-
-
-def _make_test_csv_sparse():
- f = tempfile.NamedTemporaryFile(
- dir=test.get_temp_dir(), delete=False, mode="w")
- w = csv.writer(f)
- w.writerow(["int", "float", "bool", "string"])
- for _ in range(100):
- # leave columns empty; these will be read as default value (e.g. 0 or NaN)
- intvalue = np.random.randint(-10, 10) if np.random.rand() > 0.5 else ""
- floatvalue = np.random.rand() if np.random.rand() > 0.5 else ""
- boolvalue = int(np.random.rand() > 0.3) if np.random.rand() > 0.5 else ""
- stringvalue = (("S: %.4f" % np.random.rand()) if np.random.rand() > 0.5 else
- "")
-
- row = [intvalue, floatvalue, boolvalue, stringvalue]
- w.writerow(row)
- f.close()
- return f.name
-
-
-def _make_test_tfrecord():
- f = tempfile.NamedTemporaryFile(dir=test.get_temp_dir(), delete=False)
- w = tf_record.TFRecordWriter(f.name)
- for i in range(100):
- ex = example_pb2.Example()
- ex.features.feature["var_len_int"].int64_list.value.extend(range((i % 3)))
- ex.features.feature["fixed_len_float"].float_list.value.extend(
- [float(i), 2 * float(i)])
- w.write(ex.SerializeToString())
- return f.name
-
-
class TensorFlowDataFrameTestCase(test.TestCase):
"""Tests for `TensorFlowDataFrame`."""
+ def _make_test_csv(self):
+   """Writes a header plus 100 rows of random int/float/bool/string values.
+
+   Returns:
+     Path of the CSV file created in this test's temp directory.
+   """
+   with tempfile.NamedTemporaryFile(
+       dir=self.get_temp_dir(), delete=False, mode="w") as csv_file:
+     writer = csv.writer(csv_file)
+     writer.writerow(["int", "float", "bool", "string"])
+     for _ in range(100):
+       # Cells are evaluated left to right, i.e. one randint draw followed by
+       # three rand draws per row.
+       writer.writerow([
+           np.random.randint(-10, 10),
+           np.random.rand(),
+           int(np.random.rand() > 0.3),
+           "S: %.4f" % np.random.rand(),
+       ])
+   return csv_file.name
+
+ def _make_test_csv_sparse(self):
+   """Writes a header plus 100 rows in which each cell may randomly be empty.
+
+   Returns:
+     Path of the CSV file created in this test's temp directory.
+   """
+   def _maybe_blank(make_value):
+     # Flip the coin first and draw the value only when the cell is kept, so
+     # the sequence of random draws matches an inline conditional expression.
+     return make_value() if np.random.rand() > 0.5 else ""
+
+   with tempfile.NamedTemporaryFile(
+       dir=self.get_temp_dir(), delete=False, mode="w") as csv_file:
+     writer = csv.writer(csv_file)
+     writer.writerow(["int", "float", "bool", "string"])
+     for _ in range(100):
+       # leave columns empty; these will be read as default value (e.g. 0 or NaN)
+       writer.writerow([
+           _maybe_blank(lambda: np.random.randint(-10, 10)),
+           _maybe_blank(np.random.rand),
+           _maybe_blank(lambda: int(np.random.rand() > 0.3)),
+           _maybe_blank(lambda: "S: %.4f" % np.random.rand()),
+       ])
+   return csv_file.name
+
+ def _make_test_tfrecord(self):
+   """Writes 100 serialized `Example` protos to a TFRecord file.
+
+   Example `i` holds "var_len_int" = range(i % 3) and
+   "fixed_len_float" = [float(i), 2 * float(i)].
+
+   Returns:
+     Path of the TFRecord file created in this test's temp directory.
+   """
+   # The temp file is only used to reserve a unique name; close it right away
+   # so the record writer below holds the only open handle on the path.
+   f = tempfile.NamedTemporaryFile(dir=self.get_temp_dir(), delete=False)
+   f.close()
+   w = tf_record.TFRecordWriter(f.name)
+   for i in range(100):
+     ex = example_pb2.Example()
+     ex.features.feature["var_len_int"].int64_list.value.extend(range((i % 3)))
+     ex.features.feature["fixed_len_float"].float_list.value.extend(
+         [float(i), 2 * float(i)])
+     w.write(ex.SerializeToString())
+   # Close explicitly so every record is flushed before a reader opens the
+   # file, instead of relying on garbage collection of the writer.
+   w.close()
+   return f.name
+
def _assert_pandas_equals_tensorflow(self, pandas_df, tensorflow_df,
num_batches, batch_size):
self.assertItemsEqual(
@@ -190,7 +187,7 @@ class TensorFlowDataFrameTestCase(test.TestCase):
batch_size = 8
enqueue_size = 7
- data_path = _make_test_csv()
+ data_path = self._make_test_csv()
default_values = [0, 0.0, 0, ""]
pandas_df = pd.read_csv(data_path)
@@ -211,7 +208,7 @@ class TensorFlowDataFrameTestCase(test.TestCase):
num_epochs = 17
expected_num_batches = (num_epochs * 100) // batch_size
- data_path = _make_test_csv()
+ data_path = self._make_test_csv()
default_values = [0, 0.0, 0, ""]
tensorflow_df = df.TensorFlowDataFrame.from_csv(
@@ -234,7 +231,7 @@ class TensorFlowDataFrameTestCase(test.TestCase):
num_batches = 100
batch_size = 8
- data_path = _make_test_csv_sparse()
+ data_path = self._make_test_csv_sparse()
feature_spec = {
"int": parsing_ops.FixedLenFeature(None, dtypes.int16, np.nan),
"float": parsing_ops.VarLenFeature(dtypes.float16),
@@ -270,7 +267,7 @@ class TensorFlowDataFrameTestCase(test.TestCase):
enqueue_size = 11
batch_size = 13
- data_path = _make_test_tfrecord()
+ data_path = self._make_test_tfrecord()
features = {
"fixed_len_float":
parsing_ops.FixedLenFeature(
@@ -318,7 +315,7 @@ class TensorFlowDataFrameTestCase(test.TestCase):
num_epochs = 17
expected_num_batches = (num_epochs * 100) // batch_size
- data_path = _make_test_csv()
+ data_path = self._make_test_csv()
default_values = [0, 0.0, 0, ""]
tensorflow_df = df.TensorFlowDataFrame.from_csv(
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py
index 1e5795d035..c1ba9d4ead 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py
@@ -261,7 +261,7 @@ def streaming_false_negatives(predictions, labels, weights=None,
metrics_collections=None,
updates_collections=None,
name=None):
- """Computes the total number of false positives.
+ """Computes the total number of false negatives.
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
diff --git a/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py b/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py
index 755ebd048b..f44302638e 100644
--- a/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py
+++ b/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py
@@ -13,7 +13,14 @@
# limitations under the License.
# ==============================================================================
-"""Module implementing RNN Cells."""
+"""Module implementing RNN Cells.
+
+This module provides a number of basic commonly used RNN cells, such as LSTM
+(Long Short Term Memory) or GRU (Gated Recurrent Unit), and a number of
+operators that allow adding dropouts, projections, or embeddings for inputs.
+Constructing multi-layer cells is supported by the class `MultiRNNCell`, or by
+calling the `rnn` ops several times.
+"""
from __future__ import absolute_import
from __future__ import division
@@ -146,12 +153,12 @@ class GRUCell(RNNCell):
with _checked_scope(self, scope or "gru_cell", reuse=self._reuse):
with vs.variable_scope("gates"): # Reset gate and update gate.
# We start with bias of 1.0 to not reset and not update.
+ value = sigmoid(_linear(
+ [inputs, state], 2 * self._num_units, True, 1.0))
r, u = array_ops.split(
- value=_linear(
- [inputs, state], 2 * self._num_units, True, 1.0),
+ value=value,
num_or_size_splits=2,
axis=1)
- r, u = sigmoid(r), sigmoid(u)
with vs.variable_scope("candidate"):
c = self._activation(_linear([inputs, r * state],
self._num_units, True))
diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
index c1ec46d763..318b552f4a 100644
--- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py
+++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
@@ -70,7 +70,7 @@ def _lstm_block_cell(x,
cs = ci .* i + cs_prev .* f
cs = clip(cs, cell_clip)
- o = sigmoid(cs * wco + f)
+ o = sigmoid(cs * wco + o)
co = tanh(cs)
h = co .* o
```
diff --git a/tensorflow/contrib/seq2seq/python/ops/helper.py b/tensorflow/contrib/seq2seq/python/ops/helper.py
index 34367db01b..616de3199c 100644
--- a/tensorflow/contrib/seq2seq/python/ops/helper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/helper.py
@@ -486,7 +486,7 @@ class GreedyEmbeddingHelper(Helper):
# Outputs are logits, use argmax to get the most probable id
if not isinstance(outputs, ops.Tensor):
raise TypeError("Expected outputs to be a single Tensor, got: %s" %
- outputs)
+ type(outputs))
sample_ids = math_ops.cast(
math_ops.argmax(outputs, axis=-1), dtypes.int32)
return sample_ids
diff --git a/tensorflow/contrib/seq2seq/python/ops/loss.py b/tensorflow/contrib/seq2seq/python/ops/loss.py
index e14f07bc09..61852eda4f 100644
--- a/tensorflow/contrib/seq2seq/python/ops/loss.py
+++ b/tensorflow/contrib/seq2seq/python/ops/loss.py
@@ -44,8 +44,7 @@ def sequence_loss(logits, targets, weights,
sequence. When using weights as masking set all valid timesteps to 1 and
all padded timesteps to 0.
average_across_timesteps: If set, sum the cost across the sequence
- dimension and divide by the cost by the total label weight across
- timesteps.
+ dimension and divide the cost by the total label weight across timesteps.
average_across_batch: If set, sum the cost across the batch dimension and
divide the returned cost by the batch size.
softmax_loss_function: Function (labels-batch, inputs-batch) -> loss-batch
diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md
index 898d3a11d0..94b0263ae8 100644
--- a/tensorflow/contrib/slim/README.md
+++ b/tensorflow/contrib/slim/README.md
@@ -229,7 +229,7 @@ net = ...
net = slim.conv2d(net, 256, [3, 3], scope='conv3_1')
net = slim.conv2d(net, 256, [3, 3], scope='conv3_2')
net = slim.conv2d(net, 256, [3, 3], scope='conv3_3')
-net = slim.max_pool2d(net, [2, 2], scope='pool3')
+net = slim.max_pool2d(net, [2, 2], scope='pool2')
```
One way to reduce this code duplication would be via a `for` loop:
@@ -238,14 +238,14 @@ One way to reduce this code duplication would be via a `for` loop:
net = ...
for i in range(3):
net = slim.conv2d(net, 256, [3, 3], scope='conv3_' % (i+1))
-net = slim.max_pool2d(net, [2, 2], scope='pool3')
+net = slim.max_pool2d(net, [2, 2], scope='pool2')
```
This can be made even cleaner by using TF-Slim's `repeat` operation:
```python
net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
-net = slim.max_pool(net, [2, 2], scope='pool2')
+net = slim.max_pool2d(net, [2, 2], scope='pool2')
```
Notice that the `slim.repeat` not only applies the same argument in-line, it
diff --git a/tensorflow/contrib/util/loader.py b/tensorflow/contrib/util/loader.py
index 95657217a0..c2ae425b56 100644
--- a/tensorflow/contrib/util/loader.py
+++ b/tensorflow/contrib/util/loader.py
@@ -21,6 +21,7 @@ from __future__ import division
from __future__ import print_function
import os
+import re
from tensorflow.python.framework import load_library
from tensorflow.python.platform import resource_loader
@@ -29,9 +30,9 @@ from tensorflow.python.platform import resource_loader
def load_op_library(path):
"""Loads a contrib op library from the given path.
- NOTE(mrry): On Windows, we currently assume that contrib op
+ NOTE(mrry): On Windows, we currently assume that some contrib op
libraries are statically linked into the main TensorFlow Python
- extension DLL.
+ extension DLL - use dynamically linked ops if the .so is present.
Args:
path: An absolute path to a shared object file.
@@ -40,11 +41,17 @@ def load_op_library(path):
A Python module containing the Python wrappers for Ops defined in the
plugin.
"""
- if os.name != 'nt':
- path = resource_loader.get_path_to_datafile(path)
- ret = load_library.load_op_library(path)
- assert ret, 'Could not load %s' % path
- return ret
- else:
- # NOTE(mrry):
- return None
+ if os.name == 'nt':
+ # To avoid making every user_ops aware of windows, re-write
+ # the file extension from .so to .dll.
+ path = re.sub('\.so$', '.dll', path)
+
+ # TODO: currently we have only some user_ops as .dll's on windows - don't try
+ # to load them if the dll is not found. Once we have all of them
+ # this check should be removed.
+ if not os.path.exists(path):
+ return None
+ path = resource_loader.get_path_to_datafile(path)
+ ret = load_library.load_op_library(path)
+ assert ret, 'Could not load %s' % path
+ return ret
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index ea434c3eb2..79d44c5a0c 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -339,6 +339,7 @@ tf_cuda_library(
hdrs = [
"example/feature_util.h",
"framework/allocator.h",
+ "framework/allocator_registry.h",
"framework/attr_value_util.h",
"framework/bfloat16.h",
"framework/cancellation.h",
@@ -408,7 +409,9 @@ tf_cuda_library(
"util/memmapped_file_system.h",
"util/memmapped_file_system_writer.h",
],
- }),
+ }) + if_mkl([
+ "util/mkl_util.h",
+ ]),
visibility = ["//visibility:public"],
deps = [":framework_internal"],
)
@@ -707,7 +710,9 @@ cc_library(
"//tensorflow/core/kernels:math_not_windows",
"//tensorflow/core/kernels:quantized_ops",
]) + if_mkl([
- "//tensorflow/core/kernels:mkl_ops",
+ "//tensorflow/core/kernels:mkl_conv_op",
+ "//tensorflow/core/kernels:mkl_matmul_op",
+ "//tensorflow/core/kernels:mkl_tfconv_op",
]),
)
@@ -772,7 +777,7 @@ cc_library(
"//tensorflow/core/kernels:constant_op",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
- "//tensorflow/core/platform/default/build_config:gtest", # + if_sycl([":sycl_runtime"]),
+ "//tensorflow/core/platform/default/build_config:gtest", # + if_sycl([":sycl_runtime"])
],
)
@@ -1393,7 +1398,7 @@ tf_cuda_library(
":version_lib",
"//tensorflow/core/kernels:bounds_check",
"//third_party/eigen3",
- ],
+ ] + if_mkl(["//third_party/mkl:intel_binary_blob"]),
alwayslink = 1,
)
@@ -1482,20 +1487,21 @@ tf_cuda_library(
),
copts = tf_copts(),
deps = [
- ":framework",
- ":framework_internal",
- ":function_ops_op_lib",
- ":functional_grad",
- ":functional_ops_op_lib",
- ":lib",
- ":lib_internal",
- ":proto_text",
- ":protos_all_cc",
- "//tensorflow/core/grappler:grappler_item",
- "//tensorflow/core/grappler/optimizers:meta_optimizer",
- "//third_party/eigen3",
- "//tensorflow/core/kernels:required",
- ] + tf_additional_core_deps(),
+ ":framework",
+ ":framework_internal",
+ ":function_ops_op_lib",
+ ":functional_grad",
+ ":functional_ops_op_lib",
+ ":lib",
+ ":lib_internal",
+ ":proto_text",
+ ":protos_all_cc",
+ "//tensorflow/core/grappler:grappler_item",
+ "//tensorflow/core/grappler/optimizers:meta_optimizer",
+ "//third_party/eigen3",
+ "//tensorflow/core/kernels:required",
+ ] + if_mkl(["//third_party/mkl:intel_binary_blob"]) +
+ tf_additional_core_deps(),
alwayslink = 1,
)
@@ -2037,33 +2043,38 @@ tf_cc_tests(
],
)
-if_mkl(
- tf_cc_test_mkl(
- name = "mkl_related_tests",
- size = "small",
- srcs = ["graph/mkl_optimizer_merge_test.cc"],
- linkstatic = tf_kernel_tests_linkstatic(),
- deps = [
- ":core",
- ":core_cpu",
- ":core_cpu_internal",
- ":direct_session_internal",
- ":framework",
- ":framework_internal",
- ":lib",
- ":lib_internal",
- ":ops",
- ":protos_all_cc", # under if_mkl
- ":test",
- ":test_main",
- ":testlib",
- "//tensorflow/cc:cc_ops",
- "//tensorflow/cc:scope",
- "//tensorflow/cc:sendrecv_ops",
- "//tensorflow/core/kernels:ops_util",
- "//third_party/eigen3",
- ],
- ),
+tf_cc_test_mkl(
+ name = "mkl_related_tests",
+ size = "small",
+ srcs = [
+ "graph/mkl_layout_pass_test.cc",
+ "graph/mkl_optimizer_merge_test.cc",
+ "graph/mkl_tfconversion_pass_test.cc",
+ ],
+ linkstatic = tf_kernel_tests_linkstatic(),
+ deps = [
+ ":core",
+ ":core_cpu",
+ ":core_cpu_internal",
+ ":direct_session_internal",
+ ":framework",
+ ":framework_internal",
+ ":lib",
+ ":lib_internal",
+ ":ops",
+ ":protos_all_cc",
+ ":test",
+ ":test_main",
+ ":testlib",
+ "//tensorflow/cc:cc_ops",
+ "//tensorflow/cc:scope",
+ "//tensorflow/cc:sendrecv_ops",
+ "//tensorflow/core/kernels:mkl_conv_op",
+ "//tensorflow/core/kernels:mkl_matmul_op",
+ "//tensorflow/core/kernels:mkl_tfconv_op",
+ "//tensorflow/core/kernels:ops_util",
+ "//third_party/eigen3",
+ ],
)
tf_cc_tests_gpu(
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
new file mode 100644
index 0000000000..41bf23be27
--- /dev/null
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -0,0 +1,120 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// A simple CPU allocator that intercepts malloc/free calls from MKL library
+// and redirects them to Tensorflow allocator
+
+#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_MKL_CPU_ALLOCATOR_H_
+#define TENSORFLOW_CORE_COMMON_RUNTIME_MKL_CPU_ALLOCATOR_H_
+
+#ifdef INTEL_MKL
+
+#include <string>
+#include "tensorflow/core/common_runtime/bfc_allocator.h"
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/platform/mem.h"
+
+#include "third_party/mkl/include/i_malloc.h"
+
+namespace tensorflow {
+
+/// SubAllocator that MklCPUAllocator hands to its BFCAllocator; it obtains
+/// and releases memory through port::AlignedMalloc / port::AlignedFree.
+class MklSubAllocator : public SubAllocator {
+ public:
+  ~MklSubAllocator() override {}
+
+  /// Returns `num_bytes` bytes from port::AlignedMalloc using `alignment`.
+  void* Alloc(size_t alignment, size_t num_bytes) override {
+    void* region = port::AlignedMalloc(num_bytes, alignment);
+    return region;
+  }
+
+  /// Releases `ptr` through port::AlignedFree; `num_bytes` is not used.
+  void Free(void* ptr, size_t num_bytes) override {
+    port::AlignedFree(ptr);
+  }
+};
+
+/// CPU allocator for MKL that wraps BFC allocator and intercepts
+/// and redirects memory allocation calls from MKL.
+class MklCPUAllocator : public Allocator {
+ public:
+  // Constructor and other standard functions
+
+  /// Creates the backing BFC allocator (fed by MklSubAllocator) and points
+  /// MKL's i_malloc / i_calloc / i_realloc / i_free hooks at the static
+  /// hook functions declared in the private section.
+  MklCPUAllocator() {
+    VLOG(2) << "MklCPUAllocator: In MklCPUAllocator";
+    allocator_ =
+        new BFCAllocator(new MklSubAllocator, kMaxMemSize, kAllowGrowth, kName);
+
+    // For redirecting all allocations from MKL to this allocator
+    // From: http://software.intel.com/en-us/node/528565
+    i_malloc = MallocHook;
+    i_calloc = CallocHook;
+    i_realloc = ReallocHook;
+    i_free = FreeHook;
+  }
+
+  ~MklCPUAllocator() override { delete allocator_; }
+
+  inline string Name() override { return kName; }
+
+  /// Forwards the allocation request to the wrapped BFC allocator.
+  inline void* AllocateRaw(size_t alignment, size_t num_bytes) override {
+    return allocator_->AllocateRaw(alignment, num_bytes);
+  }
+
+  /// Forwards the deallocation request to the wrapped BFC allocator.
+  inline void DeallocateRaw(void* ptr) override {
+    allocator_->DeallocateRaw(ptr);
+  }
+
+ private:
+  // Hooks provided by this allocator for memory allocation routines from MKL
+
+  /// malloc replacement: allocates `size` bytes with kAlignment alignment
+  /// from cpu_allocator().
+  static inline void* MallocHook(size_t size) {
+    VLOG(2) << "MklCPUAllocator: In MallocHook";
+    return cpu_allocator()->AllocateRaw(kAlignment, size);
+  }
+
+  /// free replacement: returns `ptr` to cpu_allocator().
+  static inline void FreeHook(void* ptr) {
+    VLOG(2) << "MklCPUAllocator: In FreeHook";
+    cpu_allocator()->DeallocateRaw(ptr);
+  }
+
+  /// calloc replacement: not implemented; fails the TF_CHECK_OK below.
+  static inline void* CallocHook(size_t num, size_t size) {
+    Status s = Status(error::Code::UNIMPLEMENTED,
+                      "Unimplemented case for hooking MKL function.");
+    TF_CHECK_OK(s);  // way to assert with an error message
+    // Not expected to be reached, but a function returning void* must not
+    // flow off its end without a return statement.
+    return nullptr;
+  }
+
+  /// realloc replacement: not implemented; fails the TF_CHECK_OK below.
+  static inline void* ReallocHook(void* ptr, size_t size) {
+    Status s = Status(error::Code::UNIMPLEMENTED,
+                      "Unimplemented case for hooking MKL function.");
+    TF_CHECK_OK(s);  // way to assert with an error message
+    // Not expected to be reached, but a function returning void* must not
+    // flow off its end without a return statement.
+    return nullptr;
+  }
+
+  // TODO(jbobba): We should ideally move this into CPUOptions in config.proto.
+  /// Memory limit - 64GB
+  static const size_t kMaxMemSize =
+      static_cast<size_t>(64) * 1024 * 1024 * 1024;
+
+  /// Do we allow growth in BFC Allocator
+  static const bool kAllowGrowth = true;
+
+  /// Name
+  static constexpr const char* kName = "mklcpu";
+
+  /// The alignment that we need for the allocations
+  static const size_t kAlignment = 64;
+
+  Allocator* allocator_;  // owned by this class
+};
+
+} // namespace tensorflow
+
+#endif // INTEL_MKL
+
+#endif // TENSORFLOW_CORE_COMMON_RUNTIME_MKL_CPU_ALLOCATOR_H_
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index 686bc6885e..ca6ba7970f 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -17,6 +17,7 @@ limitations under the License.
#include "tensorflow/core/common_runtime/local_device.h"
#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/allocator_registry.h"
#include "tensorflow/core/framework/device_base.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.pb_text.h"
@@ -27,6 +28,10 @@ limitations under the License.
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session_options.h"
+#ifdef INTEL_MKL
+#include "tensorflow/core/common_runtime/mkl_cpu_allocator.h"
+#endif
+
namespace tensorflow {
ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
@@ -70,4 +75,8 @@ Status ThreadPoolDevice::MakeTensorFromProto(
ProtoDebugString(tensor_proto));
}
+#ifdef INTEL_MKL
+REGISTER_MEM_ALLOCATOR("MklCPUAllocator", 200, MklCPUAllocator);
+#endif
+
} // namespace tensorflow
diff --git a/tensorflow/core/framework/allocator.cc b/tensorflow/core/framework/allocator.cc
index ff31ad965b..943dcab362 100644
--- a/tensorflow/core/framework/allocator.cc
+++ b/tensorflow/core/framework/allocator.cc
@@ -15,6 +15,7 @@ limitations under the License.
#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/allocator_registry.h"
#include "tensorflow/core/framework/log_memory.h"
#include "tensorflow/core/framework/tracking_allocator.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
@@ -119,11 +120,13 @@ Allocator* MakeCpuAllocator() {
} // namespace
Allocator* cpu_allocator() {
- static Allocator* cpu_alloc = MakeCpuAllocator();
+ static Allocator* cpu_alloc = AllocatorRegistry::Global()->GetAllocator();
if (cpu_allocator_collect_full_stats && !cpu_alloc->TracksAllocationSizes()) {
cpu_alloc = new TrackingAllocator(cpu_alloc, true);
}
return cpu_alloc;
}
+REGISTER_MEM_ALLOCATOR("DefaultCPUAllocator", 100, CPUAllocator);
+
} // namespace tensorflow
diff --git a/tensorflow/core/framework/allocator_registry.cc b/tensorflow/core/framework/allocator_registry.cc
new file mode 100644
index 0000000000..792b1ceb5a
--- /dev/null
+++ b/tensorflow/core/framework/allocator_registry.cc
@@ -0,0 +1,66 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <string>
+
+#include "tensorflow/core/framework/allocator_registry.h"
+#include "tensorflow/core/platform/logging.h"
+
+
+namespace tensorflow {
+
+// static
+AllocatorRegistry* AllocatorRegistry::Global() {
+  // Function-local static: the registry is created on the first call and
+  // the same pointer is handed back on every later call.
+  static AllocatorRegistry* const registry = new AllocatorRegistry;
+  return registry;
+}
+
+// Returns true if an entry with the same name AND the same priority has
+// already been registered; false otherwise.
+bool AllocatorRegistry::CheckForDuplicates(const string& name, int priority) {
+  // Iterate by const reference: every entry holds a string, so iterating by
+  // value would copy it on each step.
+  for (const auto& entry : allocators_) {
+    if (priority == entry.priority && name == entry.name) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Adds 'allocator' to the registry under 'name' with 'priority' and then
+// re-selects the current allocator as the registered entry with the
+// highest priority (the earliest registered one wins on ties).
+// CHECK-fails if 'name' is empty, if 'priority' is negative, or if the same
+// (name, priority) pair was registered before.
+void AllocatorRegistry::Register(const string& name, int priority,
+                                 Allocator* allocator) {
+  CHECK(!name.empty()) << "Need a valid name for Allocator";
+  CHECK_GE(priority, 0) << "Priority needs to be non-negative";
+  CHECK(!CheckForDuplicates(name, priority)) << "Allocator with name: [" << name
+                                             << "] and priority: [" << priority
+                                             << "] already registered";
+
+  AllocatorRegistryEntry tmp_entry;
+  tmp_entry.name = name;
+  tmp_entry.priority = priority;
+  tmp_entry.allocator = allocator;
+
+  allocators_.push_back(tmp_entry);
+
+  // Scan by const reference so that no entry (and its string) is copied.
+  int high_pri = -1;
+  for (const auto& entry : allocators_) {
+    if (high_pri < entry.priority) {
+      m_curr_allocator_ = entry.allocator;
+      high_pri = entry.priority;
+    }
+  }
+}
+
+Allocator* AllocatorRegistry::GetAllocator() {  // Allocator chosen by Register().
+ return CHECK_NOTNULL(m_curr_allocator_);  // Fatal if the pointer is null.
+}
+
+} // namespace tensorflow
diff --git a/tensorflow/core/framework/allocator_registry.h b/tensorflow/core/framework/allocator_registry.h
new file mode 100644
index 0000000000..c419366ae1
--- /dev/null
+++ b/tensorflow/core/framework/allocator_registry.h
@@ -0,0 +1,77 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Classes to maintain a static registry of memory allocators
+#ifndef TENSORFLOW_CORE_FRAMEWORK_ALLOCATOR_REGISTRY_H_
+#define TENSORFLOW_CORE_FRAMEWORK_ALLOCATOR_REGISTRY_H_
+
+#include <string>
+#include <vector>
+
+#include "tensorflow/core/framework/allocator.h"
+
+namespace tensorflow {
+
+// A global AllocatorRegistry is used to hold allocators for CPU backends
+class AllocatorRegistry {
+ public:
+  // Add an allocator to the registry.
+  // CHECK-fails on an empty name, a negative priority, or a (name, priority)
+  // pair that is already registered.
+  void Register(const string& name, int priority, Allocator* allocator);
+
+  // Return allocator with highest priority
+  // If multiple allocators have the same high priority, return one of them
+  Allocator* GetAllocator();
+
+  // Returns the global registry of allocators.
+  static AllocatorRegistry* Global();
+
+ private:
+  typedef struct {
+    string name;
+    int priority;
+    Allocator* allocator;  // not owned
+  } AllocatorRegistryEntry;
+
+  // Returns true if (name, priority) has already been registered.
+  bool CheckForDuplicates(const string& name, int priority);
+
+  std::vector<AllocatorRegistryEntry> allocators_;
+  // Explicitly null until Register() selects an allocator, so that the
+  // null check in GetAllocator() never reads an indeterminate pointer.
+  Allocator* m_curr_allocator_ = nullptr;  // not owned
+};
+
+namespace allocator_registration {
+
+// Helper whose constructor registers 'allocator' with the global
+// AllocatorRegistry under the given name and priority.
+class AllocatorRegistration {
+ public:
+  AllocatorRegistration(const string& name, int priority,
+                        Allocator* allocator) {
+    AllocatorRegistry* const registry = AllocatorRegistry::Global();
+    registry->Register(name, priority, allocator);
+  }
+};
+
+} // namespace allocator_registration
+
+#define REGISTER_MEM_ALLOCATOR(name, priority, allocator) \
+ REGISTER_MEM_ALLOCATOR_UNIQ_HELPER(__COUNTER__, name, priority, allocator)
+
+#define REGISTER_MEM_ALLOCATOR_UNIQ_HELPER(ctr, name, priority, allocator) \
+ REGISTER_MEM_ALLOCATOR_UNIQ(ctr, name, priority, allocator)
+
+#define REGISTER_MEM_ALLOCATOR_UNIQ(ctr, name, priority, allocator) \
+ static allocator_registration::AllocatorRegistration \
+ register_allocator_##ctr(name, priority, new allocator)
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_CORE_FRAMEWORK_ALLOCATOR_REGISTRY_H_
diff --git a/tensorflow/core/framework/type_index.h b/tensorflow/core/framework/type_index.h
index dfde25c21e..b978d90fa8 100644
--- a/tensorflow/core/framework/type_index.h
+++ b/tensorflow/core/framework/type_index.h
@@ -17,7 +17,7 @@ limitations under the License.
#define TENSORFLOW_FRAMEWORK_TYPE_INDEX_H_
#include <string>
-#ifdef __GXX_RTTI
+#if defined(__GXX_RTTI) || defined(_CPPRTTI)
#include <typeindex>
#include <typeinfo>
#endif // __GXX_RTTI
@@ -30,7 +30,7 @@ namespace tensorflow {
// binary sizes. The following #ifdef section provides a non-RTTI
// replacement for std::type_index (with a minimal set of functions needed by
// the TensorFlow framework, and more can be added if necessary).
-#ifndef __GXX_RTTI
+#if !defined(__GXX_RTTI) && !defined(_CPPRTTI)
// A thin TypeIndex class that mimics std::type_index but does not use RTTI. As
// a result, it does not provide the actual name of the type, and only returns a
diff --git a/tensorflow/core/framework/types.h b/tensorflow/core/framework/types.h
index 589730baf1..932d788f23 100644
--- a/tensorflow/core/framework/types.h
+++ b/tensorflow/core/framework/types.h
@@ -68,9 +68,9 @@ class DeviceType {
std::ostream& operator<<(std::ostream& os, const DeviceType& d);
// Convenient constants that can be passed to a DeviceType constructor
-extern const char* const DEVICE_CPU; // "CPU"
-extern const char* const DEVICE_GPU; // "GPU"
-extern const char* const DEVICE_SYCL; // "SYCL"
+TF_EXPORT extern const char* const DEVICE_CPU; // "CPU"
+TF_EXPORT extern const char* const DEVICE_GPU; // "GPU"
+TF_EXPORT extern const char* const DEVICE_SYCL; // "SYCL"
typedef gtl::InlinedVector<MemoryType, 4> MemoryTypeVector;
typedef gtl::ArraySlice<MemoryType> MemoryTypeSlice;
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
new file mode 100644
index 0000000000..87850b3e9a
--- /dev/null
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -0,0 +1,548 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef INTEL_MKL
+
+#include <vector>
+#include <utility>
+#include <string>
+#include <memory>
+#include <unordered_set>
+#include <functional>
+
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/graph/algorithm.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
+#include "tensorflow/core/lib/hash/hash.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/common_runtime/optimization_registry.h"
+
+#include "tensorflow/core/graph/mkl_layout_pass.h"
+#include "tensorflow/core/util/mkl_util.h"
+
+namespace tensorflow {
+
+// This pass implements rewriting of graph for propagating Mkl
+// layout as an additional output tensor (we will loosely call a
+// tensor that carries Mkl layout as Mkl tensor henceforth.)
+// from every Mkl supported NN layer.
+//
+// As an example, consider Relu layer. Current definition of Relu
+// layer looks like:
+//
+// O = Relu(A)
+//
+// Relu has 1 input (A), and 1 output (O).
+//
+// This rewrite pass will generate a new graph node for Relu
+// (new node is called MklRelu) as:
+//
+// O, O_m = MklRelu(A, A_m)
+//
+// MklRelu has 2 inputs (A and A_m) and 2 outputs (O and O_m).
+// Here A input is same as A input of Relu; O output is same
+// as O output of Relu. O_m is the additional output tensor
+// that will be set by MklRelu, and it represents Mkl tensor
+// corresponding to O -- in other words, O_m is some kind of
+// metadata for O. A_m is additional input of Relu, and it
+// represents metadata for A - as O_m is metadata for O, A_m
+// is metadata for A. MklRelu receives this metadata from
+// previous layer (in the graph).
+//
+// When previous layer in the graph is Mkl layer, A_m will
+// represent a valid Mkl tensor. But when previous layer
+// is not an Mkl layer, then A_m represents a dummy Mkl tensor.
+//
+// Rewriting rules:
+// - Selection of an op for rewriting happens by registering
+// an op with this pass. If an op is not registered, then
+// it is not rewritten.
+// - Number of inputs after rewriting:
+// Since for every input Tensorflow tensor, the rewritten
+// layer gets Mkl tensor, rewritten op gets 2*N inputs,
+// where N is the number of inputs for original op.
+// - Number of outputs after rewriting:
+// Since for every output Tensorflow tensor, the rewritten
+// layer generates Mkl tensor, rewritten op generates 2*N
+// outputs, where N is the number of outputs of original op.
+// - Ordering of Tensorflow tensors and Mkl tensors:
+// Since every op generates twice the number of inputs and
+// outputs, one could imagine different ordering among
+// Tensorflow tensors and Mkl tensors. E.g., let's assume
+// an op 'Conv2D' takes (A, B) as input, then new op
+// 'MklConv2D' can take (A, A_m, B, B_m) as input or it
+// can also take (A, B, A_m, B_m) as input. Among N inputs
+// one can get N! permutations.
+//
+// So the question is: which one do we follow? Currently,
+// we follow an intuitive order where Mkl tensor follows a
+// corresponding Tensorflow tensor immediately. In the
+// context of above example, it will be: (A, A_m, B, B_m).
+// We follow same ordering rule for output tensors.
+//
+// NOTE: Current rewriting approach rewrites an op to Mkl op without
+// any conditions. But in the future, it may be possible to
+// consider conditions such as input shapes and sizes to rewrite
+// an op.
+//
+// Graph rewrite algorithm:
+// Algorithm: Graph Rewrite
+// Input: Graph G, Names of nodes to rewrite and their new nodes
+// Output: Modified Graph G' if nodes are modified, G otherwise.
+// Start:
+// N = Topological_Sort(G) // N is set of nodes in toposort order.
+// foreach node n in N
+// do
+// if (Is_MKL_Layer(n)) // Can this layer accept Mkl layout as input.
+// then
+// E = set of <incoming edge and its src_output slot> of n
+// E' = {} // new set of edges for rewritten node
+// foreach <e,s> in E
+// do
+// E' U {<e,s>} // First copy edge which generates Tensorflow
+// // tensor as it is
+// m = Source node of edge e
+// if Is_Rewritten(m) // Did we rewrite this node in this pass?
+// then
+// E' U {<m,s+1>} // If yes, then m will generate Mkl tensor
+// // as output.
+// else
+// d = Generate_Dummy_Mkl_Tensor() // If not, generate dummy
+// // Mkl tensor.
+// E' U {<d,0>} // Dummy Mkl tensor has only 1 output slot.
+// fi
+// done
+// n' = Build_New_Node(G,new_name,E')
+// Mark_Rewritten(n') // Mark new node as being rewritten.
+// fi
+// done
+//
+// Explanation:
+// For graph rewrite, we visit nodes of the graph in the topological
+// sort order. With this ordering, we visit nodes in top-to-bottom
+// fashion. We need this order because while visiting a node we want
+// all of its input nodes (parents) visited (and rewritten if
+// applicable). This is because if we need to rewrite a current node
+// then all of its input nodes need to be fixed (in other words they
+// cannot be removed later.)
+//
+// While visiting each node, we first check if it is Mkl layer. If
+// it is, then we rewrite that node after constructing new inputs to
+// the node. If it is not Mkl layer, then we do not rewrite the node.
+//
+class MklLayoutRewritePass : public GraphOptimizationPass {
+ public:
+ MklLayoutRewritePass() {
+ csinfo_.conv2d = "Conv2D";
+
+ ninfo_.push_back({csinfo_.conv2d, GetMklOpName(csinfo_.conv2d),
+ 2, CopyAttrsConv2D});
+ }
+
+ // Standard interface to run pass
+ Status Run(const GraphOptimizationPassOptions& options);
+
+ // Helper function which does most of heavy lifting for rewriting
+ // Mkl nodes to propagate Mkl tensor as additional output
+ //
+ // Extracts common functionality between Run public interface and
+ // test interface.
+ //
+ // @return true, if and only if graph is mutated; false otherwise.
+ bool RunPass(std::unique_ptr<Graph>* g);
+
+ private:
+ /// Structure to specify name of original op, its new name after rewrite,
+ /// the number of inputs to the original op, and the function to be used
+ /// to copy attributes for the op
+ typedef struct {
+ string name; // Original name of the op in the graph
+ string newname; // New name of op in the graph
+ int numins; // Number of inputs to the original op
+ std::function<void(Node*, NodeBuilder*)> copyattrs; // Function handler
+ // to copy attributes from old node to new node.
+ } NodesInfo;
+
+ /// Structure to store all constant strings
+ struct {
+ string relu;
+ string relugrad;
+ string conv2d;
+ } csinfo_;
+
+ /// Maintain info about nodes to rewrite
+ std::vector<NodesInfo> ninfo_;
+
+ /// Hash table to maintain nodes visited in the graph.
+ std::unordered_set<const Node*> visited_nodes_;
+
+ private:
+ // Predicate to check if we rewrote node 'n'
+ //
+ // If we rewrote the node, then the rewritten node will produce
+ // Mkl tensor as output. If we did not rewrite the node, then
+ // we need to insert dummy Mkl node on the input side.
+ //
+ // Returns true if node is rewritten, false otherwise.
+ inline bool IsRewrittenNode(Node* n) const {
+ return visited_nodes_.find(n) != visited_nodes_.end();
+ }
+
+ // Mark the node as rewritten
+ inline void MarkRewrittenNode(Node* n) {
+ visited_nodes_.insert(n);
+ }
+
+ // Get the name of Mkl op from original TensorFlow op
+ // We prefix 'Mkl' to the original op to get Mkl op.
+ // TODO(nhasabni) We should move this to mkl_util.h.
+ inline string GetMklOpName(const string& name) const {
+ // Prefix that we add to Tensorflow op name to construct Mkl op name.
+ const char* const kMklOpPrefix = "Mkl";
+ return string(kMklOpPrefix) + name;
+ }
+
+ // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
+ // in graph 'g'. Original node is input in 'orign'.
+ //
+ // For details, refer to 'Number of inputs after rewriting' section in the
+ // documentation above.
+ //
+ // Returns Status::OK() if setting up inputs is successful, otherwise
+ // returns appropriate status code.
+ Status SetUpInputs(std::unique_ptr<Graph>* g,
+ const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+ NodeBuilder* nb, Node* orign);
+
+ // Rewrite Node 'n' in graph 'g' with rewrite information specified in 'ni'
+ // Returns Status::OK() if node rewrite is successful, otherwise returns
+ // appropriate error status
+ Status RewriteNode(std::unique_ptr<Graph>* g, Node* n, const NodesInfo& ni);
+
+ // Functions specific to operators to copy attributes
+ // We need operator-specific function to copy attributes because the framework
+ // does not provide any generic function for it.
+ static void CopyAttrsConv2D(Node* orign, NodeBuilder* nb);
+
+ // Generate a graph node in graph 'g' representing a dummy Mkl tensor node,
+ // using node for original node 'orign' and return it in '*out'.
+ // TODO(nhasabni) We should move this to mkl_util.h
+ void GetDummyMklTensorNode(std::unique_ptr<Graph>* g, Node** out,
+ Node* orign);
+};
+
+
+// We register Mkl rewrite pass for phase 1 in pre-placement group.
+// Do not change the ordering of the Mkl passes.
+REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 1,
+ MklLayoutRewritePass);
+
+
+// Collects the inputs of node 'n'. Source nodes of control edges are placed
+// in 'control_edges' (sorted afterwards); for every data edge the pair
+// (source node, source output slot) is stored in 'in' at the index given by
+// the edge's destination input. 'in' is expected to be pre-sized to
+// n->num_inputs().
+static void FillInputs(const Node* n,
+                       gtl::InlinedVector<Node*, 4>* control_edges,
+                       gtl::InlinedVector<std::pair<Node*, int>, 4>* in) {
+  DCHECK_EQ(in->size(), n->num_inputs());
+  control_edges->clear();
+  for (const Edge* edge : n->in_edges()) {
+    if (!edge->IsControlEdge()) {
+      (*in)[edge->dst_input()] =
+          std::make_pair(edge->src(), edge->src_output());
+      continue;
+    }
+    control_edges->push_back(edge->src());
+  }
+  std::sort(control_edges->begin(), control_edges->end());
+  if (!n->op_def().is_commutative()) {
+    return;
+  }
+  // For commutative inputs, we sort the input by the input Node*
+  // to get a canonical ordering (so that add(a,b) and add(b, a) will
+  // hash to the same value if is_commutative is true for 'add').
+  std::sort(in->begin(), in->end());
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Macros to build new node with different number of inputs.
+// We need this way because we need to specify all the inputs when
+// building a node. Comment at core/graph/node_builder.h, line 85-86.
+
+#define SETUP_INPUTS1(nb, op1) do { \
+ nb->Input(op1.node, op1.index); \
+}while(0)
+
+#define SETUP_INPUTS2(nb, op1, op2) do { \
+ nb->Input(op1.node, op1.index); \
+ nb->Input(op2.node, op2.index); \
+}while(0)
+
+#define SETUP_INPUTS3(nb, op1, op2, op3) do { \
+ nb->Input(op1.node, op1.index); \
+ nb->Input(op2.node, op2.index); \
+ nb->Input(op3.node, op3.index); \
+}while(0)
+
+#define SETUP_INPUTS4(nb, op1, op2, op3, op4) do { \
+ nb->Input(op1.node, op1.index); \
+ nb->Input(op2.node, op2.index); \
+ nb->Input(op3.node, op3.index); \
+ nb->Input(op4.node, op4.index); \
+}while(0)
+
+#define SETUP_INPUTS5(nb, op1, op2, op3, op4, op5) do {\
+ nb->Input(op1.node, op1.index); \
+ nb->Input(op2.node, op2.index); \
+ nb->Input(op3.node, op3.index); \
+ nb->Input(op4.node, op4.index); \
+ nb->Input(op5.node, op5.index); \
+}while(0)
+
+// TODO(nhasabni) We should move this to mkl_util.h.
+void MklLayoutRewritePass::GetDummyMklTensorNode(
+    std::unique_ptr<Graph>* g, Node** out, Node* orign) {
+  // The dummy Mkl tensor is a uint8 Const of shape {8} whose content is
+  // eight zero bytes (0,0,0,0,0,0,0,0). 8 = 2*size_t.
+  uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+  const DataType dt = DataTypeToEnum<uint8>::v();
+
+  TensorProto proto;
+  proto.set_dtype(dt);
+  proto.set_tensor_content(static_cast<const void*>(zero), 8);
+
+  TensorShape dummy_shape({8});
+  dummy_shape.AsProto(proto.mutable_tensor_shape());
+
+  // The new Const node is placed on the same device as the original node
+  // and is returned through '*out'.
+  TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
+                  .Attr("value", proto)
+                  .Attr("dtype", dt)
+                  .Device(orign->def().device())
+                  .Finalize(&**g, out));
+}
+
+Status MklLayoutRewritePass::SetUpInputs(std::unique_ptr<Graph>* g,
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+    NodeBuilder* nb, Node* orign) {
+  // Largest number of inputs (Tensorflow + Mkl tensors) that can be attached
+  // to the rewritten node; anything larger is reported as UNIMPLEMENTED.
+  const size_t kMaxNewInputs = 5;
+
+  std::vector<NodeBuilder::NodeOut> new_inputs;
+
+  // 1. Collect the inputs of the new node: every original Tensorflow tensor
+  //    input is immediately followed by its Mkl tensor input.
+  for (const std::pair<Node*, int>& input : inputs) {
+    Node* n = input.first;
+    const int src_slot = input.second;
+
+    // The original TF tensor input is copied as it is.
+    new_inputs.push_back(NodeBuilder::NodeOut(n, src_slot));
+
+    if (!IsRewrittenNode(n)) {
+      // The producer was not rewritten in this pass, so it cannot supply an
+      // Mkl tensor. Feed a dummy Mkl tensor instead: the dummy node has no
+      // input and a single output in slot 0.
+      Node* dmt = nullptr;
+      GetDummyMklTensorNode(g, &dmt, orign);
+      CHECK_NOTNULL(dmt);
+      new_inputs.push_back(NodeBuilder::NodeOut(dmt, 0));
+      continue;
+    }
+
+    // The producer was rewritten, so it generates an Mkl tensor. Assert
+    // first that it carries attribute "T" and that its op is an Mkl layer.
+    DataType T;
+    TF_CHECK_OK(GetNodeAttr(n->def(), "T", &T));
+    CHECK_EQ(mkl_layer_registry::IsMklLayer(n->type_string()), true);
+    // The Mkl tensor lives in the slot right after the TF tensor slot.
+    new_inputs.push_back(NodeBuilder::NodeOut(n, src_slot + 1));
+  }
+
+  // The total number of inputs to new node _must_ be 2 times the number
+  // of inputs to the original node: N original Tensorflow tensors and
+  // N for Mkl tensors corresponding to each Tensorflow tensors.
+  CHECK_EQ(new_inputs.size(), inputs.size() * 2);
+
+  // 2. Attach the collected inputs to the node builder, in order.
+  if (new_inputs.size() > kMaxNewInputs) {
+    return Status(error::Code::UNIMPLEMENTED,
+                  "Could not create node with given number of inputs");
+  }
+  for (const NodeBuilder::NodeOut& new_input : new_inputs) {
+    nb->Input(new_input.node, new_input.index);
+  }
+
+  return Status::OK();
+}
+
+// Copies the Conv2D attributes "T", "strides", "padding", "data_format" and
+// "use_cudnn_on_gpu" from node 'orign' to the node being built by 'nb'.
+// A missing attribute on 'orign' fails the corresponding TF_CHECK_OK.
+void MklLayoutRewritePass::CopyAttrsConv2D(Node* orign, NodeBuilder* nb) {
+  // Read every attribute from the old node first.
+  DataType T;
+  TF_CHECK_OK(GetNodeAttr(orign->def(), "T", &T));
+  std::vector<int32> strides;
+  TF_CHECK_OK(GetNodeAttr(orign->def(), "strides", &strides));
+  string padding;
+  TF_CHECK_OK(GetNodeAttr(orign->def(), "padding", &padding));
+  string data_format;
+  TF_CHECK_OK(GetNodeAttr(orign->def(), "data_format", &data_format));
+  bool use_cudnn_on_gpu;
+  TF_CHECK_OK(GetNodeAttr(orign->def(), "use_cudnn_on_gpu", &use_cudnn_on_gpu));
+
+  // Then add them to the new node, in the same order.
+  nb->Attr("T", T)
+      .Attr("strides", strides)
+      .Attr("padding", padding)
+      .Attr("data_format", data_format)
+      .Attr("use_cudnn_on_gpu", use_cudnn_on_gpu);
+}
+
+// Replaces 'orign' in graph '*g' with a new node whose op is 'ni.newname'.
+//
+// The new node keeps the original node's name, requested device and
+// assigned device, gets the inputs prepared by SetUpInputs(), the
+// attributes copied by 'ni.copyattrs', and the "_kernel" attribute set to
+// the Mkl layer label. All incoming control edges and all outgoing edges of
+// 'orign' are re-attached to the new node before 'orign' is removed.
+//
+// Returns the error from SetUpInputs() if the inputs cannot be set up (the
+// original node is then left in place); Status::OK() otherwise. On success
+// 'orign' has been removed from the graph and must not be used by the
+// caller.
+Status MklLayoutRewritePass::RewriteNode(
+    std::unique_ptr<Graph>* g, Node* orign, const NodesInfo& ni) {
+  VLOG(1) << "MKLLayoutRewritePass: Original node:" << orign->DebugString();
+
+  // Get all inputs.
+  const int num = orign->num_inputs();
+  CHECK_EQ(num, ni.numins);
+  gtl::InlinedVector<Node*, 4> control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> inputs(num);
+  FillInputs(orign, &control_edges, &inputs);
+
+  // Build new node. We use same name as original node, but change the op name.
+  NodeBuilder nb(orign->name().c_str(), ni.newname.c_str());
+  // Copy user-specified device assigned to original node to new node.
+  nb.Device(orign->def().device());
+  // Set up new inputs to the rewritten node.
+  Status s = SetUpInputs(g, inputs, &nb, orign);
+  if (!s.ok()) {
+    return s;
+  }
+  // Copy attributes from original node to new node.
+  ni.copyattrs(orign, &nb);
+  // Set the Mkl layer label for this op.
+  nb.Attr("_kernel", mkl_layer_registry::kMklLayerLabel);
+  Node* newn = nullptr;
+
+  // Finalize graph and get new node.
+  TF_CHECK_OK(nb.Finalize(&**g, &newn));
+  CHECK_NOTNULL(newn);
+
+  // Data inputs of 'orign' were already wired to 'newn' by SetUpInputs.
+  // Control inputs are not part of that, so re-attach them here; otherwise
+  // the control dependencies of the original node would be lost.
+  for (const Edge* e : orign->in_edges()) {
+    if (e->IsControlEdge()) {
+      (*g)->AddControlEdge(e->src(), newn);
+    }
+  }
+
+  // Copy outgoing edges from 'orign' node to new 'newn' node.
+  for (const Edge* e : orign->out_edges()) {
+    (*g)->AddEdge(newn, e->src_output(), e->dst(), e->dst_input());
+  }
+
+  // Copy the runtime device assigned from original code to new node.
+  newn->set_assigned_device_name(orign->assigned_device_name());
+
+  // Delete original node and mark new node as rewritten.
+  (*g)->RemoveNode(orign);
+  MarkRewrittenNode(newn);
+
+  VLOG(1) << "MKLLayoutRewritePass: New node:" << newn->DebugString();
+  return Status::OK();
+}
+
+// Runs the layout rewrite over every op node of '*g', visiting nodes in
+// reverse post-order. A node is rewritten when it has a "T" attribute, the
+// Mkl op name derived from its op type is a registered Mkl layer, and its
+// op type matches an entry of 'ninfo_'.
+//
+// Returns true if at least one node was rewritten successfully.
+bool MklLayoutRewritePass::RunPass(std::unique_ptr<Graph>* g) {
+  bool result = false;
+  CHECK_NOTNULL(g);
+
+  DumpGraph("Before running MklLayoutRewritePass", &**g);
+
+  std::vector<Node*> order;
+  GetReversePostOrder(**g, &order);  // This will give us topological sort.
+
+  for (Node* n : order) {
+    if (!n->IsOp()) {
+      continue;
+    }
+
+    // An op needs to have data type (T) attribute and its corresponding
+    // Mkl op name must be supported. Neither condition depends on the
+    // NodesInfo entry, so both are checked once per node.
+    DataType dtype = DT_INVALID;
+    if (!GetNodeAttr(n->def(), "T", &dtype).ok() ||
+        !mkl_layer_registry::IsMklLayer(GetMklOpName(n->type_string()))) {
+      continue;
+    }
+
+    // Keep copies for logging: a successful RewriteNode removes 'n' from
+    // the graph.
+    const string node_name = n->name();
+    const string op_name = n->type_string();
+
+    for (const NodesInfo& ni : ninfo_) {
+      if (op_name.compare(ni.name) != 0) {
+        continue;
+      }
+
+      VLOG(1) << "MKLLayoutRewritePass: Scheduled node " << node_name
+              << " with op " << op_name << " for rewrite using"
+              << " layout optimization.";
+
+      if (RewriteNode(g, n, ni).ok()) {
+        VLOG(1) << "MKLLayoutRewritePass: Successfully rewrote node "
+                << node_name << " with op " << op_name
+                << " for Mkl layout optimization.";
+        result = true;
+        break;  // We found matching nodesinfo so no need to search next.
+      }
+    }
+  }
+
+  DumpGraph("After running MklLayoutRewritePass", &**g);
+
+  return result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Run function for the pass
+///////////////////////////////////////////////////////////////////////////////
+
+// Free-function entry point: runs RunPass() of a default-constructed
+// MklLayoutRewritePass on '*g' and returns its result.
+bool RunMklLayoutRewritePass(std::unique_ptr<Graph>* g) {
+ return MklLayoutRewritePass().RunPass(g);
+}
+
+// GraphOptimizationPass entry point. Runs the rewrite pass in place on
+// 'options.graph'; does nothing when no graph is supplied. Always returns
+// Status::OK().
+Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
+  if (options.graph == nullptr) {
+    return Status::OK();
+  }
+
+  // 'options.graph' is a pointer to the caller's unique_ptr<Graph>, so the
+  // pass can mutate the graph through it directly. No ownership transfer is
+  // needed: std::move on the raw pointer only copied it, and the subsequent
+  // release()/reset() on the very same unique_ptr was a no-op.
+  RunPass(options.graph);
+
+  return Status::OK();
+}
+
+} // namespace tensorflow
+
+#endif
diff --git a/tensorflow/core/graph/mkl_layout_pass.h b/tensorflow/core/graph/mkl_layout_pass.h
new file mode 100644
index 0000000000..ffe5c1ecfc
--- /dev/null
+++ b/tensorflow/core/graph/mkl_layout_pass.h
@@ -0,0 +1,36 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// A graph pass that rewrites graph for propagating MKL layout as a tensor
+
+#ifndef TENSORFLOW_GRAPH_MKL_LAYOUT_PASS_H_
+#define TENSORFLOW_GRAPH_MKL_LAYOUT_PASS_H_
+
+#ifdef INTEL_MKL
+
+#include <sys/types.h>
+#include <memory>
+#include "tensorflow/core/graph/graph.h"
+
+namespace tensorflow {
+// Interface to invoke the MKL layout rewrite pass directly on a graph,
+// without going through the optimization pass registry; intended for unit
+// tests.
+//
+// 'g' must not be null. The graph it points to is modified in place.
+//
+// Returns true if and only if 'g' is mutated.
+extern bool RunMklLayoutRewritePass(std::unique_ptr<Graph>* g);
+} // namespace tensorflow
+
+#endif
+
+#endif // TENSORFLOW_GRAPH_MKL_LAYOUT_PASS_H_
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
new file mode 100644
index 0000000000..10671ee2e9
--- /dev/null
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -0,0 +1,199 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef INTEL_MKL
+
+#include "tensorflow/core/graph/mkl_layout_pass.h"
+#include "tensorflow/core/util/mkl_util.h"
+
+#include <vector>
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/graph/graph_constructor.h"
+#include "tensorflow/core/graph/testlib.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/random/simple_philox.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/protobuf.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+
+namespace tensorflow {
+namespace {
+
+// Parses the text-format GraphDef in 's' and converts it into '*graph'.
+// Aborts (CHECK) if 's' cannot be parsed or the conversion fails.
+static void InitGraph(const string& s, Graph* graph) {
+  GraphDef parsed_def;
+  protobuf::TextFormat::Parser text_parser;
+  CHECK(text_parser.MergeFromString(s, &parsed_def)) << s;
+
+  const GraphConstructorOptions default_opts;
+  TF_CHECK_OK(ConvertGraphDefToGraph(default_opts, parsed_def, graph));
+}
+
+// Test fixture that owns a Graph, runs the MKL layout rewrite pass on it
+// and renders the graph as a canonical "nodes|edges" string so that tests
+// can compare it against an expected literal.
+class MklLayoutPassTest : public ::testing::Test {
+ public:
+  MklLayoutPassTest() : graph_(OpRegistry::Global()) {}
+
+  // Builds graph_ from the text-format GraphDef 's' and remembers its
+  // canonical string form in original_.
+  void InitGraph(const string& s) {
+    ::tensorflow::InitGraph(s, &graph_);
+    original_ = CanonicalGraphString(&graph_);
+  }
+
+  // Only op nodes take part in the canonical string.
+  static bool IncludeNode(const Node* n) { return n->IsOp(); }
+
+  // Names one end of an edge: "name" for slot 0, "name:control" for the
+  // control slot, and "name:<index>" for any other slot.
+  static string EdgeId(const Node* n, int index) {
+    if (index == 0) {
+      return n->name();
+    } else if (index == Graph::kControlSlot) {
+      return strings::StrCat(n->name(), ":control");
+    } else {
+      return strings::StrCat(n->name(), ":", index);
+    }
+  }
+
+  // Returns "n1(Op1);n2(Op2);...|src->dst;...", with both the node list and
+  // the edge list sorted so that the result does not depend on iteration
+  // order. Edges touching a node rejected by IncludeNode are omitted.
+  string CanonicalGraphString(Graph* g) {
+    std::vector<string> nodes;
+    std::vector<string> edges;
+    for (const Node* n : g->nodes()) {
+      if (IncludeNode(n)) {
+        nodes.push_back(strings::StrCat(n->name(), "(", n->type_string(), ")"));
+      }
+    }
+    for (const Edge* e : g->edges()) {
+      if (IncludeNode(e->src()) && IncludeNode(e->dst())) {
+        edges.push_back(strings::StrCat(EdgeId(e->src(), e->src_output()), "->",
+                                        EdgeId(e->dst(), e->dst_input())));
+      }
+    }
+    // Canonicalize
+    std::sort(nodes.begin(), nodes.end());
+    std::sort(edges.begin(), edges.end());
+    return strings::StrCat(str_util::Join(nodes, ";"), "|",
+                           str_util::Join(edges, ";"));
+  }
+
+  // Runs the MKL layout rewrite pass on graph_ and returns the canonical
+  // string of the resulting graph. The graph is logged before and after.
+  string DoMklLayoutOptimizationPass() {
+    string before = CanonicalGraphString(&graph_);
+    LOG(ERROR) << "Before MKL layout rewrite pass: " << before;
+
+    // The pass API takes a unique_ptr<Graph>*, but graph_ is a member of
+    // this fixture and must never be deleted through that pointer. Wrap it
+    // in a local unique_ptr only for the duration of the call and release
+    // it afterwards, instead of heap-allocating (and leaking) the wrapper.
+    std::unique_ptr<Graph> ug(&graph_);
+    RunMklLayoutRewritePass(&ug);
+    ug.release();
+
+    string result = CanonicalGraphString(&graph_);
+    LOG(ERROR) << "After MKL layout rewrite pass: " << result;
+    return result;
+  }
+
+  // Canonical string of the graph as it was right after InitGraph().
+  const string& OriginalGraph() const { return original_; }
+
+  Graph graph_;      // Graph under test.
+  string original_;  // Canonical string captured by InitGraph().
+};
+
+REGISTER_OP("Input").Output("o: float").SetIsStateful();
+
+// Single Conv2D Op; No Mkl layer on the input and on the output.
+// Neither input of the Conv2D comes from an Mkl layer, so the rewritten
+// MklConv2D gets a dummy Mkl tensor (DMT Const node) after each original
+// input: A and B stay on slots 0 and 2, the dummies feed slots 1 and 3.
+TEST_F(MklLayoutPassTest, Conv2D_Basic) {
+ InitGraph(
+ "node { name: 'A' op: 'Input'}"
+ "node { name: 'B' op: 'Input'}"
+ "node { name: 'C' op: 'Conv2D'"
+ " attr { key: 'T' value { type: DT_FLOAT } }"
+ " attr { key: 'data_format' value { s: 'NCHW' } }"
+ " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+ " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
+ " attr { key: 'padding' value { s: 'SAME' } }"
+ " input: ['A', 'B']}"
+ "node { name: 'D' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+ " input: ['B', 'C'] }");
+ EXPECT_EQ(DoMklLayoutOptimizationPass(),
+ "A(Input);B(Input);C(MklConv2D);D(Mul);DMT/_0(Const);DMT/_1(Const)|"
+ "A->C;B->C:2;B->D;C->D:1;DMT/_0->C:1;DMT/_1->C:3");
+}
+
+// 2 Conv2D Ops in sequence. Both should get transformed and 1st Conv2D will
+// have 2 outputs, both of which will be inputs to next Conv2D.
+// In the expected graph, C's output 0 (TF tensor) feeds D:2 and C's output 1
+// (Mkl tensor) feeds D:3, so only D's first input (A) needs a dummy Mkl
+// tensor, while C needs one for each of its two inputs.
+TEST_F(MklLayoutPassTest, Conv2D_Positive1) {
+ InitGraph(
+ "node { name: 'A' op: 'Input'}"
+ "node { name: 'B' op: 'Input'}"
+ "node { name: 'C' op: 'Conv2D'"
+ " attr { key: 'T' value { type: DT_FLOAT } }"
+ " attr { key: 'data_format' value { s: 'NCHW' } }"
+ " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+ " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
+ " attr { key: 'padding' value { s: 'SAME' } }"
+ " input: ['A', 'B']}"
+ "node { name: 'D' op: 'Conv2D'"
+ " attr { key: 'T' value { type: DT_FLOAT } }"
+ " attr { key: 'data_format' value { s: 'NCHW' } }"
+ " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+ " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
+ " attr { key: 'padding' value { s: 'SAME' } }"
+ " input: ['A', 'C']}"
+ "node { name: 'E' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+ " input: ['C', 'D'] }");
+ EXPECT_EQ(DoMklLayoutOptimizationPass(),
+ "A(Input);B(Input);C(MklConv2D);D(MklConv2D);DMT/_0(Const);"
+ "DMT/_1(Const);DMT/_2(Const);E(Mul)|A->C;A->D;B->C:2;C->D:2;C->E;"
+ "C:1->D:3;D->E:1;DMT/_0->C:1;DMT/_1->C:3;DMT/_2->D:1");
+}
+
+// Benchmarks RunMklLayoutRewritePass on a synthetic graph made of ten
+// 'Input' nodes and 'op_nodes' 'Mul' nodes whose two inputs are picked
+// pseudo-randomly (fixed seed) among the inputs. Only the pass itself is
+// timed; graph construction and destruction happen with the timer stopped.
+static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
+  testing::StopTiming();
+
+  // Text-format GraphDef shared by every iteration.
+  string graph_text;
+  for (int in = 0; in < 10; ++in) {
+    graph_text += strings::Printf("node { name: 'in%04d' op: 'Input'}", in);
+  }
+  random::PhiloxRandom philox(301, 17);
+  random::SimplePhilox rnd(&philox);
+  for (int op = 0; op < op_nodes; ++op) {
+    graph_text += strings::Printf(
+        "node { name: 'op%04d' op: 'Mul' attr { key: 'T' value { "
+        "type: DT_FLOAT } } input: ['in%04d', 'in%04d' ] }",
+        op, rnd.Uniform(10), rnd.Uniform(10));
+  }
+
+  bool label_set = false;
+  while (iters > 0) {
+    // A fresh graph per iteration, owned by 'ug' for its whole lifetime.
+    std::unique_ptr<Graph> ug(new Graph(OpRegistry::Global()));
+    InitGraph(graph_text, ug.get());
+    const int N = ug->num_node_ids();
+    if (!label_set) {
+      testing::SetLabel(strings::StrCat("Per graph node. Nodes: ", N));
+      label_set = true;
+    }
+
+    testing::StartTiming();
+    RunMklLayoutRewritePass(&ug);
+    testing::StopTiming();
+
+    // Our benchmark units are individual graph nodes, not whole graphs.
+    iters -= N;
+  }
+}
+BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
+
+} // namespace
+} // namespace tensorflow
+
+#endif /* INTEL_MKL */
diff --git a/tensorflow/core/graph/mkl_optimizer_merge.cc b/tensorflow/core/graph/mkl_optimizer_merge.cc
index 98fc268d28..bc5915eda2 100644
--- a/tensorflow/core/graph/mkl_optimizer_merge.cc
+++ b/tensorflow/core/graph/mkl_optimizer_merge.cc
@@ -22,6 +22,8 @@ limitations under the License.
#include <vector>
#include <queue>
#include <utility>
+#include <string>
+#include <memory>
#include "tensorflow/core/graph/mkl_optimizer_merge.h"
@@ -33,6 +35,8 @@ limitations under the License.
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/common_runtime/optimization_registry.h"
namespace tensorflow {
@@ -58,8 +62,8 @@ static size_t kNodeMergeContextMaxDepth = 10;
class NodeMergeRewritePass : public GraphOptimizationPass {
public:
NodeMergeRewritePass() {
- csinfo_.conv2d = "Conv2D";
- csinfo_.conv2dwithbias = "Conv2DWithBias";
+ csinfo_.conv2d = "MklConv2D";
+ csinfo_.conv2dwithbias = "MklConv2DWithBias";
csinfo_.conv2dwithbiasbackpropbias = "Conv2DWithBiasBackpropBias";
csinfo_.biasadd = "BiasAdd";
csinfo_.matmul = "MatMul";
@@ -72,6 +76,9 @@ class NodeMergeRewritePass : public GraphOptimizationPass {
// maxhops in backward data-flow graph. Since input of forward nodes
// (Conv2D) directly goes to backward nodes, we do not expect the
// hop-distance would be more than few nodes.
+ // TODO(nhasabni) Temporarily disabling rewrite of BiasAddGrad.
+ // Will enable it once we support Conv2DWithBiasBackpropBias op.
+#if 0
rinfo_.push_back({csinfo_.biasaddgrad, csinfo_.conv2dwithbiasbackpropbias,
{csinfo_.conv2dwithbias, kNodeMergeContextMaxDepth}});
rinfo_.push_back({csinfo_.biasaddgrad, csinfo_.conv2dwithbiasbackpropbias,
@@ -80,6 +87,7 @@ class NodeMergeRewritePass : public GraphOptimizationPass {
// because we do not have a separate Op for MatMulwithBias.
rinfo_.push_back({csinfo_.biasaddgrad, csinfo_.biasaddgrad,
{csinfo_.matmul, kNodeMergeContextMaxDepth}});
+#endif
}
// Standard interface to run optimization pass
@@ -182,10 +190,16 @@ class NodeMergeRewritePass : public GraphOptimizationPass {
// @return Matching rewriteinfo in case a match is found; null otherwise.
const RewriteInfo* FindMatchingRewriteInfo(const Node* n,
const Node** fwdn) const;
+
+ // Generate a graph node in graph 'g' representing a dummy Mkl tensor node,
+ // and return it in '*out'.
+ // TODO(nhasabni) We should move this to mkl_util.h
+ void GetDummyMklTensorNode(std::unique_ptr<Graph>* g, Node** out);
};
-/// We register merge optimizer for phase 1 and MKLToTF insertion for phase 2.
-REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 1,
+// We register merge optimizer for phase 2 in pre-placement group.
+// Do not change the ordering of the Mkl passes.
+REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 2,
NodeMergeRewritePass);
static void FillInputs(const Node* n,
@@ -219,8 +233,6 @@ Node* NodeMergeRewritePass::FindNodeForMerge(const Node* a) const {
}
}
- VLOG(1) << "FindNodeForMerge: " << a->type_string();
-
for (const MergeInfo* mi : matching_mi) {
const int N_in = a->num_inputs();
if (mi->op >= N_in) {
@@ -240,8 +252,6 @@ Node* NodeMergeRewritePass::FindNodeForMerge(const Node* a) const {
continue;
}
- VLOG(1) << " FindNode: " << b->type_string();
-
gtl::InlinedVector<Node*, 4> b_control_edges;
gtl::InlinedVector<std::pair<Node*, int>, 4> b_in(N_in);
FillInputs(b, &b_control_edges, &b_in);
@@ -258,6 +268,22 @@ Node* NodeMergeRewritePass::FindNodeForMerge(const Node* a) const {
return nullptr;
}
+// Adds a "Const" node to graph '*g' that stands in for a missing Mkl
+// tensor, and returns it in '*out'. The constant is a uint8 tensor of
+// shape {8} whose bytes are all zero; the node gets a fresh name derived
+// from "DMT". Aborts (TF_CHECK_OK) if the node cannot be built.
+void NodeMergeRewritePass::GetDummyMklTensorNode(
+    std::unique_ptr<Graph>* g, Node** out) {
+  const DataType dt = DataTypeToEnum<uint8>::v();
+
+  // Describe the constant: dtype, 8 zero bytes of content, shape {8}.
+  const uint8 zero_bytes[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+  TensorProto proto;
+  proto.set_dtype(dt);
+  proto.set_tensor_content(static_cast<const void*>(zero_bytes),
+                           sizeof(zero_bytes));
+  const TensorShape dummy_shape({8});
+  dummy_shape.AsProto(proto.mutable_tensor_shape());
+
+  // Materialize it as a Const node in the graph.
+  NodeBuilder dummy_builder((*g)->NewName("DMT"), "Const");
+  dummy_builder.Attr("value", proto).Attr("dtype", dt);
+  TF_CHECK_OK(dummy_builder.Finalize(g->get(), out));
+}
+
+
Status NodeMergeRewritePass::MergeNode(std::unique_ptr<Graph>* g,
Node* succ, Node* pred) {
CHECK_NOTNULL(succ);
@@ -271,7 +297,6 @@ Status NodeMergeRewritePass::MergeNode(std::unique_ptr<Graph>* g,
std::vector<int32> strides;
string data_format_pred, data_format_succ;
bool use_cudnn_on_gnu;
- int groups = 1;
TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred));
TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ));
TF_CHECK_OK(GetNodeAttr(pred->def(), "padding", &padding));
@@ -280,25 +305,28 @@ Status NodeMergeRewritePass::MergeNode(std::unique_ptr<Graph>* g,
TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ));
TF_CHECK_OK(GetNodeAttr(pred->def(), "use_cudnn_on_gpu",
&use_cudnn_on_gnu));
- // Groups attribute may not be there on the input node. So we do not
- // check for error in GetNodeAttr call.
- GetNodeAttr(pred->def(), "groups", &groups);
// We check to ensure that data formats of both succ and pred are same.
// We expect them to be same, so we can enforce this as assert.
// But assert can be too strict, so we enforce this as a check.
// If the check fails, then we do not merge two nodes.
+ // We also do same check for devices.
if (data_format_pred != data_format_succ ||
- T_pred != T_succ) {
+ T_pred != T_succ ||
+ pred->assigned_device_name() != succ->assigned_device_name() ||
+ pred->def().device() != succ->def().device()) {
return Status(error::Code::INVALID_ARGUMENT,
- "data_format or T attribute of Conv2D and BiasAdd"
- "do not match. Will skip node merge optimization");
+ "data_format or T attribute or devices of Conv2D and "
+ "BiasAdd do not match. Will skip node merge optimization");
}
// 2. Get inputs from both the nodes.
// Find the 2 inputs from the conv and the bias from the add Bias.
Node* oper1 = nullptr;
+ Node* oper1_mkl = nullptr; // Mkl tensor corresponding to oper1
Node* oper2 = nullptr;
+ Node* oper2_mkl = nullptr; // Mkl tensor corresponding to oper2
Node* oper3 = nullptr;
+ Node* oper3_mkl = nullptr; // Mkl tensor corresponding to oper3
const int succ_num = succ->num_inputs();
gtl::InlinedVector<Node*, 4> succ_control_edges;
@@ -326,24 +354,35 @@ Status NodeMergeRewritePass::MergeNode(std::unique_ptr<Graph>* g,
}
}
- // Get operand 0, 1 of conv2D
- oper1 = pred_in[0].first;
- oper2 = pred_in[1].first;
+ // Get operand 0, 1 of conv2D and their Mkl tensors.
+ CHECK_EQ(pred->in_edges().size(), 4); // MklConv2D must have 4 inputs.
+ oper1 = pred_in[0].first;
+ oper1_mkl = pred_in[1].first;
+ oper2 = pred_in[2].first;
+ oper2_mkl = pred_in[3].first;
// Get operand 1 of add_bias
- oper3 = succ_in[1].first;
+ // BiasAdd must have 2 inputs: Conv, bias
+ CHECK_EQ(succ->in_edges().size(), 2);
+ oper3 = succ_in[1].first;
+ GetDummyMklTensorNode(g, &oper3_mkl); // Get dummy Mkl tensor node
+ // as BiasAdd does not have Mkl tensor as input.
+ CHECK_NOTNULL(oper3_mkl);
Node* ret;
// We will use the node name of BiasAdd as the name of new node
TF_CHECK_OK(NodeBuilder(succ->name(), csinfo_.conv2dwithbias)
.Input(oper1)
+ .Input(oper1_mkl)
.Input(oper2)
+ .Input(oper2_mkl)
.Input(oper3)
+ .Input(oper3_mkl)
.Attr("T", T_pred)
.Attr("strides", strides)
.Attr("padding", padding)
.Attr("data_format", data_format_pred)
.Attr("use_cudnn_on_gpu", use_cudnn_on_gnu)
- .Attr("groups", groups)
+ .Device(succ->def().device())
.Finalize(&**g, &ret));
CHECK_NOTNULL(ret);
@@ -352,6 +391,15 @@ Status NodeMergeRewritePass::MergeNode(std::unique_ptr<Graph>* g,
(*g)->AddEdge(ret, e->src_output(), e->dst(), e->dst_input());
}
+ // Copy device assigned to old node to new node.
+ // It's ok to use pred or succ as we have enforced a check that
+ // both have same device assigned.
+ ret->set_assigned_device_name(pred->assigned_device_name());
+
+ VLOG(1) << "NodeMergeRewritePass: Merged old node:" << pred->DebugString()
+ << ", and node: " << succ->DebugString() << ", into node:"
+ << ret->DebugString();
+
(*g)->RemoveNode(succ);
(*g)->RemoveNode(pred);
@@ -369,13 +417,14 @@ Status NodeMergeRewritePass::RewriteNode(std::unique_ptr<Graph>* g, Node *n) {
const Node* fwdn = nullptr;
const RewriteInfo* ri = FindMatchingRewriteInfo(n, &fwdn);
if (ri == nullptr || fwdn == nullptr) {
- VLOG(1) << "Rewriteinfo not found for: " << n->type_string();
+ VLOG(2) << "NodeMergeRewritePass: Rewriteinfo not found for: "
+ << n->type_string();
return Status(error::Code::INVALID_ARGUMENT,
"Rewrite info not found for the node."
"Will skip node rewrite optimization");
}
- VLOG(1) << "Rewrite called for: " << n->type_string();
+ VLOG(1) << "NodeMergeRewritePass: Rewrite called for: " << n->type_string();
if (n->type_string() == csinfo_.biasaddgrad &&
ri->node == csinfo_.biasaddgrad &&
@@ -407,6 +456,7 @@ Status NodeMergeRewritePass::RewriteNode(std::unique_ptr<Graph>* g, Node *n) {
.Attr("T", T)
.Attr("data_format", data_format)
.Attr("strides", strides)
+ .Device(n->def().device())
.Finalize(&**g, &ret));
} else {
CHECK_EQ(ri->rewrite, csinfo_.biasaddgrad);
@@ -414,6 +464,7 @@ Status NodeMergeRewritePass::RewriteNode(std::unique_ptr<Graph>* g, Node *n) {
.Input(op)
.Attr("T", T)
.Attr("data_format", data_format)
+ .Device(n->def().device())
.Finalize(&**g, &ret));
}
@@ -424,7 +475,11 @@ Status NodeMergeRewritePass::RewriteNode(std::unique_ptr<Graph>* g, Node *n) {
(*g)->AddEdge(ret, e->src_output(), e->dst(), e->dst_input());
}
- VLOG(1) << "Rewrite node: " << n->type_string() << " successful";
+ // Copy device assigned to old node to new node.
+ ret->set_assigned_device_name(n->assigned_device_name());
+
+ VLOG(1) << "MKLOptimizerMergePass: Rewrote old node:" << n->DebugString()
+ << ", into node:" << ret->DebugString();
(*g)->RemoveNode(n);
return Status::OK();
@@ -450,7 +505,8 @@ NodeMergeRewritePass::FindMatchingRewriteInfo(const Node* n,
}
}
- VLOG(1) << "Searching graph for: " << n->type_string() << " in backwards.";
+ VLOG(1) << "NodeMergeRewritePass: Searching graph for: "
+ << n->type_string() << " in backwards.";
// Now we will check for forward op name for rewrite info in data
// flow graph. Get the max hops we should search for the fwd node
@@ -473,7 +529,8 @@ NodeMergeRewritePass::FindMatchingRewriteInfo(const Node* n,
curr_depth = curr_pair.second;
CHECK_NOTNULL(curr_node);
- VLOG(1) << "Visiting node: " << curr_node->type_string()
+ VLOG(1) << "NodeMergeRewritePass: Visiting node: "
+ << curr_node->type_string()
<< " at depth: " << curr_depth
<< " for node: " << n->type_string();
@@ -528,17 +585,16 @@ bool NodeMergeRewritePass::RunPass(std::unique_ptr<Graph>* g) {
std::vector<std::pair<Node*, Node*>> nodes_to_be_merged;
std::vector<Node*> nodes_to_be_rewritten;
- VLOG(1) << "Running NodeMerge Optimization";
-
for (Node* n : order) {
if (!n->IsOp()) continue;
Node* n1 = nullptr;
if ((n1 = FindNodeForMerge(n)) != nullptr) {
- VLOG(1) << "Scheduled nodes " << n->name() << " and "
- << n1->name() << " for merging";
+ VLOG(1) << "NodeMergeRewritePass: Scheduled nodes "
+ << n->name() << " and " << n1->name() << " for merging";
nodes_to_be_merged.push_back(std::make_pair(n, n1));
} else if (IsApplicableRewriteNode(n)) {
- VLOG(1) << "Scheduled node " << n->name() << " for rewrite";
+ VLOG(1) << "NodeMergeRewritePass: Scheduled node " << n->name()
+ << " for rewrite";
nodes_to_be_rewritten.push_back(n);
}
}
@@ -549,7 +605,8 @@ bool NodeMergeRewritePass::RunPass(std::unique_ptr<Graph>* g) {
string n1_name = i.first->name();
string n2_name = i.second->name();
if (MergeNode(g, i.first, i.second) == Status::OK()) {
- VLOG(1) << "Merged nodes " << n1_name << " and " << n2_name;
+ VLOG(1) << "NodeMergeRewritePass: Merged nodes " << n1_name
+ << " and " << n2_name;
result = true;
}
}
@@ -559,7 +616,8 @@ bool NodeMergeRewritePass::RunPass(std::unique_ptr<Graph>* g) {
for (Node* i : nodes_to_be_rewritten) {
string name = i->name();
if (RewriteNode(g, i) == Status::OK()) {
- VLOG(1) << "Rewrite node: " << name << " successful.";
+ VLOG(1) << "NodeMergeRewritePass: Rewrite node: "
+ << name << " successful.";
result = true;
}
}
@@ -574,8 +632,6 @@ bool OptimizeNodeMerge(std::unique_ptr<Graph>* g) {
}
Status NodeMergeRewritePass::Run(const GraphOptimizationPassOptions& options) {
- // Currently checking only for two cases - Conv2D+Bias and Matmul+Bias.
- // It is possible to extend it to other operators in future.
if (options.graph == nullptr) {
return Status::OK();
}
diff --git a/tensorflow/core/graph/mkl_optimizer_merge.h b/tensorflow/core/graph/mkl_optimizer_merge.h
index 554709e9dd..b2caec58af 100644
--- a/tensorflow/core/graph/mkl_optimizer_merge.h
+++ b/tensorflow/core/graph/mkl_optimizer_merge.h
@@ -21,20 +21,14 @@ limitations under the License.
#ifdef INTEL_MKL
#include <sys/types.h>
-#include <vector>
-#include <string>
#include <memory>
#include "tensorflow/core/graph/graph.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/common_runtime/optimization_registry.h"
namespace tensorflow {
-
// Interface to invoke the pass for unit test
//
// Returns true if and only if 'g' is mutated.
extern bool OptimizeNodeMerge(std::unique_ptr<Graph>* g);
-
} // namespace tensorflow
#endif // INTEL_MKL
diff --git a/tensorflow/core/graph/mkl_optimizer_merge_test.cc b/tensorflow/core/graph/mkl_optimizer_merge_test.cc
index da3b01955c..5aae61ad19 100644
--- a/tensorflow/core/graph/mkl_optimizer_merge_test.cc
+++ b/tensorflow/core/graph/mkl_optimizer_merge_test.cc
@@ -105,6 +105,7 @@ class OptimizerMergeTest : public ::testing::Test {
};
REGISTER_OP("Input").Output("o: float").SetIsStateful();
+REGISTER_OP("MklInput").Output("o: uint8").SetIsStateful();
TEST_F(OptimizerMergeTest, Basic) {
InitGraph(
@@ -121,10 +122,40 @@ TEST_F(OptimizerMergeTest, Basic) {
// Test set 1: Conv2D + AddBias
-// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Sub(E,Y)
+// C=MklConv2D(A,M,B,N); E=BiasAdd(C,D); Z=Sub(E,Y)
TEST_F(OptimizerMergeTest, Conv2DWithBias_Positive) {
InitGraph(
"node { name: 'A' op: 'Input'}"
+ "node { name: 'M' op: 'MklInput'}"
+ "node { name: 'B' op: 'Input'}"
+ "node { name: 'N' op: 'MklInput'}"
+ "node { name: 'C' op: 'MklConv2D'"
+ " attr { key: 'T' value { type: DT_FLOAT } }"
+ " attr { key: 'data_format' value { s: 'NCHW' } }"
+ " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+ " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
+ " attr { key: 'padding' value { s: 'SAME' } }"
+ " input: ['A', 'M', 'B', 'N']}"
+ "node { name: 'D' op: 'Input'}"
+ "node { name: 'E' op: 'BiasAdd'"
+ " attr { key: 'T' value { type: DT_FLOAT } }"
+ " attr { key: 'data_format' value { s: 'NCHW' } }"
+ " input: ['C', 'D'] }"
+ "node { name: 'Y' op: 'Input'}"
+ "node { name: 'Z' op: 'Sub'"
+ " attr {key: 'T' value { type: DT_FLOAT } }"
+ " input: ['E', 'Y']}");
+ EXPECT_EQ(DoNodeMerge(),
+ "A(Input);B(Input);D(Input);DMT/_0(Const);E(MklConv2DWithBias);"
+ "M(MklInput);N(MklInput);Y(Input);Z(Sub)|A->E;B->E:2;D->E:4;"
+ "DMT/_0->E:5;E->Z;M->E:1;N->E:3;Y->Z:1");
+}
+
+// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Sub(E,Y);
+// We do not merge in this case as op is Conv2D and not MklConv2D.
+TEST_F(OptimizerMergeTest, Conv2DWithBias_Negative_NoMklConv2D) {
+ InitGraph(
+ "node { name: 'A' op: 'Input'}"
"node { name: 'B' op: 'Input'}"
"node { name: 'C' op: 'Conv2D'"
" attr { key: 'T' value { type: DT_FLOAT } }"
@@ -143,63 +174,69 @@ TEST_F(OptimizerMergeTest, Conv2DWithBias_Positive) {
" attr {key: 'T' value { type: DT_FLOAT } }"
" input: ['E', 'Y']}");
EXPECT_EQ(DoNodeMerge(),
- "A(Input);B(Input);D(Input);E(Conv2DWithBias);Y(Input);Z(Sub)|"
- "A->E;B->E:1;D->E:2;E->Z;Y->Z:1");
+ "A(Input);B(Input);C(Conv2D);D(Input);E(BiasAdd);Y(Input);Z(Sub)|"
+ "A->C;B->C:1;C->E;D->E:1;E->Z;Y->Z:1");
}
-// Graph contains only Conv2D, no AddBias.
+// Graph contains only MklConv2D, no AddBias.
TEST_F(OptimizerMergeTest, Conv2DWithBias_Negative_NoAddBias) {
InitGraph(
"node { name: 'A' op: 'Input'}"
+ "node { name: 'M' op: 'MklInput'}"
"node { name: 'B' op: 'Input'}"
- "node { name: 'C' op: 'Conv2D'"
+ "node { name: 'N' op: 'MklInput'}"
+ "node { name: 'C' op: 'MklConv2D'"
" attr { key: 'T' value { type: DT_FLOAT } }"
" attr { key: 'data_format' value { s: 'NCHW' } }"
" attr { key: 'use_cudnn_on_gpu' value { b: false } }"
" attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
" attr { key: 'padding' value { s: 'SAME' } }"
- " input: ['A', 'B']}");
+ " input: ['A', 'M', 'B', 'N']}");
EXPECT_EQ(DoNodeMerge(),
- "A(Input);B(Input);C(Conv2D)|"
- "A->C;B->C:1");
+ "A(Input);B(Input);C(MklConv2D);M(MklInput);N(MklInput)|"
+ "A->C;B->C:2;M->C:1;N->C:3");
}
-// Conv2D output does not go to BiasAdd.
+// MklConv2D output does not go to BiasAdd.
TEST_F(OptimizerMergeTest, Conv2DWithBias_Negative_Dataflow1) {
InitGraph(
"node { name: 'A' op: 'Input'}"
+ "node { name: 'M' op: 'MklInput'}"
"node { name: 'B' op: 'Input'}"
- "node { name: 'C' op: 'Conv2D'"
+ "node { name: 'N' op: 'MklInput'}"
+ "node { name: 'C' op: 'MklConv2D'"
" attr { key: 'T' value { type: DT_FLOAT } }"
" attr { key: 'data_format' value { s: 'NCHW' } }"
" attr { key: 'use_cudnn_on_gpu' value { b: false } }"
" attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
" attr { key: 'padding' value { s: 'SAME' } }"
- " input: ['A', 'B']}"
+ " input: ['A', 'M', 'B', 'N']}"
"node { name: 'D' op: 'Input'}"
"node { name: 'E' op: 'Input'}"
"node { name: 'F' op: 'BiasAdd'"
" attr { key: 'T' value { type: DT_FLOAT } }"
" attr { key: 'data_format' value { s: 'NCHW' } }"
- " input: ['D', 'E'] }"); // Output of Conv2D does not go to BiasAdd.
+ " input: ['D', 'E'] }"); // Output of MklConv2D does not go to BiasAdd.
EXPECT_EQ(DoNodeMerge(),
- "A(Input);B(Input);C(Conv2D);D(Input);E(Input);F(BiasAdd)|"
- "A->C;B->C:1;D->F;E->F:1");
+ "A(Input);B(Input);C(MklConv2D);D(Input);E(Input);F(BiasAdd);"
+ "M(MklInput);N(MklInput)|A->C;B->C:2;D->F;E->F:1;M->C:1;N->C:3");
}
-// Conv2D has two outgoing edges: BiasAdd and some other dummy node (Add).
+// MklConv2D has two outgoing edges: BiasAdd and some other dummy node (Add).
// Merge should not be done in such case.
TEST_F(OptimizerMergeTest, Conv2DWithBias_Negative_Dataflow2) {
InitGraph(
"node { name: 'A' op: 'Input'}"
+ "node { name: 'M' op: 'MklInput'}"
"node { name: 'B' op: 'Input'}"
- "node { name: 'C' op: 'Conv2D'"
+ "node { name: 'N' op: 'MklInput'}"
+ "node { name: 'C' op: 'MklConv2D'"
" attr { key: 'T' value { type: DT_FLOAT } }"
" attr { key: 'data_format' value { s: 'NCHW' } }"
" attr { key: 'use_cudnn_on_gpu' value { b: false } }"
" attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
" attr { key: 'padding' value { s: 'SAME' } }"
- " input: ['A', 'B']}"
+ " input: ['A', 'M', 'B', 'N']}"
"node { name: 'D' op: 'Input'}"
"node { name: 'E' op: 'Input'}"
"node { name: 'F' op: 'BiasAdd'"
@@ -211,8 +248,9 @@ TEST_F(OptimizerMergeTest, Conv2DWithBias_Negative_Dataflow2) {
" attr { key: 'T' value { type: DT_FLOAT } }"
" input: ['C', 'E'] }");
EXPECT_EQ(DoNodeMerge(),
- "A(Input);B(Input);C(Conv2D);D(Input);E(Input);F(BiasAdd);G(Add)|"
- "A->C;B->C:1;C->G;D->F;E->F:1;E->G:1");
+ "A(Input);B(Input);C(MklConv2D);D(Input);E(Input);F(BiasAdd);"
+ "G(Add);M(MklInput);N(MklInput)|A->C;B->C:2;C->G;D->F;"
+ "E->F:1;E->G:1;M->C:1;N->C:3");
}
// data_format attribute value mismatch. Merge should not be done
@@ -220,30 +258,65 @@ TEST_F(OptimizerMergeTest, Conv2DWithBias_Negative_Dataflow2) {
TEST_F(OptimizerMergeTest, Conv2DWithBias_Negative_AttrMismatch) {
InitGraph(
"node { name: 'A' op: 'Input'}"
+ "node { name: 'M' op: 'MklInput'}"
"node { name: 'B' op: 'Input'}"
- "node { name: 'C' op: 'Conv2D'"
+ "node { name: 'N' op: 'MklInput'}"
+ "node { name: 'C' op: 'MklConv2D'"
" attr { key: 'T' value { type: DT_FLOAT } }"
" attr { key: 'data_format' value { s: 'NCHW' } }"
" attr { key: 'use_cudnn_on_gpu' value { b: false } }"
" attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
" attr { key: 'padding' value { s: 'SAME' } }"
- " input: ['A', 'B']}"
+ " input: ['A', 'M', 'B', 'N']}"
"node { name: 'D' op: 'Input'}"
"node { name: 'E' op: 'BiasAdd'"
" attr { key: 'T' value { type: DT_FLOAT } }"
" attr { key: 'data_format' value { s: 'NHCW' } }"
" input: ['C', 'D'] }");
EXPECT_EQ(DoNodeMerge(),
- "A(Input);B(Input);C(Conv2D);D(Input);E(BiasAdd)|"
- "A->C;B->C:1;C->E;D->E:1");
+ "A(Input);B(Input);C(MklConv2D);D(Input);E(BiasAdd);M(MklInput);"
+ "N(MklInput)|A->C;B->C:2;C->E;D->E:1;M->C:1;N->C:3");
}
-// Test set 2: Conv2D..BiasAddGrad -> Conv2DWithBiasBackpropBias rewrite tests
+#if 0
+// This test set is disabled temporarily as we do not enable node rewrite.
+// This test set will be enabled when we support Mkl-specific kernels for
+// backward bias.
+//
+// Test set 2: MklConv2D..BiasAddGrad -> Conv2DWithBiasBackpropBias
+// rewrite tests
-// C=Conv2D(A,B); D=Sub(C,A); F=BiasAddGrad(D)
+// C=MklConv2D(A,M,B,N); D=Sub(C,A); E=BiasAddGrad(D)
TEST_F(OptimizerMergeTest, Conv2DBackprop_Positive) {
InitGraph(
"node { name: 'A' op: 'Input'}"
+ "node { name: 'M' op: 'MklInput'}"
+ "node { name: 'B' op: 'Input'}"
+ "node { name: 'N' op: 'MklInput'}"
+ "node { name: 'C' op: 'MklConv2D'"
+ " attr { key: 'T' value { type: DT_FLOAT } }"
+ " attr { key: 'data_format' value { s: 'NCHW' } }"
+ " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+ " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
+ " attr { key: 'padding' value { s: 'SAME' } }"
+ " input: ['A', 'M', 'B', 'N']}"
+ "node { name: 'D' op: 'Sub'"
+ " attr {key: 'T' value { type: DT_FLOAT } }"
+ " input: ['C', 'A']}"
+ "node { name: 'E' op: 'BiasAddGrad'"
+ " attr { key: 'T' value { type: DT_FLOAT } }"
+ " attr { key: 'data_format' value { s: 'NCHW' } }"
+ " input: ['D'] }");
+ EXPECT_EQ(DoNodeMerge(),
+ "A(Input);B(Input);C(MklConv2D);D(Sub);E(Conv2DWithBiasBackpropBias);"
+ "M(MklInput);N(MklInput)|A->C;A->D:1;B->C:2;C->D;D->E;M->C:1;N->C:3");
+}
+
+// No MklConv2D in context, but Conv2D in context. No rewrite should happen.
+// C=Conv2D(A,B); D=Sub(C,A); E=BiasAddGrad(D)
+TEST_F(OptimizerMergeTest, Conv2DBackprop_Negative_NoMklConv2D) {
+ InitGraph(
+ "node { name: 'A' op: 'Input'}"
"node { name: 'B' op: 'Input'}"
"node { name: 'C' op: 'Conv2D'"
" attr { key: 'T' value { type: DT_FLOAT } }"
@@ -260,12 +333,12 @@ TEST_F(OptimizerMergeTest, Conv2DBackprop_Positive) {
" attr { key: 'data_format' value { s: 'NCHW' } }"
" input: ['D'] }");
EXPECT_EQ(DoNodeMerge(),
- "A(Input);B(Input);C(Conv2D);D(Sub);E(Conv2DWithBiasBackpropBias)|"
+ "A(Input);B(Input);C(Conv2D);D(Sub);E(BiasAddGrad)|"
"A->C;A->D:1;B->C:1;C->D;D->E");
}
// No Conv2D in the context for BiasAddGrad. No rewrite should happen.
-// C=Add(A,B); D=Sub(C,A); F=BiasAddGrad(D,E)
+// C=Add(A,B); D=Sub(C,A); E=BiasAddGrad(D)
TEST_F(OptimizerMergeTest, Conv2DBackprop_Negative_NoConv2D) {
InitGraph(
"node { name: 'A' op: 'Input'}"
@@ -287,7 +360,7 @@ TEST_F(OptimizerMergeTest, Conv2DBackprop_Negative_NoConv2D) {
// No Conv2D in the context for BiasAddGrad, but MatMul in context.
// Rewrite should happen, but name of BiasAddGrad does not change.
-// C=MatMul(A,B); D=Sub(C,A); F=BiasAddGrad(D,E)
+// C=MatMul(A,B); D=Sub(C,A); E=BiasAddGrad(D)
TEST_F(OptimizerMergeTest, Conv2DBackprop_Negative_NoConv2D_MatMul) {
InitGraph(
"node { name: 'A' op: 'Input'}"
@@ -310,7 +383,7 @@ TEST_F(OptimizerMergeTest, Conv2DBackprop_Negative_NoConv2D_MatMul) {
}
// Test set 3: MatMul..BiasAddGrad -> BiasAddGrad rewrite tests
-// C=MatMul(A,B); D=Sub(C,A); F=BiasAddGrad(D,E)
+// C=MatMul(A,B); D=Sub(C,A); E=BiasAddGrad(D)
TEST_F(OptimizerMergeTest, MatMulBiasAddGrad_Positive) {
InitGraph(
"node { name: 'A' op: 'Input'}"
@@ -333,7 +406,7 @@ TEST_F(OptimizerMergeTest, MatMulBiasAddGrad_Positive) {
}
// No MatMul in the context for BiasAddGrad. No rewrite should happen.
-// C=Add(A,B); D=Sub(C,A); F=BiasAddGrad(D,E)
+// C=Add(A,B); D=Sub(C,A); E=BiasAddGrad(D)
TEST_F(OptimizerMergeTest, MatMulBiasAddGrad_Negative_NoMatMul) {
InitGraph(
"node { name: 'A' op: 'Input'}"
@@ -352,7 +425,7 @@ TEST_F(OptimizerMergeTest, MatMulBiasAddGrad_Negative_NoMatMul) {
"A(Input);B(Input);C(Add);D(Sub);E(BiasAddGrad)|"
"A->C;A->D:1;B->C:1;C->D;D->E");
}
-
+#endif
static void BM_NodeMerge(int iters, int op_nodes) {
testing::StopTiming();
diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc
new file mode 100644
index 0000000000..1e7b5e7094
--- /dev/null
+++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc
@@ -0,0 +1,271 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef INTEL_MKL
+
+#include <set>
+#include <vector>
+#include <queue>
+#include <utility>
+#include <string>
+#include <memory>
+
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/graph/algorithm.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
+#include "tensorflow/core/lib/hash/hash.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/common_runtime/optimization_registry.h"
+
+#include "tensorflow/core/graph/mkl_tfconversion_pass.h"
+#include "tensorflow/core/util/mkl_util.h"
+
+namespace tensorflow {
+
+// This pass inserts Mkl-to-Tf tensor conversion nodes (represented by C)
+// into the graph between A and B, where A and B match one of the
+// following cases:
+// 1) A = layer/Op that generates output in Mkl format, and
+// B = layer/Op that does not accept input in Mkl format, and
+// A -> B (there is a direct edge between A and B); then
+// we will insert C such that A->C->B.
+//
+// 2) A = layer/Op that generates output in Mkl format, and
+// B = NULL (in other words, A is the last layer in the graph); then
+// we will insert C such that A->C. (C will be the last layer.)
+//
+// Note that case 1 applies to all outputs of A that are input to B.
+// In other words, the conversions will be required for every output
+// of A that is input to B. For example, let us say the output of A
+// is A1, A2, A3, of which A1 and A2 are in Mkl format, but A3 is not
+// in Mkl format, and all of them are input to B. In such case, we will
+// do the conversion for A1 and A2 only. We do not need to do any conversion
+// for A3.
+//
+// This pass relies on layers registering whether they are Mkl-compliant.
+// An Mkl-compliant layer can accept inputs in Mkl format and produce outputs
+// in Mkl format. A non-compliant layer accepts inputs and produces outputs in
+// TensorFlow format.
+//
+class MklToTfConversionPass : public GraphOptimizationPass {
+ public:
+  MklToTfConversionPass() {}
+
+  // Runs the pass on the graph(s) carried by 'options'.
+  Status Run(const GraphOptimizationPassOptions& options) override;
+
+  // Insert layout conversion nodes in the graph pointed to by 'g'.
+  // The function scans the graph for candidate edges on which a
+  // conversion node has to be inserted.
+  //
+  // @return true if at least one conversion node is inserted;
+  //         false, otherwise.
+  bool RunPass(std::unique_ptr<Graph>* g);
+
+ private:
+  // Is the input Op supported by Mkl-specific layout?
+  //
+  // @input op_name string of the op
+  // @return true if op is Mkl supported; false, otherwise.
+  bool IsMklSupportedOp(const string& op_name) const {
+    return mkl_layer_registry::IsMklLayer(op_name);
+  }
+
+  // Insert a layout conversion node on the edge 'e' of graph 'g'.
+  //
+  // The edge is deleted once a call to this function succeeds. Any attempt
+  // to use the edge after that leads to undefined behavior.
+  //
+  // @return OK if insertion is successful; otherwise an appropriate
+  //         error status.
+  Status InsertConversionNodeOnEdge(std::unique_ptr<Graph>* g, Edge* e);
+};
+
+// We register MklToTf insertion for phase 1 in post-partition grouping.
+// We register this pass after partitioning so that we get a complete
+// picture of inputs and outputs of the nodes in the graphs.
+const OptimizationPassRegistry::Grouping kMklTfConvPassGroup =
+ OptimizationPassRegistry::POST_PARTITIONING;
+REGISTER_OPTIMIZATION(kMklTfConvPassGroup, 1, MklToTfConversionPass);
+
+// Inserts an MklToTf conversion node on edge 'e' of graph '*g'.
+//
+// The conversion node takes two inputs from the source node: output slot
+// e->src_output() and the slot immediately after it (treated as the Mkl
+// tensor that accompanies the Tf tensor). Its single output is wired to the
+// destination input that 'e' used to feed. On success 'e' is removed from
+// the graph and must not be used afterwards.
+//
+// Returns OK on success. Returns INVALID_ARGUMENT, before the graph is
+// modified, if both endpoints carry a "T" attribute and the two differ.
+// A failure to read the source's "T" attribute or to build the conversion
+// node is returned to the caller instead of aborting the process.
+Status MklToTfConversionPass::InsertConversionNodeOnEdge(
+    std::unique_ptr<Graph>* g, Edge* e) {
+  CHECK_NOTNULL(e);
+
+  Node* src = e->src();
+  Node* dst = e->dst();
+
+  CHECK_NOTNULL(src);
+  CHECK_NOTNULL(dst);
+
+  DataType src_datatype = DT_INVALID;
+  Status status = GetNodeAttr(src->def(), "T", &src_datatype);
+  if (!status.ok()) {
+    return status;
+  }
+
+  // The destination need not be an Mkl op and may have no "T" attribute at
+  // all, so the datatypes are compared only when the destination has one.
+  DataType dst_datatype = DT_INVALID;
+  const bool dst_has_datatype =
+      GetNodeAttr(dst->def(), "T", &dst_datatype).ok();
+  if (dst_has_datatype && src_datatype != dst_datatype) {
+    string err_msg = "T attribute of " + src->name() + " and " +
+                     dst->name() + " do not match. Will not insert" +
+                     " MklToTf node in such case.";
+    return Status(error::Code::INVALID_ARGUMENT, err_msg.c_str());
+  }
+
+  // Lets build the conversion node and specify src as input.
+  Node* conversion_node = nullptr;
+  status = NodeBuilder((*g)->NewName("Mkl2Tf"), "MklToTf")
+               .Input(src, e->src_output())
+               .Input(src, e->src_output() + 1)  // Mkl tensor immediately
+                                                 // follows Tf tensor.
+               .Device(src->def().device())  // We want to get conversion node
+                                             // on same device as source node.
+               .Attr("T", src_datatype)
+               .Finalize(&**g, &conversion_node);
+  if (!status.ok()) {
+    return status;
+  }
+
+  CHECK_NOTNULL(conversion_node);
+
+  // Forward the source's data_format to the conversion node when present.
+  string data_format;
+  if (GetNodeAttr(src->def(), "data_format", &data_format).ok()) {
+    conversion_node->AddAttr("data_format", data_format);
+  }
+
+  // Get assigned device from source node and apply it to conversion node.
+  // We want conversion node to be on the same device as the source node.
+  conversion_node->set_assigned_device_name(src->assigned_device_name());
+
+  // Set the Mkl layer label for this op.
+  conversion_node->AddAttr("_kernel", mkl_layer_registry::kMklLayerLabel);
+
+  // Now that we have added edge from src->conversion_node, let's add edge from
+  // output of conversion_node to the dest node. Since conversion_node
+  // has only 1 output, the src_output of conversion_node is 0.
+  CHECK_NOTNULL((*g)->AddEdge(conversion_node, 0, dst, e->dst_input()));
+
+  VLOG(1) << "MklToTfConversionPass: Inserting Conversion node on: "
+          << src->type_string() << " and " << dst->type_string()
+          << " successful.";
+
+  // Remove src->dst edge now.
+  (*g)->RemoveEdge(e);
+  return Status::OK();
+}
+
+// Scans every data edge of '*g' and inserts an MklToTf conversion node on
+// each edge whose source op is Mkl-supported and whose destination op is not.
+//
+// Returns true if at least one conversion node was inserted.
+bool MklToTfConversionPass::RunPass(std::unique_ptr<Graph>* g) {
+  bool result = false;
+
+  CHECK_NOTNULL(g);
+
+  DumpGraph("Before MklToTfConversionPass", &**g);
+
+  // Since we are looking for an Mkl-supported op node immediately followed
+  // by a non-Mkl op node, it is enough to iterate over the edge set of the
+  // graph. Candidate edges are only collected in this loop; the rewriting,
+  // which adds and removes edges, is done in a second loop below.
+  std::vector<Edge*> candidate_edges;
+
+  for (const Edge* e : (*g)->edges()) {
+    // We skip control edges.
+    if (e->IsControlEdge()) {
+      continue;
+    }
+
+    Node* src = e->src();
+    Node* dst = e->dst();
+
+    VLOG(1) << "MklToTfConversionPass: InsertConversionNodes: "
+            << src->type_string() << " and " << dst->type_string();
+
+    // Only sources that carry a "T" attribute are considered. The
+    // destination's datatype is not examined here because the destination
+    // node may not be an Mkl node.
+    DataType src_datatype = DT_INVALID;
+    if (!GetNodeAttr(src->def(), "T", &src_datatype).ok()) {
+      continue;
+    }
+
+    // Check if src is Mkl-compliant, while dst is not Mkl-compliant.
+    if (IsMklSupportedOp(src->type_string()) &&
+        !IsMklSupportedOp(dst->type_string())) {
+      VLOG(1) << "MklToTfConversionPass: Scheduled nodes " << src->name()
+              << " and " << dst->name() << " for inserting conversion nodes";
+      candidate_edges.push_back(const_cast<Edge*>(e));
+    }
+  }
+
+  // Process all candidate edges and insert conversion nodes on them.
+  for (Edge* e : candidate_edges) {
+    // The names are copied up front because a successful insertion removes
+    // the edge, after which it must not be dereferenced.
+    string src_name = e->src()->name();
+    string dst_name = e->dst()->name();
+    if (InsertConversionNodeOnEdge(g, e).ok()) {
+      VLOG(1) << "MklToTfConversionPass: Inserted conversion "
+              << "node on edge between " << src_name << " and " << dst_name;
+      // Even if we insert conversion node on a single edge, we
+      // need to return true.
+      result = true;
+    }
+  }
+
+  DumpGraph("After MklToTfConversionPass", &**g);
+
+  // We need to return true even if we insert one conversion node
+  // anywhere in the graph.
+  return result;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Run function for the pass
+//////////////////////////////////////////////////////////////////////////////
+
+// Free-function wrapper around the pass: constructs a temporary
+// MklToTfConversionPass, runs RunPass() on '*g' and returns its result
+// (true if at least one conversion node was inserted).
+bool InsertMklToTfConversionNodes(std::unique_ptr<Graph>* g) {
+ return MklToTfConversionPass().RunPass(g);
+}
+
+// Runs the conversion pass over the graph(s) carried by 'options'.
+//
+// Which field is used depends on the grouping this pass is registered in
+// (kMklTfConvPassGroup): options.partition_graphs for POST_PARTITIONING,
+// options.graph otherwise. A graph or graph map that is absent is skipped.
+//
+// Always returns OK; the boolean result of RunPass() is not propagated.
+Status MklToTfConversionPass::Run(
+    const GraphOptimizationPassOptions& options) {
+  if (kMklTfConvPassGroup != OptimizationPassRegistry::POST_PARTITIONING) {
+    // For any pre-partitioning phase, graph is stored in options.graph.
+    if (options.graph != nullptr && *options.graph != nullptr) {
+      RunPass(options.graph);
+    }
+    return Status::OK();
+  }
+
+  // For post partitioning phase, graphs are stored in
+  // options.partition_graphs. Only the pointer that is about to be
+  // dereferenced is checked, so a non-null options.graph can no longer
+  // mask a null options.partition_graphs.
+  if (options.partition_graphs == nullptr) {
+    return Status::OK();
+  }
+  for (auto& pg : *options.partition_graphs) {
+    if (pg.second != nullptr) {
+      RunPass(&pg.second);
+    }
+  }
+
+  return Status::OK();
+}
+
+} // namespace tensorflow
+
+#endif
diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.h b/tensorflow/core/graph/mkl_tfconversion_pass.h
new file mode 100644
index 0000000000..0562d8b3cd
--- /dev/null
+++ b/tensorflow/core/graph/mkl_tfconversion_pass.h
@@ -0,0 +1,36 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// An optimization pass that inserts MklToTf conversion nodes in the graph
+
+#ifndef TENSORFLOW_CORE_GRAPH_MKL_TFCONVERSION_PASS_H_
+#define TENSORFLOW_CORE_GRAPH_MKL_TFCONVERSION_PASS_H_
+
+#ifdef INTEL_MKL
+
+#include <sys/types.h>
+#include <memory>
+#include "tensorflow/core/graph/graph.h"
+
+namespace tensorflow {
+// Interface to invoke the pass for unit test
+//
+// Returns true if and only if 'g' is mutated.
+extern bool InsertMklToTfConversionNodes(std::unique_ptr<Graph>* g);
+} // namespace tensorflow
+
+#endif
+
+#endif // TENSORFLOW_CORE_GRAPH_MKL_TFCONVERSION_PASS_H_
diff --git a/tensorflow/core/graph/mkl_tfconversion_pass_test.cc b/tensorflow/core/graph/mkl_tfconversion_pass_test.cc
new file mode 100644
index 0000000000..103ff295b3
--- /dev/null
+++ b/tensorflow/core/graph/mkl_tfconversion_pass_test.cc
@@ -0,0 +1,243 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef INTEL_MKL
+
+#include "tensorflow/core/graph/mkl_tfconversion_pass.h"
+
+#include <vector>
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/graph/graph_constructor.h"
+#include "tensorflow/core/graph/testlib.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/random/simple_philox.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/protobuf.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+
+namespace tensorflow {
+namespace {
+
+// Test fixture that builds a Graph from a text-format GraphDef, runs the
+// MklToTf conversion pass on it and renders the result as a canonical
+// "nodes|edges" string that tests compare against.
+class MklToTfConversionPass : public ::testing::Test {
+ public:
+  MklToTfConversionPass() : graph_(OpRegistry::Global()) {}
+
+  // Parses the text-format GraphDef 's' and converts it into 'graph'.
+  // CHECK-fails if parsing or conversion fails.
+  static void InitGraph(const string& s, Graph* graph) {
+    GraphDef graph_def;
+
+    auto parser = protobuf::TextFormat::Parser();
+    CHECK(parser.MergeFromString(s, &graph_def)) << s;
+    GraphConstructorOptions opts;
+    TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph_def, graph));
+  }
+
+  // Initializes the fixture's graph from 's' and records its canonical
+  // string as the "original" graph.
+  void InitGraph(const string& s) {
+    InitGraph(s, &graph_);
+    original_ = CanonicalGraphString(&graph_);
+  }
+
+  // Only op nodes appear in the canonical string.
+  static bool IncludeNode(const Node* n) { return n->IsOp(); }
+
+  // Renders one end of an edge: "name" for slot 0, "name:control" for the
+  // control slot, and "name:<index>" otherwise.
+  static string EdgeId(const Node* n, int index) {
+    if (index == 0) {
+      return n->name();
+    } else if (index == Graph::kControlSlot) {
+      return strings::StrCat(n->name(), ":control");
+    } else {
+      return strings::StrCat(n->name(), ":", index);
+    }
+  }
+
+  // Returns "n1(Op);n2(Op);...|a->b;c->d:1;..." with both the node list
+  // and the edge list sorted, so the result does not depend on iteration
+  // order.
+  string CanonicalGraphString(Graph* g) {
+    std::vector<string> nodes;
+    std::vector<string> edges;
+    for (const Node* n : g->nodes()) {
+      if (IncludeNode(n)) {
+        nodes.push_back(strings::StrCat(n->name(), "(", n->type_string(), ")"));
+      }
+    }
+    for (const Edge* e : g->edges()) {
+      if (IncludeNode(e->src()) && IncludeNode(e->dst())) {
+        edges.push_back(strings::StrCat(EdgeId(e->src(), e->src_output()), "->",
+                                        EdgeId(e->dst(), e->dst_input())));
+      }
+    }
+    // Canonicalize
+    std::sort(nodes.begin(), nodes.end());
+    std::sort(edges.begin(), edges.end());
+    return strings::StrCat(str_util::Join(nodes, ";"), "|",
+                           str_util::Join(edges, ";"));
+  }
+
+  // Runs the conversion pass on the fixture's graph and returns the
+  // canonical string of the resulting graph.
+  string DoRunMklToTfConversionPass() {
+    string before = CanonicalGraphString(&graph_);
+    LOG(ERROR) << "Before MklToTf conversion pass: " << before;
+
+    // The pass takes a std::unique_ptr<Graph>*, but graph_ is a member of
+    // this fixture and must never be deleted through that pointer. Wrap it
+    // only for the duration of the call and release() it afterwards, so
+    // that neither graph_ is deleted nor a heap-allocated wrapper leaked.
+    std::unique_ptr<Graph> ug(&graph_);
+    InsertMklToTfConversionNodes(&ug);
+    (void)ug.release();
+
+    string result = CanonicalGraphString(&graph_);
+    LOG(ERROR) << "After MklToTf conversion pass: " << result;
+    return result;
+  }
+
+  const string& OriginalGraph() const { return original_; }
+
+  Graph graph_;      // Graph under test.
+  string original_;  // Canonical string of graph_ as set by InitGraph(s).
+};
+
+REGISTER_OP("Input").Output("o: float").SetIsStateful();
+REGISTER_OP("HalfInput").Output("o: half").SetIsStateful();
+REGISTER_OP("MklInput").Output("o: uint8").SetIsStateful();
+
+// Two Mul nodes fed by two Input nodes.
+// C=Mul(A,B); D=Mul(A,B)
+// The expected string contains exactly the nodes and edges of the graph as
+// constructed, i.e. the pass is expected to insert no conversion node.
+TEST_F(MklToTfConversionPass, Basic) {
+ InitGraph(
+ "node { name: 'A' op: 'Input'}"
+ "node { name: 'B' op: 'Input'}"
+ "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+ " input: ['A', 'B'] }"
+ "node { name: 'D' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+ " input: ['A', 'B'] }");
+ EXPECT_EQ(DoRunMklToTfConversionPass(),
+ "A(Input);B(Input);C(Mul);D(Mul)|"
+ "A->C;A->D;B->C:1;B->D:1");
+}
+
+// MklConv2D followed by Non-Mkl layer
+// C=MklConv2D(A,M,B,N); E=Sub(C,D)
+TEST_F(MklToTfConversionPass, Positive) {
+ InitGraph(
+ "node { name: 'A' op: 'Input'}"
+ "node { name: 'M' op: 'MklInput'}"
+ "node { name: 'B' op: 'Input'}"
+ "node { name: 'N' op: 'MklInput'}"
+ "node { name: 'C' op: 'MklConv2D'"
+ " attr { key: 'T' value { type: DT_FLOAT } }"
+ " attr { key: 'data_format' value { s: 'NCHW' } }"
+ " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+ " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
+ " attr { key: 'padding' value { s: 'SAME' } }"
+ " input: ['A', 'M', 'B', 'N']}"
+ "node { name: 'D' op: 'Input'}"
+ "node { name: 'E' op: 'Sub'"
+ " attr {key: 'T' value { type: DT_FLOAT } }"
+ " input: ['C', 'D']}");
+ EXPECT_EQ(DoRunMklToTfConversionPass(),
+ "A(Input);B(Input);C(MklConv2D);D(Input);E(Sub);M(MklInput);"
+ "Mkl2Tf/_0(MklToTf);N(MklInput)|A->C;B->C:2;C->Mkl2Tf/_0;"
+ "C:1->Mkl2Tf/_0:1;D->E:1;M->C:1;Mkl2Tf/_0->E;N->C:3");
+}
+
+// MklConv2D followed by Non-Mkl layer, and MklConv2D uses half type
+// C=MklConv2D(A,M,B,N); E=Sub(C,D)
+// MklToTf node should be inserted.
+TEST_F(MklToTfConversionPass, Positive_Type) {
+ InitGraph(
+ "node { name: 'A' op: 'HalfInput'}"
+ "node { name: 'M' op: 'MklInput'}"
+ "node { name: 'B' op: 'HalfInput'}"
+ "node { name: 'N' op: 'MklInput'}"
+ "node { name: 'C' op: 'MklConv2D'"
+ " attr { key: 'T' value { type: DT_HALF } }"
+ " attr { key: 'data_format' value { s: 'NCHW' } }"
+ " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+ " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
+ " attr { key: 'padding' value { s: 'SAME' } }"
+ " input: ['A', 'M', 'B', 'N']}"
+ "node { name: 'D' op: 'HalfInput'}"
+ "node { name: 'E' op: 'Sub'"
+ " attr {key: 'T' value { type: DT_HALF } }"
+ " input: ['C', 'D']}");
+ EXPECT_EQ(DoRunMklToTfConversionPass(),
+ "A(HalfInput);B(HalfInput);C(MklConv2D);D(HalfInput);"
+ "E(Sub);M(MklInput);Mkl2Tf/_0(MklToTf);N(MklInput)|"
+ "A->C;B->C:2;C->Mkl2Tf/_0;C:1->Mkl2Tf/_0:1;D->E:1;"
+ "M->C:1;Mkl2Tf/_0->E;N->C:3");
+}
+
+// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Sub(E,Y);
+// There is no Mkl layer so no conversion op should be inserted.
+TEST_F(MklToTfConversionPass, Negative_NoMklLayer) {
+ InitGraph(
+ "node { name: 'A' op: 'Input'}"
+ "node { name: 'B' op: 'Input'}"
+ "node { name: 'C' op: 'Conv2D'"
+ " attr { key: 'T' value { type: DT_FLOAT } }"
+ " attr { key: 'data_format' value { s: 'NCHW' } }"
+ " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+ " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
+ " attr { key: 'padding' value { s: 'SAME' } }"
+ " input: ['A', 'B']}"
+ "node { name: 'D' op: 'Input'}"
+ "node { name: 'E' op: 'BiasAdd'"
+ " attr { key: 'T' value { type: DT_FLOAT } }"
+ " attr { key: 'data_format' value { s: 'NCHW' } }"
+ " input: ['C', 'D'] }"
+ "node { name: 'Y' op: 'Input'}"
+ "node { name: 'Z' op: 'Sub'"
+ " attr {key: 'T' value { type: DT_FLOAT } }"
+ " input: ['E', 'Y']}");
+ EXPECT_EQ(DoRunMklToTfConversionPass(),
+ "A(Input);B(Input);C(Conv2D);D(Input);E(BiasAdd);Y(Input);Z(Sub)|"
+ "A->C;B->C:1;C->E;D->E:1;E->Z;Y->Z:1");
+}
+
+// Benchmarks the conversion pass on a generated graph of 10 Input nodes
+// and 'op_nodes' Mul nodes whose two inputs are picked pseudo-randomly
+// (fixed seed) from the Input nodes. Only the call to
+// InsertMklToTfConversionNodes() is timed; building and destroying the
+// graph happen with the timer stopped.
+static void BM_RunMklToTfConversionPass(int iters, int op_nodes) {
+  testing::StopTiming();
+  string s;
+  for (int in = 0; in < 10; in++) {
+    s += strings::Printf("node { name: 'in%04d' op: 'Input'}", in);
+  }
+  random::PhiloxRandom philox(301, 17);
+  random::SimplePhilox rnd(&philox);
+  for (int op = 0; op < op_nodes; op++) {
+    s += strings::Printf(
+        "node { name: 'op%04d' op: 'Mul' attr { key: 'T' value { "
+        "type: DT_FLOAT } } input: ['in%04d', 'in%04d' ] }",
+        op, rnd.Uniform(10), rnd.Uniform(10));
+  }
+
+  bool first = true;
+  while (iters > 0) {
+    // The graph is owned by a unique_ptr from the moment it is created and
+    // is destroyed at the end of each iteration, after timing has stopped.
+    std::unique_ptr<Graph> ug(new Graph(OpRegistry::Global()));
+    MklToTfConversionPass::InitGraph(s, ug.get());
+    int N = ug->num_node_ids();
+    if (first) {
+      testing::SetLabel(strings::StrCat("Per graph node. Nodes: ", N));
+      first = false;
+    }
+
+    testing::StartTiming();
+    InsertMklToTfConversionNodes(&ug);
+    testing::StopTiming();
+
+    iters -= N;  // Our benchmark units are individual graph nodes,
+                 // not whole graphs
+  }
+}
+BENCHMARK(BM_RunMklToTfConversionPass)->Arg(1000)->Arg(10000);
+
+} // namespace
+} // namespace tensorflow
+
+#endif /* INTEL_MKL */
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 9740f96a6d..3b79d4c3db 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -688,8 +688,15 @@ tf_kernel_library(
tf_kernel_library(
name = "transpose_op",
- prefix = "transpose_op",
- deps = ARRAY_DEPS,
+ srcs = [
+ "transpose_op.cc",
+ ] + if_mkl([
+ "mkl_transpose_op.cc",
+ ]),
+ hdrs = ["transpose_op.h"],
+ deps = ARRAY_DEPS + if_mkl([
+ "//third_party/mkl:intel_binary_blob",
+ ]),
)
tf_kernel_library(
@@ -1735,6 +1742,22 @@ tf_cuda_cc_test(
],
)
+tf_cuda_cc_test(
+ name = "resize_benchmark_test",
+ srcs = ["resize_op_benchmark_test.cc"],
+ deps = [
+ ":image",
+ ":ops_testutil",
+ ":ops_util",
+ "//tensorflow/core:core_cpu",
+ "//tensorflow/core:framework",
+ "//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:test",
+ "//tensorflow/core:test_main",
+ "//tensorflow/core:testlib",
+ ],
+)
+
cc_library(
name = "io",
deps = [
@@ -4376,7 +4399,7 @@ tf_cc_test(
if_mkl(
tf_kernel_library(
- name = "mkl_ops",
+ name = "mkl_matmul_op",
prefix = "mkl_matmul",
deps = [
":math",
@@ -4385,6 +4408,40 @@ if_mkl(
),
)
+if_mkl(
+ tf_kernel_library(
+ name = "mkl_conv_op",
+ prefix = "mkl_conv",
+ deps = [
+ ":bounds_check",
+ ":ops_util",
+ "//tensorflow/core:core_cpu",
+ "//tensorflow/core:framework",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:lib_internal",
+ "//tensorflow/core:nn_ops_op_lib",
+ "//third_party/mkl:intel_binary_blob",
+ ],
+ ),
+)
+
+if_mkl(
+ tf_kernel_library(
+ name = "mkl_tfconv_op",
+ prefix = "mkl_tfconv",
+ deps = [
+ ":bounds_check",
+ ":ops_util",
+ "//tensorflow/core:core_cpu",
+ "//tensorflow/core:framework",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:lib_internal",
+ "//tensorflow/core:nn_ops_op_lib",
+ "//third_party/mkl:intel_binary_blob",
+ ],
+ ),
+)
+
# -----------------------------------------------------------------------------
# Google-internal targets. These must be at the end for syncrepo.
diff --git a/tensorflow/core/kernels/adjust_hue_op.cc b/tensorflow/core/kernels/adjust_hue_op.cc
index 09300737c7..e8f32693f7 100644
--- a/tensorflow/core/kernels/adjust_hue_op.cc
+++ b/tensorflow/core/kernels/adjust_hue_op.cc
@@ -1,5 +1,4 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
@@ -12,16 +11,24 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
+#define EIGEN_USE_THREADS
+
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+#endif
+
#include <memory>
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/adjust_hue_op.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/util/work_sharder.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
namespace tensorflow {
@@ -77,6 +84,7 @@ template <class Device>
class AdjustHueOp;
namespace internal {
+
// Helper function to convert a RGB color to H-and-V-range. H is in the range
// of [0, 6] instead of the normal [0, 1]
static void rgb_to_hv_range(float r, float g, float b, float* h, float* v_min,
@@ -185,6 +193,7 @@ static void hv_range_to_rgb(float h, float v_min, float v_max, float* r,
}
} // namespace internal
+
template <>
class AdjustHueOp<CPUDevice> : public AdjustHueOpBase {
public:
@@ -237,4 +246,34 @@ class AdjustHueOp<CPUDevice> : public AdjustHueOpBase {
REGISTER_KERNEL_BUILDER(Name("AdjustHue").Device(DEVICE_CPU),
AdjustHueOp<CPUDevice>);
+#if GOOGLE_CUDA
+// GPU specialization of AdjustHueOp. Hands the flattened float buffers of
+// the input, delta and output tensors to functor::AdjustHueGPU.
+template <>
+class AdjustHueOp<GPUDevice> : public AdjustHueOpBase {
+ public:
+  explicit AdjustHueOp(OpKernelConstruction* context)
+      : AdjustHueOpBase(context) {}
+
+  // Fails the op with an Internal error if the device has no stream;
+  // does nothing further when the input has no elements.
+  virtual void DoCompute(OpKernelContext* context,
+                         const ComputeOptions& options) override {
+    const Tensor* in_tensor = options.input;
+    const Tensor* delta_tensor = options.delta;
+    Tensor* out_tensor = options.output;
+    const int64 element_count = in_tensor->NumElements();
+
+    GPUDevice device = context->eigen_gpu_device();
+    const auto stream = device.stream();
+    OP_REQUIRES(context, stream, errors::Internal("No GPU stream available."));
+
+    // Nothing to launch for an empty input.
+    if (element_count <= 0) {
+      return;
+    }
+
+    const float* src = in_tensor->flat<float>().data();
+    const float* hue_delta = delta_tensor->flat<float>().data();
+    float* const dst = out_tensor->flat<float>().data();
+    functor::AdjustHueGPU()(&device, element_count, src, hue_delta, dst);
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("AdjustHue").Device(DEVICE_GPU), AdjustHueOp<GPUDevice>);
+
+#endif
+
+//} // namespace functor
} // namespace tensorflow
diff --git a/tensorflow/core/kernels/adjust_hue_op.h b/tensorflow/core/kernels/adjust_hue_op.h
new file mode 100644
index 0000000000..5b30bd8540
--- /dev/null
+++ b/tensorflow/core/kernels/adjust_hue_op.h
@@ -0,0 +1,42 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef _TENSORFLOW_CORE_KERNELS_ADJUST_HUE_OP_H
+#define _TENSORFLOW_CORE_KERNELS_ADJUST_HUE_OP_H
+
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+namespace functor {
+
+// Functor that performs the hue adjustment on a GPU device. Only the
+// declaration lives in this header; no definition is visible here.
+struct AdjustHueGPU {
+ // NOTE(review): presumably `input` and `output` each point at
+ // `number_of_elements` floats and `delta` at the hue delta value;
+ // confirm against the definition and the calling kernel.
+ void operator()(
+ GPUDevice* device,
+ const int64 number_of_elements,
+ const float* const input,
+ const float* const delta,
+ float* const output
+ );
+};
+
+} // namespace functor
+} // namespace tensorflow
+
+#endif // GOOGLE_CUDA
+#endif // _TENSORFLOW_CORE_KERNELS_ADJUST_HUE_OP_H
diff --git a/tensorflow/core/kernels/adjust_hue_op_gpu.cu.cc b/tensorflow/core/kernels/adjust_hue_op_gpu.cu.cc
new file mode 100644
index 0000000000..2fc69ed101
--- /dev/null
+++ b/tensorflow/core/kernels/adjust_hue_op_gpu.cu.cc
@@ -0,0 +1,141 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/kernels/adjust_hue_op.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+namespace internal {
+
+namespace {
+// One pixel expressed as red / green / blue components.
+struct RgbTuple {
+  float r;
+  float g;
+  float b;
+};
+
+// One pixel expressed as hue / saturation / value components.
+struct HsvTuple {
+  float h;
+  float s;
+  float v;
+};
+}  // anon namespace
+
+// Converts one RGB pixel to HSV.
+//
+// The returned hue is scaled by 1/6 (one unit per full turn of the color
+// wheel), saturation is chroma / max component, and value is the max
+// component. Hue is 0 when the pixel has no chroma and saturation is 0 when
+// the max component is not positive.
+__device__ HsvTuple rgb2hsv_cuda(const float r, const float g, const float b) {
+  const float max_c = fmaxf(r, fmaxf(g, b));
+  const float min_c = fminf(r, fminf(g, b));
+  const float chroma = max_c - min_c;
+
+  HsvTuple hsv;
+  hsv.h = 0.0f;
+  if (chroma > 0.0f) {
+    if (max_c == r) {
+      // Red is dominant: the ratio may be negative, in which case the hue is
+      // wrapped around by adding a full turn (6 sextants).
+      const float ratio = (g - b) / chroma;
+      const float sign = copysignf(1.0f, ratio);
+      hsv.h = ((sign < 0.0f) * 6.0f + sign * fmodf(sign * ratio, 6.0f)) / 6.0f;
+    } else if (max_c == g) {
+      hsv.h = ((b - r) / chroma + 2.0f) / 6.0f;
+    } else {
+      hsv.h = ((r - g) / chroma + 4.0f) / 6.0f;
+    }
+  }
+  hsv.s = (max_c > 0.0f) ? chroma / max_c : 0.0f;
+  hsv.v = max_c;
+  return hsv;
+}
+
+// Converts one HSV pixel back to RGB.
+//
+// `h` is multiplied by 6 to obtain a position in sextants of the color wheel.
+// Each channel receives the full chroma, the intermediate component, or
+// neither, depending on the sextant, plus the common offset v - chroma. A
+// scaled hue outside [0, 6) matches no sextant and yields the offset on every
+// channel.
+__device__ RgbTuple hsv2rgb_cuda(const float h, const float s, const float v) {
+  const float sextant = h * 6.0f;
+  const float chroma = v * s;
+  const float mid = chroma * (1.0f - fabsf(fmodf(sextant, 2.0f) - 1.0f));
+  const float offset = v - chroma;
+
+  // Exactly one of these is true for a scaled hue in [0, 6).
+  const bool in_0 = sextant >= 0.0f && sextant < 1.0f;
+  const bool in_1 = sextant >= 1.0f && sextant < 2.0f;
+  const bool in_2 = sextant >= 2.0f && sextant < 3.0f;
+  const bool in_3 = sextant >= 3.0f && sextant < 4.0f;
+  const bool in_4 = sextant >= 4.0f && sextant < 5.0f;
+  const bool in_5 = sextant >= 5.0f && sextant < 6.0f;
+
+  RgbTuple rgb;
+  rgb.r = chroma * (in_0 || in_5) + mid * (in_1 || in_4) + offset;
+  rgb.g = chroma * (in_1 || in_2) + mid * (in_0 || in_3) + offset;
+  rgb.b = chroma * (in_3 || in_4) + mid * (in_2 || in_5) + offset;
+  return rgb;
+}
+
+// Kernel that shifts the hue of packed RGB float pixels.
+//
+// Each thread handles one pixel, i.e. three consecutive floats (R, G, B):
+// it converts the pixel to HSV, adds hue_delta[0] to the hue, wraps the hue
+// back into [0, 1), and writes the pixel converted back to RGB.
+//
+//   number_elements - total number of floats in `input` / `output`.
+//   input           - source pixels.
+//   output          - destination pixels, same layout as `input`.
+//   hue_delta       - pointer whose first element is the hue offset.
+__global__ void adjust_hue_nhwc(const int64 number_elements,
+                                const float* const __restrict__ input,
+                                float* const output,
+                                const float* const hue_delta) {
+  // Widen to 64 bits *before* multiplying. blockDim, blockIdx and threadIdx
+  // are 32-bit unsigned, so computing "(global thread id) * 3" in their native
+  // type wraps for very large tensors and lands on a misaligned pixel.
+  const int64 pixel =
+      static_cast<int64>(blockDim.x) * blockIdx.x + threadIdx.x;
+  // Multiply by 3 since each pixel is a contiguous RGB triple (NHWC).
+  const int64 idx = pixel * 3;
+  // Bounds check: the whole triple must lie inside the buffer.
+  if (idx + 2 > number_elements - 1) {
+    return;
+  }
+  const float delta = hue_delta[0];
+  const HsvTuple hsv = rgb2hsv_cuda(input[idx], input[idx + 1], input[idx + 2]);
+  // Hue adjustment: fmodf keeps the sign of its first argument, so a negative
+  // result is shifted up by one full turn to end in [0, 1).
+  float new_h = fmodf(hsv.h + delta, 1.0f);
+  if (new_h < 0.0f) {
+    new_h = fmodf(1.0f + new_h, 1.0f);
+  }
+  const RgbTuple rgb = hsv2rgb_cuda(new_h, hsv.s, hsv.v);
+  output[idx] = rgb.r;
+  output[idx + 1] = rgb.g;
+  output[idx + 2] = rgb.b;
+}
+} // namespace internal
+
+
+namespace functor {
+
+// Launches the hue-adjustment kernel on the device's stream.
+//
+//   device             - GPU device providing the stream and launch limits.
+//   number_of_elements - total number of floats in `input` / `output`.
+//   input              - source pixels (packed RGB floats).
+//   delta              - pointer to the hue offset, read by the kernel.
+//   output             - destination pixels.
+//
+// Nothing is launched when the buffers hold less than one complete pixel.
+void AdjustHueGPU::operator()(
+    GPUDevice* device,
+    const int64 number_of_elements,
+    const float* const input,
+    const float* const delta,
+    float* const output
+) {
+  // The kernel processes one RGB triple per thread, so the grid is sized by
+  // the number of pixels rather than the number of floats. Sizing it by
+  // floats launched three times more threads than needed.
+  const int64 pixel_count = number_of_elements / 3;
+  if (pixel_count <= 0) {
+    // A launch with zero blocks is an invalid configuration.
+    return;
+  }
+  const auto stream = device->stream();
+  const CudaLaunchConfig config = GetCudaLaunchConfig(pixel_count, *device);
+  const int threads_per_block = config.thread_per_block;
+  const int block_count =
+      (pixel_count + threads_per_block - 1) / threads_per_block;
+  internal::adjust_hue_nhwc<<<block_count, threads_per_block, 0, stream>>>(
+      number_of_elements, input, output, delta
+  );
+}
+} // namespace functor
+} // namespace tensorflow
+#endif // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/eigen_pooling.h b/tensorflow/core/kernels/eigen_pooling.h
index 492c358a52..f93921d4a5 100644
--- a/tensorflow/core/kernels/eigen_pooling.h
+++ b/tensorflow/core/kernels/eigen_pooling.h
@@ -338,6 +338,7 @@ struct AvgPoolMeanReducer {
// In the case below, 0xd8 implies (false_mask) ? (b) : (a)
// For details, refer to the vpternlogd instruction table at
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-2c-manual.pdf
+
#define psel(a, b, false_mask) \
_mm512_castsi512_ps(_mm512_ternarylogic_epi32( \
_mm512_castps_si512(a), _mm512_castps_si512(b), \
diff --git a/tensorflow/core/kernels/fixed_length_record_reader_op.cc b/tensorflow/core/kernels/fixed_length_record_reader_op.cc
index 008ea11017..637a6cef95 100644
--- a/tensorflow/core/kernels/fixed_length_record_reader_op.cc
+++ b/tensorflow/core/kernels/fixed_length_record_reader_op.cc
@@ -40,8 +40,8 @@ class FixedLengthRecordReader : public ReaderBase {
// On success:
// * input_buffer_ != nullptr,
- // * input_buffer_->Tell() == footer_bytes_
- // * file_pos_limit_ == file size - header_bytes_
+ // * input_buffer_->Tell() == header_bytes_
+ // * file_pos_limit_ == file size - footer_bytes_
Status OnWorkStartedLocked() override {
record_number_ = 0;
uint64 file_size = 0;
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
new file mode 100644
index 0000000000..93791851b1
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -0,0 +1,457 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/nn_ops.cc.
+#ifdef INTEL_MKL
+
+#include <string.h>
+#include <map>
+#include <vector>
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_slice.h"
+#include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/util/padding.h"
+#include "tensorflow/core/util/tensor_format.h"
+
+#include "tensorflow/core/util/mkl_util.h"
+#include "third_party/mkl/include/mkl_dnn.h"
+#include "third_party/mkl/include/mkl_dnn_types.h"
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+template <typename Device, typename T, bool biasEnabled>
+class MklConv2DOp : public OpKernel {
+ public:
+ ~MklConv2DOp() {}
+
+ // Reads and validates the kernel attributes "strides", "data_format" and
+ // "padding".
+ //
+ // Construction is reported as failed (InvalidArgument) unless the data
+ // format string parses, exactly four strides are given, and the strides in
+ // the batch ('N') and depth ('C') dimensions are both 1.
+ explicit MklConv2DOp(OpKernelConstruction* context) : OpKernel(context) {
+ OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
+ string data_format;
+ OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+ OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
+ errors::InvalidArgument("Invalid data format"));
+ OP_REQUIRES(context, strides_.size() == 4,
+ errors::InvalidArgument("Sliding window strides field must "
+ "specify 4 dimensions"));
+
+ // Only spatial striding is supported; batch and depth strides must be 1.
+ const int64 stride_n = GetTensorDim(strides_, data_format_, 'N');
+ const int64 stride_c = GetTensorDim(strides_, data_format_, 'C');
+ OP_REQUIRES(
+ context, stride_n == 1 && stride_c == 1,
+ errors::InvalidArgument("Current implementation does not yet support "
+ "strides in the batch and depth dimensions."));
+ OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+ }
+
+ // Computes the forward 2-D convolution (plus bias when biasEnabled) with
+ // MKL DNN primitives.
+ //
+ // Inputs are fetched with MklGetInput: 0 = input (plain TF or MKL layout),
+ // 1 = filter (must not be in MKL layout), 2 = bias (biasEnabled only).
+ // Output 0 is allocated through AllocateOutputSetMklshape: in MKL layout on
+ // the normal path, or as an empty non-MKL tensor when there is nothing to
+ // compute.
+ //
+ // Shape problems are reported through OP_REQUIRES as InvalidArgument;
+ // failures of the MKL calls themselves abort via CHECK.
+ //
+ // NOTE(review): this method stores per-call state in the members
+ // mkl_params_, mkl_conv_res_ and the mkl_lt_* / mkl_prim_* handles, so it
+ // does not look safe to run concurrently on one kernel instance -- confirm
+ // how the executor invokes it.
+ void Compute(OpKernelContext* context) override {
+   const Tensor& input = MklGetInput(context, 0);
+   GetMklShape(context, 0, &(mkl_params_.input_shape));
+   const bool input_in_mkl_format = mkl_params_.input_shape.IsMklTensor();
+
+   const Tensor& filter = MklGetInput(context, 1);
+   MklShape mkl_filter_shape;
+   GetMklShape(context, 1, &mkl_filter_shape);
+   CHECK(!mkl_filter_shape.IsMklTensor())
+       << "Conv filter should not be in MKL Layout";
+
+   if (biasEnabled) {
+     const Tensor& bias = MklGetInput(context, 2);
+     OP_REQUIRES(context, bias.dims() == 1,
+                 errors::InvalidArgument("bias must be 1-dimensional: ",
+                                         bias.shape().DebugString()));
+   }
+
+   // An input in MKL layout carries its logical dimensions in the MklShape,
+   // so the rank of the raw tensor is only checked for plain TF inputs.
+   if (!input_in_mkl_format) {
+     OP_REQUIRES(context, input.dims() == 4,
+                 errors::InvalidArgument("input must be 4-dimensional: ",
+                                         input.shape().DebugString()));
+   }
+
+   OP_REQUIRES(context, filter.dims() == 4,
+               errors::InvalidArgument("filter must be 4-dimensional: ",
+                                       filter.shape().DebugString()));
+
+   for (int i = 0; i < 3; i++) {
+     OP_REQUIRES(context, FastBoundsCheck(filter.dim_size(i),
+                                          std::numeric_limits<int>::max()),
+                 errors::InvalidArgument("filter too large"));
+   }
+
+   // MklShape sizes are indexed as [0] = cols, [1] = rows, [2] = depth,
+   // [3] = batch (see the reads below).
+   const int64 input_depth = input_in_mkl_format
+                                 ? mkl_params_.input_shape.GetSizes()[2]
+                                 : GetTensorDim(input, data_format_, 'C');
+   OP_REQUIRES(
+       context, input_depth == filter.dim_size(2),
+       errors::InvalidArgument("input and filter must have the same depth: ",
+                               input_depth, " vs ", filter.dim_size(2)));
+   // The last dimension for filter is out_depth.
+   const int out_depth = static_cast<int>(filter.dim_size(3));
+
+   // The second dimension for input is rows/height.
+   // The first dimension for filter is rows/height.
+   const int64 input_rows_raw = input_in_mkl_format
+                                    ? mkl_params_.input_shape.GetSizes()[1]
+                                    : GetTensorDim(input, data_format_, 'H');
+   OP_REQUIRES(context, FastBoundsCheck(input_rows_raw,
+                                        std::numeric_limits<int>::max()),
+               errors::InvalidArgument("Input rows too large"));
+   const int input_rows = static_cast<int>(input_rows_raw);
+   const int filter_rows = static_cast<int>(filter.dim_size(0));
+
+   // The third dimension for input is columns/width.
+   // The second dimension for filter is columns/width.
+   const int64 input_cols_raw = input_in_mkl_format
+                                    ? mkl_params_.input_shape.GetSizes()[0]
+                                    : GetTensorDim(input, data_format_, 'W');
+   OP_REQUIRES(context, FastBoundsCheck(input_cols_raw,
+                                        std::numeric_limits<int>::max()),
+               errors::InvalidArgument("Input cols too large"));
+   const int input_cols = static_cast<int>(input_cols_raw);
+   const int filter_cols = static_cast<int>(filter.dim_size(1));
+
+   // The first dimension for input is batch.
+   const int64 input_batch_raw = input_in_mkl_format
+                                     ? mkl_params_.input_shape.GetSizes()[3]
+                                     : GetTensorDim(input, data_format_, 'N');
+   OP_REQUIRES(context, FastBoundsCheck(input_batch_raw,
+                                        std::numeric_limits<int>::max()),
+               errors::InvalidArgument("batch is too large"));
+   const int batch = static_cast<int>(input_batch_raw);
+
+   // For now we take the stride from the second and third dimensions only (we
+   // do not support striding on the batch or depth dimension).
+   const int stride_rows = GetTensorDim(strides_, data_format_, 'H');
+   const int stride_cols = GetTensorDim(strides_, data_format_, 'W');
+
+   int64 out_rows = 0, out_cols = 0, pad_rows = 0, pad_cols = 0;
+   OP_REQUIRES_OK(context,
+                  GetWindowedOutputSize(input_rows, filter_rows, stride_rows,
+                                        padding_, &out_rows, &pad_rows));
+   OP_REQUIRES_OK(context,
+                  GetWindowedOutputSize(input_cols, filter_cols, stride_cols,
+                                        padding_, &out_cols, &pad_cols));
+   TensorShape out_shape =
+       ShapeFromFormat(data_format_, batch, out_rows, out_cols, out_depth);
+
+   // Output tensor is of the following dimensions:
+   // [ in_batch, out_rows, out_cols, out_depth ]
+   Tensor* output = nullptr;
+
+   // If there is nothing to compute (this includes batch == 0), still
+   // allocate an empty, non-MKL output so that the op produces its output.
+   // Returning without allocating left output 0 unset, and the separate
+   // batch == 0 branch that used to follow could never be reached.
+   // TODO(jbobba): Verify correctness here
+   // Need semantics for Null MKL tensor
+   if (out_shape.num_elements() == 0) {
+     MklShape mkl_output_mkl_shape;
+     mkl_output_mkl_shape.SetMklTensor(false);
+     AllocateOutputSetMklshape(context, 0, &output, out_shape,
+                               mkl_output_mkl_shape);
+     return;
+   }
+
+   // Create MKL convolution primitives
+   mkl_params_.in_dims = input_in_mkl_format
+                             ? mkl_params_.input_shape.GetDimension()
+                             : input.dims();
+   mkl_params_.filter_dims = filter.dims();
+   mkl_params_.in_sizes[0] = static_cast<size_t>(input_cols);
+   mkl_params_.in_sizes[1] = static_cast<size_t>(input_rows);
+   mkl_params_.in_sizes[2] = static_cast<size_t>(input_depth);
+   mkl_params_.in_sizes[3] = static_cast<size_t>(batch);
+   mkl_params_.out_sizes[0] = static_cast<size_t>(out_cols);
+   mkl_params_.out_sizes[1] = static_cast<size_t>(out_rows);
+   mkl_params_.out_sizes[2] = static_cast<size_t>(out_depth);
+   mkl_params_.out_sizes[3] = static_cast<size_t>(batch);
+   // Padding is expressed to MKL as a negative offset into the input.
+   mkl_params_.input_offset[0] = static_cast<int>(-pad_cols);
+   mkl_params_.input_offset[1] = static_cast<int>(-pad_rows);
+   mkl_params_.conv_stride[0] = static_cast<size_t>(stride_cols);
+   mkl_params_.conv_stride[1] = static_cast<size_t>(stride_rows);
+
+   GetStridesFromSizes(data_format_, mkl_params_.out_strides,
+                       mkl_params_.out_sizes);
+   GetStridesFromSizes(data_format_, mkl_params_.in_strides,
+                       mkl_params_.in_sizes);
+
+   // TF filter dimension order (rows, cols, in_depth, out_depth) ->
+   // MKL size array order (cols, rows, in_depth, out_depth)
+   mkl_params_.filter_sizes[0] = filter.dim_size(1);  // cols
+   mkl_params_.filter_sizes[1] = filter.dim_size(0);  // rows
+   mkl_params_.filter_sizes[2] = filter.dim_size(2);  // in_depth
+   mkl_params_.filter_sizes[3] = filter.dim_size(3);  // out_depth
+
+   // Strides of the TF filter layout (rows, cols, in_depth, out_depth),
+   // listed in the same order as filter_sizes above.
+   mkl_params_.filter_strides[0] =
+       filter.dim_size(2) * filter.dim_size(3);  // cols
+   mkl_params_.filter_strides[1] =
+       filter.dim_size(1) * filter.dim_size(2) * filter.dim_size(3);  // rows
+   mkl_params_.filter_strides[2] = filter.dim_size(3);  // in_depth
+   mkl_params_.filter_strides[3] = 1;                   // out_depth
+
+   if (biasEnabled) {
+     const Tensor& bias = MklGetInput(context, 2);
+     mkl_params_.bias_sizes[0] = static_cast<size_t>(bias.dim_size(0));
+     mkl_params_.bias_strides[0] = 1;
+   }
+
+   // Create Convolution Primitive
+   if (biasEnabled) {
+     CHECK_EQ(dnnConvolutionCreateForwardBias_F32(
+                  &mkl_prim_convolution_fwd_, nullptr,
+                  dnnAlgorithmConvolutionDirect, mkl_params_.in_dims,
+                  mkl_params_.in_sizes, mkl_params_.out_sizes,
+                  mkl_params_.filter_sizes, mkl_params_.conv_stride,
+                  mkl_params_.input_offset, dnnBorderZeros),
+              E_SUCCESS);
+   } else {
+     CHECK_EQ(dnnConvolutionCreateForward_F32(
+                  &mkl_prim_convolution_fwd_, nullptr,
+                  dnnAlgorithmConvolutionDirect, mkl_params_.in_dims,
+                  mkl_params_.in_sizes, mkl_params_.out_sizes,
+                  mkl_params_.filter_sizes, mkl_params_.conv_stride,
+                  mkl_params_.input_offset, dnnBorderZeros),
+              E_SUCCESS);
+   }
+
+   // The output is produced in the primitive's destination layout. The TF
+   // tensor backing it is a flat buffer sized from that layout, and the
+   // logical sizes/strides travel in the MklShape.
+   TensorShape mkl_output_tf_shape;
+   MklShape mkl_output_mkl_shape;
+   mkl_output_mkl_shape.SetMklTensor(true);
+   mkl_output_mkl_shape.SetMklLayout(mkl_prim_convolution_fwd_,
+                                     dnnResourceDst);
+   mkl_output_mkl_shape.SetTfLayout(mkl_params_.in_dims, mkl_params_.out_sizes,
+                                    mkl_params_.out_strides);
+   mkl_output_tf_shape.AddDim(
+       dnnLayoutGetMemorySize_F32(
+           static_cast<dnnLayout_t>(mkl_output_mkl_shape.GetMklLayout())) /
+       sizeof(T));
+   AllocateOutputSetMklshape(context, 0, &output, mkl_output_tf_shape,
+                             mkl_output_mkl_shape);
+   mkl_conv_res_[dnnResourceDst] =
+       static_cast<void*>(output->flat<T>().data());
+
+   MklCreateInputLayouts(context);
+
+   // Temp tensors used to allocate tmp buffers. They are declared here so
+   // they stay alive until the convolution below has executed.
+   Tensor mkl_tmp_input_buf_tensor, mkl_tmp_filter_buf_tensor,
+       mkl_tmp_bias_buf_tensor;
+   MklPrepareConvolutionInputs(context, &mkl_tmp_input_buf_tensor,
+                               &mkl_tmp_filter_buf_tensor,
+                               &mkl_tmp_bias_buf_tensor);
+
+   // Execute convolution
+   CHECK_EQ(dnnExecute_F32(mkl_prim_convolution_fwd_, mkl_conv_res_),
+            E_SUCCESS);
+
+   MklCleanup();
+ }
+
+ private:
+ // Parameters describing one convolution for the MKL primitives. Compute
+ // fills them in on every call. The 4-element size arrays are filled there in
+ // the order [0] = cols, [1] = rows, [2] = depth, [3] = batch (out_depth for
+ // the filter), and the 2-element arrays in the order [0] = cols, [1] = rows.
+ typedef struct {
+ int in_dims;  // number of input dimensions
+ size_t in_sizes[4];  // input sizes
+ size_t in_strides[4];  // input strides, derived from in_sizes
+ size_t out_sizes[4];  // output sizes
+ size_t out_strides[4];  // output strides, derived from out_sizes
+ int filter_dims;  // number of filter dimensions
+ size_t filter_sizes[4];  // filter sizes
+ size_t filter_strides[4];  // strides of the filter in its TF layout
+ size_t bias_sizes[1];  // bias length (set only when bias is enabled)
+ size_t bias_strides[1];  // bias stride (set only when bias is enabled)
+ int input_offset[2];  // negated padding
+ size_t conv_stride[2];  // convolution strides
+ MklShape input_shape;  // MKL shape metadata of input 0
+ } MklConv2DOpParams;
+
+ // Create MKL dnnLayout_t objects for tensors coming into the layer
+ void MklCreateInputLayouts(OpKernelContext* context) {
+   // Input: a plain TF tensor needs a layout built from the sizes and strides
+   // recorded in mkl_params_; an MKL tensor already carries its layout.
+   if (!mkl_params_.input_shape.IsMklTensor()) {
+     CHECK_EQ(
+         dnnLayoutCreate_F32(&mkl_lt_input_, mkl_params_.in_dims,
+                             mkl_params_.in_sizes, mkl_params_.in_strides),
+         E_SUCCESS);
+   } else {
+     mkl_lt_input_ =
+         static_cast<dnnLayout_t>(mkl_params_.input_shape.GetCurLayout());
+   }
+
+   // Filter: always described from its sizes and strides.
+   CHECK_EQ(dnnLayoutCreate_F32(&mkl_lt_filter_, mkl_params_.filter_dims,
+                                mkl_params_.filter_sizes,
+                                mkl_params_.filter_strides),
+            E_SUCCESS);
+
+   // Bias: a 1-D layout, only for the bias-enabled kernel.
+   if (!biasEnabled) {
+     return;
+   }
+   CHECK_EQ(dnnLayoutCreate_F32(&mkl_lt_bias_, 1, mkl_params_.bias_sizes,
+                                mkl_params_.bias_strides),
+            E_SUCCESS);
+ }
+
+ // Compare incoming tensor layouts with MKL preferred layouts and convert
+ // data to the preferred layout if necessary
+ // For each of input, filter and (when biasEnabled) bias: obtains the layout
+ // the convolution primitive expects, compares it with the layout of the
+ // incoming tensor, converts the data into a temporary buffer when the
+ // comparison says they differ, and records the buffer to use in
+ // mkl_conv_res_.
+ //
+ // The three Tensor* arguments are handed to AllocTmpBuffer to back the
+ // temporary conversion buffers.
+ // NOTE(review): presumably the caller must keep those tensors alive until
+ // the convolution has executed, since mkl_conv_res_ may point into them --
+ // confirm against AllocTmpBuffer.
+ void MklPrepareConvolutionInputs(OpKernelContext* context,
+ Tensor* mkl_tmp_input_buf_tensor,
+ Tensor* mkl_tmp_filter_buf_tensor,
+ Tensor* mkl_tmp_bias_buf_tensor) {
+ bool mkl_convert_input, mkl_convert_filter, mkl_convert_bias;
+ dnnPrimitive_t mkl_prim_convert_filter, mkl_prim_convert_bias,
+ mkl_prim_convert_input;
+ dnnLayout_t mkl_lt_internal_filter, mkl_lt_internal_bias,
+ mkl_lt_internal_input;
+ void *mkl_buf_convert_input, *mkl_buf_convert_filter,
+ *mkl_buf_convert_bias;
+ mkl_prim_convert_filter = nullptr;
+ mkl_prim_convert_bias = nullptr;
+ mkl_prim_convert_input = nullptr;
+ mkl_lt_internal_filter = nullptr;
+ mkl_lt_internal_bias = nullptr;
+ mkl_lt_internal_input = nullptr;
+ mkl_buf_convert_input = nullptr;
+ mkl_buf_convert_filter = nullptr;
+ mkl_buf_convert_bias = nullptr;
+
+ // Compare with internal layouts and convert if needed
+ const Tensor& input = MklGetInput(context, 0);
+ void* mkl_buf_input =
+ const_cast<void*>(static_cast<const void*>(input.flat<T>().data()));
+ // Layout the primitive wants for its source resource.
+ CHECK_EQ(
+ dnnLayoutCreateFromPrimitive_F32(
+ &mkl_lt_internal_input, mkl_prim_convolution_fwd_, dnnResourceSrc),
+ E_SUCCESS);
+ // A conversion is needed when the layout comparison reports false.
+ mkl_convert_input =
+ !dnnLayoutCompare_F32(mkl_lt_internal_input, mkl_lt_input_);
+ if (mkl_convert_input) {
+ CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_input, mkl_lt_input_,
+ mkl_lt_internal_input),
+ E_SUCCESS);
+ AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, mkl_lt_internal_input,
+ &mkl_buf_convert_input);
+ CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_input, mkl_buf_input,
+ mkl_buf_convert_input),
+ E_SUCCESS);
+ // The conversion primitive is single-use; release it right away.
+ dnnDelete_F32(mkl_prim_convert_input);
+ }
+ dnnLayoutDelete_F32(mkl_lt_internal_input);
+
+ // Use the converted copy when one was made, the original data otherwise.
+ mkl_conv_res_[dnnResourceSrc] =
+ (mkl_convert_input) ? mkl_buf_convert_input : mkl_buf_input;
+
+ // Same procedure for the filter.
+ const Tensor& filter = MklGetInput(context, 1);
+ void* mkl_buf_filter =
+ const_cast<void*>(static_cast<const void*>(filter.flat<T>().data()));
+ CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&mkl_lt_internal_filter,
+ mkl_prim_convolution_fwd_,
+ dnnResourceFilter),
+ E_SUCCESS);
+ mkl_convert_filter =
+ !dnnLayoutCompare_F32(mkl_lt_internal_filter, mkl_lt_filter_);
+ if (mkl_convert_filter) {
+ CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_filter, mkl_lt_filter_,
+ mkl_lt_internal_filter),
+ E_SUCCESS);
+ AllocTmpBuffer(context, mkl_tmp_filter_buf_tensor, mkl_lt_internal_filter,
+ &mkl_buf_convert_filter);
+ CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_filter, mkl_buf_filter,
+ mkl_buf_convert_filter),
+ E_SUCCESS);
+ dnnDelete_F32(mkl_prim_convert_filter);
+ }
+ dnnLayoutDelete_F32(mkl_lt_internal_filter);
+
+ mkl_conv_res_[dnnResourceFilter] =
+ (mkl_convert_filter) ? mkl_buf_convert_filter : mkl_buf_filter;
+
+ // Same procedure for the bias, which exists only in the bias-enabled kernel.
+ if (biasEnabled) {
+ const Tensor& bias = MklGetInput(context, 2);
+ void* mkl_buf_bias =
+ const_cast<void*>(static_cast<const void*>(bias.flat<T>().data()));
+ CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&mkl_lt_internal_bias,
+ mkl_prim_convolution_fwd_,
+ dnnResourceBias),
+ E_SUCCESS);
+ mkl_convert_bias =
+ !dnnLayoutCompare_F32(mkl_lt_internal_bias, mkl_lt_bias_);
+ if (mkl_convert_bias) {
+ CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_bias, mkl_lt_bias_,
+ mkl_lt_internal_bias),
+ E_SUCCESS);
+ AllocTmpBuffer(context, mkl_tmp_bias_buf_tensor, mkl_lt_internal_bias,
+ &mkl_buf_convert_bias);
+ CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_bias, mkl_buf_bias,
+ mkl_buf_convert_bias),
+ E_SUCCESS);
+ dnnDelete_F32(mkl_prim_convert_bias);
+ }
+ dnnLayoutDelete_F32(mkl_lt_internal_bias);
+
+ mkl_conv_res_[dnnResourceBias] =
+ (mkl_convert_bias) ? mkl_buf_convert_bias : mkl_buf_bias;
+ }
+ }
+
+ // Releases the convolution primitive and the layouts created for this call.
+ void MklCleanup() {
+   dnnDelete_F32(mkl_prim_convolution_fwd_);
+   // The input layout is deleted here only when the input was a plain TF
+   // tensor; for an MKL input it was taken from the input's MklShape.
+   if (!mkl_params_.input_shape.IsMklTensor()) {
+     dnnLayoutDelete_F32(mkl_lt_input_);
+   }
+   dnnLayoutDelete_F32(mkl_lt_filter_);
+   if (biasEnabled) {
+     dnnLayoutDelete_F32(mkl_lt_bias_);
+   }
+ }
+
+ std::vector<int32> strides_;
+ Padding padding_;
+ TensorFormat data_format_;
+
+ MklConv2DOpParams mkl_params_;
+ dnnPrimitive_t mkl_prim_convolution_fwd_ = nullptr;
+ void* mkl_conv_res_[dnnResourceNumber];
+ dnnLayout_t mkl_lt_filter_ = nullptr, mkl_lt_bias_ = nullptr,
+ mkl_lt_input_ = nullptr;
+
+
+};
+
+// Registers the MKL-labelled CPU kernels for element type T: "MklConv2D"
+// without bias and "MklConv2DWithBias" with bias.
+#define REGISTER_MKL_CPU(T) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("MklConv2D").Device(DEVICE_CPU) \
+ .TypeConstraint<T>("T") \
+ .Label(mkl_layer_registry::kMklLayerLabel), \
+ MklConv2DOp<CPUDevice, T, false>); \
+ REGISTER_KERNEL_BUILDER( \
+ Name("MklConv2DWithBias").Device(DEVICE_CPU) \
+ .TypeConstraint<T>("T") \
+ .Label(mkl_layer_registry::kMklLayerLabel), \
+ MklConv2DOp<CPUDevice, T, true>);
+
+TF_CALL_float(REGISTER_MKL_CPU);
+// Keep the helper macro from leaking past its single use.
+#undef REGISTER_MKL_CPU
+
+} // namespace tensorflow
+#endif // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_tfconv_op.cc b/tensorflow/core/kernels/mkl_tfconv_op.cc
new file mode 100644
index 0000000000..5925a5b7c1
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_tfconv_op.cc
@@ -0,0 +1,135 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef INTEL_MKL
+
+#include <vector>
+#include <algorithm>
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/util/tensor_format.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/platform/macros.h"
+
+#include "third_party/mkl/include/mkl_dnn_types.h"
+#include "third_party/mkl/include/mkl_dnn.h"
+#include "tensorflow/core/util/mkl_util.h"
+
+namespace tensorflow {
+ typedef Eigen::ThreadPoolDevice CPUDevice;
+
+///////////////////////////////////////////////////////////
+// Op kernel
+///////////////////////////////////////////////////////////
+
+// Op kernel that converts input 0 from MKL layout to plain TensorFlow layout.
+//
+// When the input is not an MKL tensor it is forwarded unchanged. Otherwise
+// the output shape is rebuilt from the sizes and strides stored in the
+// input's MklShape and the data is converted into the TF layout recorded
+// there.
+template <typename Device, typename T>
+class MklToTfOp : public OpKernel {
+ public:
+  // Reads the "data_format" and "T" attributes.
+  explicit MklToTfOp(OpKernelConstruction* context) : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES_OK(context, context->GetAttr("T", &op_data_type));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    // 1. Fetch the input tensor and its MKL shape metadata.
+    const Tensor& input_tensor = MklGetInput(context, 0);
+    MklShape input_shape;
+    GetMklShape(context, 0, &input_shape);
+
+    // If input is already in Tf format, then just copy input tensor to output.
+    if (!input_shape.IsMklTensor()) {
+      context->set_output(0, input_tensor);
+      VLOG(1) << "MKLToTFConversion: No conversion needed, "
+              << "copying input to output";
+      return;
+    }
+
+    // Check that input data type is same as operator data type and that it is
+    // same as output data type.
+    DataType input_data_type = input_type(0);
+    DataType output_data_type = output_type(0);
+    CHECK_EQ(op_data_type, input_data_type);
+    CHECK_EQ(op_data_type, output_data_type);
+
+    // 2. Recreate the Tf tensor shape based on sizes and strides.
+    // Ideally, we should know what the data_format is, but that attribute
+    // to this op is not reliable. So below, we rely on sorting logic where
+    // we sort by strides first and then by sizes, both in decreasing order.
+    //
+    // The pairs are (size, stride). They are kept as int64 so that large
+    // sizes or strides are not truncated, which would corrupt both the sort
+    // order and the resulting dimensions.
+    TensorShape output_shape;
+    std::vector<std::pair<int64, int64>> shape_size;
+    shape_size.reserve(input_shape.GetDimension());
+    for (size_t i = 0; i < input_shape.GetDimension(); i++) {
+      VLOG(1) << "Size: " << input_shape.GetSizes()[i]
+              << ", Strides: " << input_shape.GetStrides()[i];
+      shape_size.emplace_back(static_cast<int64>(input_shape.GetSizes()[i]),
+                              static_cast<int64>(input_shape.GetStrides()[i]));
+    }
+
+    std::sort(shape_size.begin(), shape_size.end(),
+              [](const std::pair<int64, int64>& a,
+                 const std::pair<int64, int64>& b) {
+                return (a.second > b.second) ||
+                       (a.second == b.second && a.first > b.first);
+              });
+
+    for (const std::pair<int64, int64>& s_s : shape_size) {
+      VLOG(1) << "Added dimension: " << s_s.first;
+      output_shape.AddDim(s_s.first);
+    }
+
+    // Allocate output tensor.
+    Tensor* output_tensor = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, output_shape, &output_tensor));
+
+    // 3. Get the layout to convert into: the TF layout stored with the input.
+    dnnLayout_t output_layout =
+        static_cast<dnnLayout_t>(input_shape.GetTfLayout());
+
+    // 4. Execute DNNConversion.
+    void* input_buffer =
+        static_cast<void*>(const_cast<T*>(input_tensor.flat<T>().data()));
+    void* output_buffer =
+        static_cast<void*>(const_cast<T*>(output_tensor->flat<T>().data()));
+    input_shape.GetConvertedFlatData(output_layout, input_buffer,
+                                     output_buffer);
+
+    VLOG(1) << "MKLToTFConversion complete successfully.";
+  }
+
+ private:
+  /// Data format of the operation
+  string data_format_str;
+
+  /// Data type of the operation
+  DataType op_data_type;
+};
+
+///////////////////////////////////////////////////////////
+// Register kernel
+///////////////////////////////////////////////////////////
+
+#define REGISTER_CPU(T) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("MklToTf").Device(DEVICE_CPU).TypeConstraint<T>("T") \
+ .Label(mkl_layer_registry::kMklLayerLabel), \
+ MklToTfOp<CPUDevice, T>);
+
+TF_CALL_float(REGISTER_CPU);
+#undef REGISTER_CPU
+} // namespace tensorflow
+#endif /* INTEL_MKL */
diff --git a/tensorflow/core/kernels/mkl_transpose_op.cc b/tensorflow/core/kernels/mkl_transpose_op.cc
new file mode 100644
index 0000000000..c00674d72f
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_transpose_op.cc
@@ -0,0 +1,67 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/array_ops.cc.
+
+#ifdef INTEL_MKL
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/kernels/transpose_op.h"
+#include "tensorflow/core/kernels/transpose_functor.h"
+#include "third_party/mkl/include/mkl_trans.h"
+
+namespace tensorflow {
+
+// output = TransposeOp(T<any> input, T<int32> perm) takes a tensor
+// of type T and rank N, and a permutation of 0, 1, ..., N-1. It
+// shuffles the dimensions of the input tensor according to permutation.
+//
+// Specifically, the returned tensor output meets the following condition:
+// 1) output.dims() == input.dims();
+// 2) output.dim_size(i) == input.dim_size(perm[i]);
+// 3) output.tensor<T, N>(i_0, i_1, ..., i_N-1) ==
+// input.tensor<T, N>(j_0, j_1, ..., j_N-1),
+// where i_s == j_{perm[s]}
+//
+// REQUIRES: perm is a vector of int32.
+// REQUIRES: input.dims() == perm.size().
+// REQUIRES: perm is a permutation.
+
+// Transposes `in` into `out` according to `perm`.
+//
+// A 2-D float tensor whose permutation swaps the two dimensions is handled
+// by MKL's out-of-place matrix copy; every other case falls back to the Eigen
+// based ::tensorflow::DoTranspose on the CPU device. Returns the status of
+// whichever path ran.
+Status MklTransposeCpuOp::DoTranspose(OpKernelContext* ctx, const Tensor& in,
+                                      gtl::ArraySlice<int32> perm,
+                                      Tensor* out) {
+  // Only take the MKL path for a real transpose. Without looking at `perm`,
+  // an identity permutation of a 2-D float tensor would be transposed too.
+  // NOTE(review): the caller may already short-circuit identity permutations;
+  // the check keeps this function correct on its own.
+  const bool swaps_two_dims =
+      in.dims() == 2 && perm.size() == 2 && perm[0] == 1 && perm[1] == 0;
+  if (swaps_two_dims && in.dtype() == DT_FLOAT) {
+    float* user_o = out->flat<float>().data();
+    const float* user_i = in.flat<float>().data();
+
+    // Documentation here: https://software.intel.com/en-us/node/520863
+    // Parameters: (ordering:row-major, operation:transpose, num_rows, num_cols,
+    //              alpha (for scaling), array, dist_bet_adjacent_cols/rows
+    //              (source), array, dist_bet_adjacent_cols/rows (dest))
+    mkl_somatcopy('R', 'T', in.dim_size(0), in.dim_size(1), 1,
+                  user_i, in.dim_size(1),
+                  user_o, in.dim_size(0));
+
+    return Status::OK();
+  }
+
+  // Fallback to eigen if transpose parameters not supported by MKL
+  typedef Eigen::ThreadPoolDevice CPUDevice;
+  return ::tensorflow::DoTranspose(ctx->eigen_device<CPUDevice>(), in, perm,
+                                   out);
+}  // MklTransposeCpuOp::DoTranspose
+} // namespace tensorflow
+
+#endif // INTEL_MKL
diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc
index ddc9c9823b..3fe16c66b8 100644
--- a/tensorflow/core/kernels/pooling_ops_common.cc
+++ b/tensorflow/core/kernels/pooling_ops_common.cc
@@ -64,6 +64,8 @@ PoolParameters::PoolParameters(OpKernelContext* context,
OP_REQUIRES_OK(
context, GetWindowedOutputSize(tensor_in_cols, window_cols, col_stride,
padding, &out_width, &pad_cols));
+ pad_depth = 0;
+ out_depth = depth;
} else {
// Our current version of depthwise max pooling does not support
// any padding, and expects the depth_window to equal the
diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc b/tensorflow/core/kernels/resize_nearest_neighbor_op.cc
index 9bfbe2a61a..f1627135c5 100644
--- a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc
+++ b/tensorflow/core/kernels/resize_nearest_neighbor_op.cc
@@ -66,9 +66,7 @@ class ResizeNearestNeighborOp : public OpKernel {
const int64 in_x =
std::min(static_cast<int64>(floorf(x * st.width_scale)),
(st.in_width - 1));
- for (int c = 0; c < st.channels; ++c) {
- output_data(b, y, x, c) = input_data(b, in_y, in_x, c);
- }
+ std::copy_n(&input_data(b, in_y, in_x, 0), st.channels, &output_data(b, y, x, 0));
}
}
}
diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op_benchmark_test.cc b/tensorflow/core/kernels/resize_op_benchmark_test.cc
index 07cf653c2f..4d0805a737 100644
--- a/tensorflow/core/kernels/resize_nearest_neighbor_op_benchmark_test.cc
+++ b/tensorflow/core/kernels/resize_op_benchmark_test.cc
@@ -21,7 +21,8 @@ limitations under the License.
namespace tensorflow {
-static Graph* BM_ResizeNearestNeighbor(int batches, int width, int height) {
+static Graph* BM_Resize(const char* algorithm,
+ int batches, int width, int height) {
Graph* g = new Graph(OpRegistry::Global());
Tensor in(DT_FLOAT, TensorShape({batches, width, height, 3}));
in.flat<float>().setRandom();
@@ -32,21 +33,26 @@ static Graph* BM_ResizeNearestNeighbor(int batches, int width, int height) {
out_size_flat(1) = height * 2;
Node* ret;
- NodeBuilder(g->NewName("n"), "ResizeNearestNeighbor")
- .Input(test::graph::Constant(g, in))
- .Input(test::graph::Constant(g, out_size))
- .Finalize(g, &ret);
+ Status s = NodeBuilder(g->NewName("n"), algorithm)
+ .Input(test::graph::Constant(g, in))
+ .Input(test::graph::Constant(g, out_size))
+ .Finalize(g, &ret);
+ assert(s.ok());
return g;
}
-#define BM_ResizeNearestNeighborDev(DEVICE, B, W, H) \
- static void BM_ResizeNearestNeighbor_##DEVICE##_##B##_##W##_##H(int iters) { \
+#define BM_ResizeDev(DEVICE, ALGORITHM, B, W, H) \
+ static void BM_Resize_##ALGORITHM##_##DEVICE##_##B##_##W##_##H(int iters) { \
testing::ItemsProcessed(iters* B* W* H * 3); \
- test::Benchmark(#DEVICE, BM_ResizeNearestNeighbor(B, W, H)).Run(iters); \
+ test::Benchmark(#DEVICE, BM_Resize(#ALGORITHM, B, W, H)).Run(iters); \
} \
- BENCHMARK(BM_ResizeNearestNeighbor_##DEVICE##_##B##_##W##_##H)
+ BENCHMARK(BM_Resize_##ALGORITHM##_##DEVICE##_##B##_##W##_##H)
-BM_ResizeNearestNeighborDev(cpu, 1, 499, 499);
-BM_ResizeNearestNeighborDev(gpu, 1, 499, 499);
+BM_ResizeDev(cpu, ResizeNearestNeighbor, 10, 499, 499);
+BM_ResizeDev(gpu, ResizeNearestNeighbor, 10, 499, 499);
+
+BM_ResizeDev(cpu, ResizeBilinear, 10, 499, 499);
+BM_ResizeDev(gpu, ResizeBilinear, 10, 499, 499);
} // namespace tensorflow
+
diff --git a/tensorflow/core/kernels/transpose_op.cc b/tensorflow/core/kernels/transpose_op.cc
index 4d303f0173..fb2ceb4a4a 100644
--- a/tensorflow/core/kernels/transpose_op.cc
+++ b/tensorflow/core/kernels/transpose_op.cc
@@ -180,6 +180,20 @@ Status TransposeCpuOp::DoTranspose(OpKernelContext* ctx, const Tensor& in,
out);
}
+#ifdef INTEL_MKL
+#define REGISTER(T) \
+ REGISTER_KERNEL_BUILDER(Name("Transpose") \
+ .Device(DEVICE_CPU) \
+ .TypeConstraint<T>("T") \
+ .TypeConstraint<int32>("Tperm") \
+ .HostMemory("perm"), \
+ MklTransposeCpuOp);
+TF_CALL_ALL_TYPES(REGISTER);
+REGISTER(bfloat16);
+#undef REGISTER
+
+#else // INTEL_MKL
+
#define REGISTER(T) \
REGISTER_KERNEL_BUILDER(Name("Transpose") \
.Device(DEVICE_CPU) \
@@ -190,6 +204,7 @@ Status TransposeCpuOp::DoTranspose(OpKernelContext* ctx, const Tensor& in,
TF_CALL_ALL_TYPES(REGISTER)
REGISTER(bfloat16);
#undef REGISTER
+#endif // INTEL_MKL
#if GOOGLE_CUDA
Status TransposeGpuOp::DoTranspose(OpKernelContext* ctx, const Tensor& in,
diff --git a/tensorflow/core/kernels/transpose_op.h b/tensorflow/core/kernels/transpose_op.h
index 5f40bcecc1..a69eecc2f8 100644
--- a/tensorflow/core/kernels/transpose_op.h
+++ b/tensorflow/core/kernels/transpose_op.h
@@ -41,6 +41,17 @@ class TransposeCpuOp : public TransposeOp {
gtl::ArraySlice<int32> perm, Tensor* out) override;
};
+#ifdef INTEL_MKL
+// CPU transpose kernel class that is only declared when INTEL_MKL is defined.
+// It derives from TransposeOp and overrides DoTranspose; only the declaration
+// appears here.
+class MklTransposeCpuOp : public TransposeOp {
+ public:
+ // Forwards the construction context to the TransposeOp base class.
+ explicit MklTransposeCpuOp(OpKernelConstruction* ctx) : TransposeOp(ctx) {}
+
+ protected:
+ // Transposes `in` according to `perm` into `out` and reports the outcome as
+ // a Status.
+ Status DoTranspose(OpKernelContext* ctx, const Tensor& in,
+ gtl::ArraySlice<int32> perm, Tensor* out) override;
+};
+#endif // INTEL_MKL
+
class TransposeGpuOp : public TransposeOp {
public:
explicit TransposeGpuOp(OpKernelConstruction* ctx) : TransposeOp(ctx) {}
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index eee9961b28..e56b27b0c0 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -2502,4 +2502,45 @@ scale_after_normalization: A bool indicating whether the resulted tensor
needs to be multiplied with gamma.
)doc");
+#ifdef INTEL_MKL
+// MKL variant of Conv2D. Each logical tensor (input, filter, output) is paired
+// with a uint8 `mkl_*` companion tensor (presumably the serialized MKL layout
+// metadata -- confirm against the kernels).
+// NOTE(review): the shape function is the shared Conv2DShape; confirm that it
+// accounts for the interleaved mkl_* inputs.
+REGISTER_OP("MklConv2D")
+ .Input("input: T")
+ .Input("mkl_input: uint8")
+ .Input("filter: T")
+ .Input("mkl_filter: uint8")
+ .Output("output: T")
+ .Output("mkl_output: uint8")
+ .Attr("T: {half, float, double}")
+ .Attr("strides: list(int)")
+ .Attr("use_cudnn_on_gpu: bool = true")
+ .Attr(GetPaddingAttrString())
+ .Attr(GetConvnetDataFormatAttrString())
+ .SetShapeFn(shape_inference::Conv2DShape)
+ .Doc(R"doc(
+MKL version of Conv2D
+)doc");
+
+// MKL variant of Conv2D with an additional bias input (and its uint8
+// companion).
+// NOTE(review): unlike MklConv2D, no SetShapeFn() or Doc() is attached to this
+// registration -- confirm whether that is intentional.
+REGISTER_OP("MklConv2DWithBias")
+ .Input("input: T")
+ .Input("mkl_input: uint8")
+ .Input("filter: T")
+ .Input("mkl_filter: uint8")
+ .Input("bias: T")
+ .Input("mkl_bias: uint8")
+ .Output("output: T")
+ .Output("mkl_output: uint8")
+ .Attr("T: {half, float, double}")
+ .Attr("strides: list(int)")
+ .Attr("use_cudnn_on_gpu: bool = true")
+ .Attr(GetPaddingAttrString())
+ .Attr(GetConvnetDataFormatAttrString());
+
+// Takes a tensor plus its uint8 companion and produces a single output of type
+// T (presumably the tensor converted back to the standard TensorFlow layout --
+// confirm against the kernel). No SetShapeFn() or Doc() is attached.
+REGISTER_OP("MklToTf")
+ .Input("input: T")
+ .Input("mkl_input: uint8")
+ .Output("output: T")
+ .Attr("T: {half, float, double}")
+ .Attr(GetConvnetDataFormatAttrString());
+#endif // INTEL_MKL
+
} // namespace tensorflow
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index d17b52306d..aa2177dba4 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -25759,6 +25759,59 @@ op {
description: "Read [the section on\nSegmentation](../../api_docs/python/math_ops.md#segmentation) for an explanation\nof segments.\n\nComputes a tensor such that\n`(output[i] = sum_{j...} data[j...]` where the sum is over tuples `j...` such\nthat `segment_ids[j...] == i`. Unlike `SegmentSum`, `segment_ids`\nneed not be sorted and need not cover all values in the full\nrange of valid values.\n\nIf the sum is empty for a given segment ID `i`, `output[i] = 0`.\n\n`num_segments` should equal the number of distinct segment IDs.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"../../images/UnsortedSegmentSum.png\" alt>\n</div>"
}
op {
+ name: "UnsortedSegmentSum"
+ input_arg {
+ name: "data"
+ type_attr: "T"
+ }
+ input_arg {
+ name: "segment_ids"
+ description: "A tensor whose shape is a prefix of `data.shape`."
+ type_attr: "Tindices"
+ }
+ input_arg {
+ name: "num_segments"
+ type: DT_INT32
+ }
+ output_arg {
+ name: "output"
+ description: "Has same shape as data, except for the first `segment_ids.rank`\ndimensions, which are replaced with a single dimension which has size\n`num_segments`."
+ type_attr: "T"
+ }
+ attr {
+ name: "T"
+ type: "type"
+ allowed_values {
+ list {
+ type: DT_FLOAT
+ type: DT_DOUBLE
+ type: DT_INT64
+ type: DT_INT32
+ type: DT_UINT8
+ type: DT_UINT16
+ type: DT_INT16
+ type: DT_INT8
+ type: DT_QINT8
+ type: DT_QUINT8
+ type: DT_QINT32
+ type: DT_HALF
+ }
+ }
+ }
+ attr {
+ name: "Tindices"
+ type: "type"
+ allowed_values {
+ list {
+ type: DT_INT32
+ type: DT_INT64
+ }
+ }
+ }
+ summary: "Computes the max along segments of a tensor."
+ description: "Read [the section on\nSegmentation](../../api_docs/python/math_ops.md#segmentation) for an explanation\nof segments.\n\nComputes a tensor such that\n\\\\(output_i = \\sum_j data_j\\\\) where sum is over `j` such\nthat `segment_ids[j] == i`. Unlike `SegmentSum`, `segment_ids`\nneed not be sorted and need not cover all values in the full\n range of valid values.\n\nIf the sum is empty for a given segment ID `i`, `output[i] = 0`.\n\n`num_segments` should equal the number of distinct segment IDs.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"../../images/UnsortedSegmentSum.png\" alt>\n</div>"
+}
+op {
name: "Unstage"
output_arg {
name: "values"
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 5db8b68048..f21a646ca1 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -4,11 +4,6 @@ load("@protobuf//:protobuf.bzl", "cc_proto_library")
load("@protobuf//:protobuf.bzl", "py_proto_library")
load("//tensorflow:tensorflow.bzl", "if_not_mobile")
-# configure may change the following lines
-WITH_GCP_SUPPORT = False
-WITH_HDFS_SUPPORT = False
-WITH_JEMALLOC = True
-
# Appends a suffix to a list of deps.
def tf_deps(deps, suffix):
tf_deps = []
@@ -196,61 +191,54 @@ def tf_additional_test_srcs():
def tf_kernel_tests_linkstatic():
return 0
-# jemalloc only enabled on Linux for now.
-# TODO(jhseu): Enable on other platforms.
def tf_additional_lib_defines():
- defines = []
- if WITH_JEMALLOC:
- defines += select({
- "//tensorflow:linux_x86_64": [
- "TENSORFLOW_USE_JEMALLOC"
- ],
- "//conditions:default": [],
- })
- return defines
+ return select({
+ "//tensorflow:with_jemalloc": ["TENSORFLOW_USE_JEMALLOC"],
+ "//conditions:default": [],
+ })
def tf_additional_lib_deps():
- deps = []
- if WITH_JEMALLOC:
- deps += select({
- "//tensorflow:linux_x86_64": ["@jemalloc"],
- "//conditions:default": [],
- })
- return deps
+ return select({
+ "//tensorflow:with_jemalloc": ["@jemalloc"],
+ "//conditions:default": [],
+ })
def tf_additional_core_deps():
- deps = []
- if WITH_GCP_SUPPORT:
- deps.append("//tensorflow/core/platform/cloud:gcs_file_system")
- if WITH_HDFS_SUPPORT:
- deps.append("//tensorflow/core/platform/hadoop:hadoop_file_system")
- return deps
+ return select({
+ "//tensorflow:with_gcp_support": [
+ "//tensorflow/core/platform/cloud:gcs_file_system",
+ ],
+ "//conditions:default": [],
+ }) + select({
+ "//tensorflow:with_hdfs_support": [
+ "//tensorflow/core/platform/hadoop:hadoop_file_system",
+ ],
+ "//conditions:default": [],
+ })
# TODO(jart, jhseu): Delete when GCP is default on.
def tf_additional_cloud_op_deps():
- deps = []
- if WITH_GCP_SUPPORT:
- deps = select({
+ return select({
"//tensorflow:windows": [],
"//tensorflow:android": [],
"//tensorflow:ios": [],
- "//conditions:default":
- ["//tensorflow/contrib/cloud:bigquery_reader_ops_op_lib"],
- })
- return deps
+ "//tensorflow:with_gcp_support": [
+ "//tensorflow/contrib/cloud:bigquery_reader_ops_op_lib",
+ ],
+ "//conditions:default": [],
+ })
# TODO(jart, jhseu): Delete when GCP is default on.
def tf_additional_cloud_kernel_deps():
- deps = []
- if WITH_GCP_SUPPORT:
- deps = select({
+ return select({
"//tensorflow:windows": [],
"//tensorflow:android": [],
"//tensorflow:ios": [],
- "//conditions:default":
- ["//tensorflow/contrib/cloud/kernels:bigquery_reader_ops"],
- })
- return deps
+ "//tensorflow:with_gcp_support": [
+ "//tensorflow/contrib/cloud/kernels:bigquery_reader_ops",
+ ],
+ "//conditions:default": [],
+ })
def tf_lib_proto_parsing_deps():
return [
diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl
index 23a7b9065a..79f97c1234 100644
--- a/tensorflow/core/platform/default/build_config_root.bzl
+++ b/tensorflow/core/platform/default/build_config_root.bzl
@@ -2,8 +2,6 @@
# The functions in this file might be referred by tensorflow.bzl. They have to
# be separate to avoid cyclic references.
-WITH_XLA_SUPPORT = False
-
def tf_cuda_tests_tags():
return ["local"]
@@ -11,16 +9,16 @@ def tf_sycl_tests_tags():
return ["local"]
def tf_additional_plugin_deps():
- deps = []
- if WITH_XLA_SUPPORT:
- deps.append("//tensorflow/compiler/jit")
- return deps
+ return select({
+ "//tensorflow:with_xla_support": ["//tensorflow/compiler/jit"],
+ "//conditions:default": [],
+ })
def tf_additional_xla_deps_py():
return []
def tf_additional_license_deps():
- licenses = []
- if WITH_XLA_SUPPORT:
- licenses.append("@llvm//:LICENSE.TXT")
- return licenses
+ return select({
+ "//tensorflow:with_xla_support": ["@llvm//:LICENSE.TXT"],
+ "//conditions:default": [],
+ })
diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc
index 1d0c9dc8cd..66bda85b2f 100644
--- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc
+++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc
@@ -58,6 +58,7 @@ class LibHDFS {
std::function<hdfsFS(hdfsBuilder*)> hdfsBuilderConnect;
std::function<hdfsBuilder*()> hdfsNewBuilder;
std::function<void(hdfsBuilder*, const char*)> hdfsBuilderSetNameNode;
+ std::function<int(const char*, char**)> hdfsConfGetStr;
std::function<void(hdfsBuilder*, const char* kerbTicketCachePath)>
hdfsBuilderSetKerbTicketCachePath;
std::function<int(hdfsFS, hdfsFile)> hdfsCloseFile;
@@ -85,6 +86,7 @@ class LibHDFS {
BIND_HDFS_FUNC(hdfsBuilderConnect);
BIND_HDFS_FUNC(hdfsNewBuilder);
BIND_HDFS_FUNC(hdfsBuilderSetNameNode);
+ BIND_HDFS_FUNC(hdfsConfGetStr);
BIND_HDFS_FUNC(hdfsBuilderSetKerbTicketCachePath);
BIND_HDFS_FUNC(hdfsCloseFile);
BIND_HDFS_FUNC(hdfsPread);
@@ -147,6 +149,18 @@ Status HadoopFileSystem::Connect(StringPiece fname, hdfsFS* fs) {
hdfsBuilder* builder = hdfs_->hdfsNewBuilder();
if (scheme == "file") {
hdfs_->hdfsBuilderSetNameNode(builder, nullptr);
+ } else if (scheme == "viewfs") {
+ char *defaultFS = NULL;
+ hdfs_->hdfsConfGetStr("fs.defaultFS", &defaultFS);
+ StringPiece defaultScheme, defaultCluster, defaultPath;
+ io::ParseURI(defaultFS, &defaultScheme, &defaultCluster, &defaultPath);
+
+ if (scheme != defaultScheme || namenode != defaultCluster) {
+ return errors::Unimplemented("viewfs is only supported as a fs.defaultFS.");
+ }
+ // The default NameNode configuration will be used (from the XML configuration files). See:
+ // https://github.com/tensorflow/tensorflow/blob/v1.0.0/third_party/hadoop/hdfs.h#L259
+ hdfs_->hdfsBuilderSetNameNode(builder, "default");
} else {
hdfs_->hdfsBuilderSetNameNode(builder, nn.c_str());
}
@@ -478,5 +492,6 @@ Status HadoopFileSystem::Stat(const string& fname, FileStatistics* stats) {
}
REGISTER_FILE_SYSTEM("hdfs", HadoopFileSystem);
+REGISTER_FILE_SYSTEM("viewfs", HadoopFileSystem);
} // namespace tensorflow
diff --git a/tensorflow/core/platform/macros.h b/tensorflow/core/platform/macros.h
index aad35890af..b6fb18bd99 100644
--- a/tensorflow/core/platform/macros.h
+++ b/tensorflow/core/platform/macros.h
@@ -53,6 +53,17 @@ limitations under the License.
#define TF_SCANF_ATTRIBUTE(string_index, first_to_check)
#endif
+// Control visibility outside .so
+#if defined(COMPILER_MSVC)
+# ifdef TF_COMPILE_LIBRARY
+# define TF_EXPORT __declspec(dllexport)
+# else
+# define TF_EXPORT __declspec(dllimport)
+# endif // TF_COMPILE_LIBRARY
+#else
+# define TF_EXPORT __attribute__((visibility("default")))
+#endif // COMPILER_MSVC
+
// GCC can be told that a certain branch is not likely to be taken (for
// instance, a CHECK failure), and use that information in static analysis.
// Giving it this information can help it optimize for the common case in
diff --git a/tensorflow/core/platform/windows/cpu_info.h b/tensorflow/core/platform/windows/cpu_info.h
index 77a1946e61..d6e78dbc8f 100644
--- a/tensorflow/core/platform/windows/cpu_info.h
+++ b/tensorflow/core/platform/windows/cpu_info.h
@@ -16,6 +16,9 @@ limitations under the License.
#ifndef TENSORFLOW_PLATFORM_WINDOWS_CPU_INFO_H_
#define TENSORFLOW_PLATFORM_WINDOWS_CPU_INFO_H_
+// included so __cpuidex function is available for GETCPUID on Windows
+#include <intrin.h>
+
// Byte order defines provided by gcc. MSVC doesn't define those so
// we define them here.
// We assume that all windows platform out there are little endian.
diff --git a/tensorflow/core/platform/windows/intrinsics_port.h b/tensorflow/core/platform/windows/intrinsics_port.h
index a4fa1e9971..e52f5b1646 100644
--- a/tensorflow/core/platform/windows/intrinsics_port.h
+++ b/tensorflow/core/platform/windows/intrinsics_port.h
@@ -24,6 +24,9 @@ limitations under the License.
#include "tensorflow/core/platform/types.h"
#define _mm_load_pd1 _mm_load1_pd
+
+// only define these intrinsics if immintrin.h doesn't have them (VS2015 and earlier)
+#if _MSC_VER < 1910
static inline int
_mm256_extract_epi32(__m256i a, const int i)
{
@@ -39,3 +42,4 @@ _mm256_insert_epi32(__m256i a, int b, const int i)
}
#endif
#endif
+#endif
diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc
index facadc7f57..72e7e06e65 100644
--- a/tensorflow/core/platform/windows/windows_file_system.cc
+++ b/tensorflow/core/platform/windows/windows_file_system.cc
@@ -230,11 +230,9 @@ Status WindowsFileSystem::NewRandomAccessFile(
result->reset();
// Open the file for read-only random access
- // Random access is to disable read-ahead as the system reads too much data
// Open in async mode which makes Windows allow more parallelism even
// if we need to do sync I/O on top of it.
- DWORD file_flags = FILE_ATTRIBUTE_READONLY | FILE_FLAG_RANDOM_ACCESS |
- FILE_FLAG_OVERLAPPED;
+ DWORD file_flags = FILE_ATTRIBUTE_READONLY | FILE_FLAG_OVERLAPPED;
// Shared access is necessary for tests to pass
// almost all tests would work with a possible exception of fault_injection.
DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
@@ -306,8 +304,8 @@ Status WindowsFileSystem::NewReadOnlyMemoryRegionFromFile(
result->reset();
Status s = Status::OK();
- // Open the file for read-only random access
- DWORD file_flags = FILE_ATTRIBUTE_READONLY | FILE_FLAG_RANDOM_ACCESS;
+ // Open the file for read-only
+ DWORD file_flags = FILE_ATTRIBUTE_READONLY;
// Open in async mode which makes Windows allow more parallelism even
// if we need to do sync I/O on top of it.
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
new file mode 100644
index 0000000000..6d09995b51
--- /dev/null
+++ b/tensorflow/core/util/mkl_util.h
@@ -0,0 +1,296 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
+#define TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
+#ifdef INTEL_MKL
+#include "third_party/mkl/include/mkl_dnn.h"
+#include "third_party/mkl/include/mkl_dnn_types.h"
+#include "third_party/mkl/include/mkl_service.h"
+
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/util/tensor_format.h"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/macros.h"
+
+// The file contains a number of utility classes and functions used by MKL
+// enabled kernels
+
+namespace tensorflow {
+
+// This class encapsulates all the meta data that is associated with an MKL
+// tensor. A tensor is an MKL tensor if it was created as the result of an
+// MKL operation, and did not go through a conversion to a standard
+// Tensorflow tensor.
+//
+// The object owns the `sizes_`/`strides_` arrays and both dnn layouts and
+// releases them in its destructor. SetTfLayout() and DeSerializeMklShape()
+// overwrite those members without releasing earlier values, so each is meant
+// to be called at most once on a given object.
+
+class MklShape {
+ public:
+  MklShape() {}
+  TF_DISALLOW_COPY_AND_ASSIGN(MklShape);  // Cannot copy
+
+  ~MklShape() {
+    // delete[] on a null pointer is a no-op, so no null checks are needed.
+    delete[] sizes_;
+    delete[] strides_;
+    if (mklLayout_) CHECK_EQ(dnnLayoutDelete_F32(mklLayout_), E_SUCCESS);
+    if (tfLayout_) CHECK_EQ(dnnLayoutDelete_F32(tfLayout_), E_SUCCESS);
+  }
+
+  const bool IsMklTensor() const { return isMklTensor_; }
+
+  void SetMklTensor(const bool isMklTensor) { isMklTensor_ = isMklTensor; }
+
+  // Creates the MKL layout from `primitive` for the given resource type.
+  // CHECK-fails if MKL reports an error.
+  void SetMklLayout(const void* primitive, size_t resourceType) {
+    CHECK_EQ(
+        dnnLayoutCreateFromPrimitive_F32(&mklLayout_, (dnnPrimitive_t)primitive,
+                                         (dnnResourceType_t)resourceType),
+        E_SUCCESS);
+  }
+
+  // Records the dimension count and, when it is non-zero, copies `sizes` and
+  // `strides` and creates the matching TensorFlow layout.
+  void SetTfLayout(const size_t dimension, const size_t* sizes,
+                   const size_t* strides) {
+    dimension_ = dimension;
+    if (dimension > 0) {  // MKL doesn't support dimension 0
+      sizes_ = new size_t[dimension];
+      strides_ = new size_t[dimension];
+
+      // size_t index: avoids a signed/unsigned comparison with `dimension`.
+      for (size_t ii = 0; ii < dimension; ii++) {
+        sizes_[ii] = sizes[ii];
+        strides_[ii] = strides[ii];
+      }
+      CHECK_EQ(dnnLayoutCreate_F32(&tfLayout_, dimension, sizes, strides),
+               E_SUCCESS);
+    }
+  }
+
+  const dnnLayout_t GetMklLayout() const { return mklLayout_; }
+  const dnnLayout_t GetTfLayout() const { return tfLayout_; }
+  // Returns the MKL layout for an MKL tensor, the TensorFlow layout otherwise.
+  const dnnLayout_t GetCurLayout() const {
+    return isMklTensor_ ? mklLayout_ : tfLayout_;
+  }
+  size_t GetDimension() const { return dimension_; }
+  const size_t* GetSizes() const { return sizes_; }
+  const size_t* GetStrides() const { return strides_; }
+
+  // Converts `input` from the current layout (see GetCurLayout) to
+  // `targetLayout`, writing the result to `output`. The conversion primitive
+  // is created and destroyed on every call.
+  void GetConvertedFlatData(dnnLayout_t targetLayout, void* input,
+                            void* output) const {
+    dnnLayout_t curLayout;
+    if (isMklTensor_)
+      curLayout = mklLayout_;
+    else
+      curLayout = tfLayout_;
+    dnnPrimitive_t convert;
+    CHECK_EQ(dnnConversionCreate_F32(&convert, curLayout, targetLayout),
+             E_SUCCESS);
+    CHECK_EQ(dnnConversionExecute_F32(convert, input, output), E_SUCCESS);
+    CHECK_EQ(dnnDelete_F32(convert), E_SUCCESS);
+  }
+
+// The following methods are used for serializing and de-serializing the
+// contents of the mklshape object.
+// The data is serialized in this order
+// isMklTensor_
+// dimension_
+// sizes
+// strides
+// mklLayout_
+// tfLayout_
+
+// Size of buffer needed to serialize dnn_layout pointer
+#define SIZE_OF_MKL_DNN_BUF (dnnLayoutSerializationBufferSize_F32())
+
+// Size of buffer to hold the serialized object, the size is computed as follows
+// sizeof(isMklTensor_) + sizeof(dimension_) + sizeof(sizes) + sizeof(strides)
+// + sizeof(mklLayout_ buffer) + sizeof(tfLayout_ buffer)
+// (`dims` is parenthesized so that expression arguments expand correctly.)
+
+#define SIZE_OF_MKL_SERIAL_DATA(dims) \
+  (2 * sizeof(size_t) + 2 * (dims) * sizeof(size_t) + 2 * SIZE_OF_MKL_DNN_BUF)
+
+// First we need to define some macro for offsets into the serial buffer where
+// different elements of Mklshape is written/read from
+
+// Location from start of buffer where isMklTensor_ is serialized
+#define IS_MKL_TENSOR_OFFSET 0
+// Location of dimension_
+#define DIMS_OFFSET (IS_MKL_TENSOR_OFFSET + sizeof(size_t))
+// Location of sizes. Note dims is not used here, left here to make macros
+// consistent.
+#define SIZES_OFFSET(dims) (DIMS_OFFSET + sizeof(size_t))
+// Location of strides
+#define STRIDES_OFFSET(dims) (SIZES_OFFSET(dims) + (dims) * sizeof(size_t))
+// Location of mklLayout_
+#define MKL_LAYOUT_OFFSET(dims) \
+  (STRIDES_OFFSET(dims) + (dims) * sizeof(size_t))
+// Location of tfLayout_
+#define TF_LAYOUT_OFFSET(dims) (MKL_LAYOUT_OFFSET(dims) + SIZE_OF_MKL_DNN_BUF)
+
+  // TODO(agramesh1) make sure to create a const to share with rewrite pass
+  // for min size of MKL metadata tensor.
+
+  // Restores this object from `buf`, which holds `buf_size` bytes laid out as
+  // described above. Only the isMklTensor_ flag is read when the flag is
+  // false. CHECK-fails if the buffer is too small for the data it claims to
+  // hold or if MKL cannot deserialize a layout.
+  void DeSerializeMklShape(const unsigned char* buf, size_t buf_size) {
+    // Make sure buffer holds at least isMklTensor_
+    CHECK(buf_size >= sizeof(size_t)) << "Bufsize too small in DeSerialize";
+    isMklTensor_ =
+        *reinterpret_cast<const size_t*>(buf + IS_MKL_TENSOR_OFFSET) != 0;
+
+    if (isMklTensor_) {  // If it is an MKL Tensor then read the rest
+      // dimension_ itself must be inside the buffer before it can be read and
+      // used to validate the remaining size.
+      CHECK(buf_size >= DIMS_OFFSET + sizeof(size_t))
+          << "Bufsize too small in DeSerialize";
+      dimension_ = *(reinterpret_cast<const size_t*>(buf + DIMS_OFFSET));
+      CHECK(buf_size >= SIZE_OF_MKL_SERIAL_DATA(dimension_))
+          << "Bufsize too small in DeSerialize";
+      sizes_ = new size_t[dimension_];
+      strides_ = new size_t[dimension_];
+      for (size_t i = 0; i < dimension_; i++) {
+        sizes_[i] =
+            reinterpret_cast<const size_t*>(buf + SIZES_OFFSET(dimension_))[i];
+        strides_[i] = reinterpret_cast<const size_t*>(
+            buf + STRIDES_OFFSET(dimension_))[i];
+      }
+      CHECK_EQ(dnnLayoutDeserialize_F32(&mklLayout_,
+                                        buf + MKL_LAYOUT_OFFSET(dimension_)),
+               E_SUCCESS);
+      CHECK_EQ(dnnLayoutDeserialize_F32(&tfLayout_,
+                                        buf + TF_LAYOUT_OFFSET(dimension_)),
+               E_SUCCESS);
+    }
+  }
+
+  // Writes this object into `buf` (capacity `buf_size` bytes) using the layout
+  // described above. Only the isMklTensor_ flag is written when the flag is
+  // false. CHECK-fails if the buffer is too small or MKL cannot serialize a
+  // layout.
+  void SerializeMklShape(unsigned char* buf, size_t buf_size) const {
+    CHECK(buf_size >= SIZE_OF_MKL_SERIAL_DATA(dimension_))
+        << "Bufsize too small to Serialize";
+    *reinterpret_cast<size_t*>(buf + IS_MKL_TENSOR_OFFSET) =
+        isMklTensor_ ? 1 : 0;
+    if (isMklTensor_) {
+      *(reinterpret_cast<size_t*>(buf + DIMS_OFFSET)) = dimension_;
+      for (size_t i = 0; i < dimension_; i++) {
+        reinterpret_cast<size_t*>(buf + SIZES_OFFSET(dimension_))[i] =
+            sizes_[i];
+        reinterpret_cast<size_t*>(buf + STRIDES_OFFSET(dimension_))[i] =
+            strides_[i];
+      }
+      CHECK_EQ(dnnLayoutSerialize_F32(mklLayout_,
+                                      buf + MKL_LAYOUT_OFFSET(dimension_)),
+               E_SUCCESS);
+      CHECK_EQ(
+          dnnLayoutSerialize_F32(tfLayout_, buf + TF_LAYOUT_OFFSET(dimension_)),
+          E_SUCCESS);
+    }
+  }
+
+ private:
+  bool isMklTensor_ =
+      false;  // Flag to indicate if the tensor is an MKL tensor or not
+  dnnLayout_t mklLayout_ = nullptr;  // Pointer to the MKL layout
+  dnnLayout_t tfLayout_ = nullptr;   // Pointer to layout of corresponding
+  // Tensorflow tensor, used when conversion from MKL to standard tensor
+  size_t dimension_ = 0;
+  size_t* sizes_ = nullptr;    // Required by MKL for conversions
+  size_t* strides_ = nullptr;  // Required by MKL for conversions
+};
+
+// Returns the slot of the data tensor for the n-th logical input/output.
+// Data tensors sit at the even slots (0, 2, 4, ...).
+inline int GetTensorDataIndex(int n) {
+  const int data_slot = n * 2;
+  return data_slot;
+}
+
+// Returns the slot of the meta-data tensor for the n-th logical input/output.
+// Meta-data tensors sit at the odd slots (1, 3, 5, ...).
+inline int GetTensorMetaDataIndex(int n) {
+  const int meta_slot = n * 2 + 1;
+  return meta_slot;
+}
+// Deserializes into `mklshape` the MKL shape stored in the uint8 meta-data
+// tensor that accompanies the n-th logical input of the kernel.
+inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) {
+  const Tensor& meta_tensor = ctext->input(GetTensorMetaDataIndex(n));
+  auto meta_bytes = meta_tensor.flat<uint8>();
+  mklshape->DeSerializeMklShape(meta_bytes.data(),
+                                meta_bytes.size() * sizeof(uint8));
+}
+
+// Returns the data tensor (as opposed to its meta-data companion) of the n-th
+// logical input.
+inline const Tensor& MklGetInput(OpKernelContext* ctext, int n) {
+  const int data_slot = GetTensorDataIndex(n);
+  return ctext->input(data_slot);
+}
+
+// Allocates the n-th logical output as a pair of tensors: the data tensor of
+// shape `tfshape` (returned through `output`) and a 1-D uint8 tensor into
+// which `mklshape` is serialized. Returns early, via OP_REQUIRES_OK, if either
+// allocation fails.
+inline void AllocateOutputSetMklshape(OpKernelContext* ctext, int n,
+                                      Tensor** output,
+                                      const TensorShape& tfshape,
+                                      const MklShape& mklshape) {
+  // The meta-data tensor is sized to hold the whole serialized MklShape.
+  TensorShape meta_shape;
+  meta_shape.AddDim(SIZE_OF_MKL_SERIAL_DATA(mklshape.GetDimension()));
+
+  const int data_slot = GetTensorDataIndex(n);
+  OP_REQUIRES_OK(ctext, ctext->allocate_output(data_slot, tfshape, output));
+
+  Tensor* meta_tensor = nullptr;
+  const int meta_slot = GetTensorMetaDataIndex(n);
+  OP_REQUIRES_OK(ctext,
+                 ctext->allocate_output(meta_slot, meta_shape, &meta_tensor));
+
+  auto meta_bytes = meta_tensor->flat<uint8>();
+  mklshape.SerializeMklShape(meta_bytes.data(),
+                             meta_bytes.size() * sizeof(uint8));
+}
+
+// Allocates a temporary float tensor large enough for the memory size MKL
+// reports for layout `lt_buff`, stores it in `tensor_out`, and returns a
+// pointer to its data through `buf_out`.
+// Only F32 is supported for now; this needs to be templatized if other types
+// are added.
+inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
+                           dnnLayout_t lt_buff, void** buf_out) {
+  // Byte size divided by the element size, plus one spare element.
+  const size_t layout_bytes =
+      dnnLayoutGetMemorySize_F32(static_cast<dnnLayout_t>(lt_buff));
+  TensorShape tf_shape;
+  tf_shape.AddDim(layout_bytes / sizeof(float) + 1);
+
+  OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<float>::v(),
+                                                 tf_shape, tensor_out));
+  float* float_data = tensor_out->flat<float>().data();
+  *buf_out = static_cast<void*>(float_data);
+}
+
+// Fills the four entries of `strides` from the four entries of `sizes`,
+// choosing the formula that matches `data_format`.
+// MKL requires strides in NCHW.
+// NOTE(review): `sizes` appears to be given in MKL dimension order -- confirm
+// against the callers.
+inline void GetStridesFromSizes(TensorFormat data_format, size_t* strides,
+                                const size_t* sizes) {
+  const bool is_nhwc = (data_format == FORMAT_NHWC);
+  if (!is_nhwc) {
+    // Each stride is the running product of the preceding sizes.
+    strides[0] = 1;
+    strides[1] = sizes[0];
+    strides[2] = sizes[0] * sizes[1];
+  } else {
+    // For NHWC the unit stride goes to index 2 instead of index 0.
+    strides[0] = sizes[2];
+    strides[1] = sizes[0] * sizes[2];
+    strides[2] = 1;
+  }
+  // The last stride is the same for both formats.
+  strides[3] = sizes[0] * sizes[1] * sizes[2];
+}
+
+namespace mkl_layer_registry {
+
+// Label constant and the text pattern searched for in the kernel listing.
+static const char* kMklLayerLabel = "MklLayer";
+static const string kMklLayerLabelPattern = "label='MklLayer'";
+
+// Reports whether an op is registered as MKL-compliant.
+//
+// @input: name of the op
+// @return: true if the kernels registered for `op_name` contain the MKL layer
+//          label pattern
+static inline bool IsMklLayer(const std::string& op_name) {
+  const string registered_kernels = KernelsRegisteredForOp(op_name);
+  const bool has_mkl_label =
+      registered_kernels.find(kMklLayerLabelPattern) != string::npos;
+  return has_mkl_label;
+}
+
+}  // namespace mkl_layer_registry
+
+} // namespace tensorflow
+#endif // INTEL_MKL
+#endif // TENSORFLOW_CORE_UTIL_MKL_UTIL_H_
diff --git a/tensorflow/docs_src/extend/adding_an_op.md b/tensorflow/docs_src/extend/adding_an_op.md
index f95298d377..4fc4c2faa2 100644
--- a/tensorflow/docs_src/extend/adding_an_op.md
+++ b/tensorflow/docs_src/extend/adding_an_op.md
@@ -1056,7 +1056,7 @@ cuda_op_kernel.cu.o -I $TF_INC -fPIC -lcudart
Note that if your CUDA libraries are not installed in `/usr/local/lib64`,
you'll need to specify the path explicitly in the second (g++) command above.
-For example, add `-L /usr/local/cuda-8.0/lib64/` if your CUDA is installed in
+For example, add `-L /usr/local/cuda-8.0/lib64/` if your CUDA is installed in
`/usr/local/cuda-8.0`.
### Implement the gradient in Python {#implement-gradient}
@@ -1160,7 +1160,9 @@ for ZeroOut:
```
`c->set_output(0, c->input(0));` declares that the first output's shape should
-be set to the first input's shape. There are a number of common shape functions
+be set to the first input's shape. If the output is selected by its index as in the above example, the second parameter of `set_output` should be a `ShapeHandle` object. You can create an empty `ShapeHandle` object by its default constructor. The `ShapeHandle` object for an input with index `idx` can be obtained by `c->input(idx)`.
+
+There are a number of common shape functions
that apply to many ops, such as `shape_inference::UnchangedShape` which can be
found in [common_shape_fns.h](https://www.tensorflow.org/code/tensorflow/core/framework/common_shape_fns.h) and used as follows:
@@ -1220,7 +1222,15 @@ particular dimension has a very specific value using `InferenceContext::Dim` and
`InferenceContext::WithValue`; you can specify that an output dimension is the
sum / product of two input dimensions using `InferenceContext::Add` and
`InferenceContext::Multiply`. See the `InferenceContext` class for
-all of the various shape manipulations you can specify.
+all of the various shape manipulations you can specify. The following example sets
+the shape of the first output to (n, 3), where the first input has shape (n, ...):
+
+```c++
+.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
+ c->set_output(0, c->Matrix(c->Dim(c->input(0), 0), 3));
+ return Status::OK();
+});
+```
If you have a complicated shape function, you should consider adding a test for
validating that various input shape combinations produce the expected output
diff --git a/tensorflow/docs_src/get_started/get_started.md b/tensorflow/docs_src/get_started/get_started.md
index ae0007359d..b71249de0a 100644
--- a/tensorflow/docs_src/get_started/get_started.md
+++ b/tensorflow/docs_src/get_started/get_started.md
@@ -374,7 +374,7 @@ estimator.fit(input_fn=input_fn, steps=1000)
# Here we evaluate how well our model did. In a real example, we would want
# to use a separate validation and testing data set to avoid overfitting.
-estimator.evaluate(input_fn=input_fn)
+print(estimator.evaluate(input_fn=input_fn))
```
When run, it produces
```
diff --git a/tensorflow/docs_src/get_started/mnist/mechanics.md b/tensorflow/docs_src/get_started/mnist/mechanics.md
index afd9039017..b55a5c19ff 100644
--- a/tensorflow/docs_src/get_started/mnist/mechanics.md
+++ b/tensorflow/docs_src/get_started/mnist/mechanics.md
@@ -351,7 +351,7 @@ training.
```python
if step % 100 == 0:
- print 'Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration)
+ print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
```
#### Visualize the Status
@@ -421,19 +421,19 @@ the training and test datasets. The `do_eval()` function is called thrice, for
the training, validation, and test datasets.
```python
-print 'Training Data Eval:'
+print('Training Data Eval:')
do_eval(sess,
eval_correct,
images_placeholder,
labels_placeholder,
data_sets.train)
-print 'Validation Data Eval:'
+print('Validation Data Eval:')
do_eval(sess,
eval_correct,
images_placeholder,
labels_placeholder,
data_sets.validation)
-print 'Test Data Eval:'
+print('Test Data Eval:')
do_eval(sess,
eval_correct,
images_placeholder,
diff --git a/tensorflow/docs_src/programmers_guide/faq.md b/tensorflow/docs_src/programmers_guide/faq.md
index a400d91654..fa8b6fb7f1 100644
--- a/tensorflow/docs_src/programmers_guide/faq.md
+++ b/tensorflow/docs_src/programmers_guide/faq.md
@@ -92,12 +92,12 @@ two following snippets of code are equivalent:
# Using `Session.run()`.
sess = tf.Session()
c = tf.constant(5.0)
-print sess.run(c)
+print(sess.run(c))
# Using `Tensor.eval()`.
c = tf.constant(5.0)
with tf.Session():
- print c.eval()
+ print(c.eval())
```
In the second example, the session acts as a
diff --git a/tensorflow/docs_src/programmers_guide/variables.md b/tensorflow/docs_src/programmers_guide/variables.md
index 9189618368..04bfca5f3b 100644
--- a/tensorflow/docs_src/programmers_guide/variables.md
+++ b/tensorflow/docs_src/programmers_guide/variables.md
@@ -144,6 +144,11 @@ specified list, of the variables in the graph. The saver object provides
methods to run these ops, specifying paths for the checkpoint files to write to
or read from.
+Note that to restore a model checkpoint without a graph one must first import
+the graph from the meta graph file (typical extension is `.meta`). This is
+done with @{tf.train.import_meta_graph}, which in turn returns a `Saver` from
+which one can then perform a `restore`.
+
### Checkpoint Files
Variables are saved in binary files that, roughly, contain a map from variable
diff --git a/tensorflow/docs_src/tutorials/linear.md b/tensorflow/docs_src/tutorials/linear.md
index 3569d47efd..30daf335bf 100644
--- a/tensorflow/docs_src/tutorials/linear.md
+++ b/tensorflow/docs_src/tutorials/linear.md
@@ -217,7 +217,7 @@ results = e.evaluate(input_fn=input_fn_test, steps=1)
# Print the stats for the evaluation.
for key in sorted(results):
- print "%s: %s" % (key, results[key])
+ print("%s: %s" % (key, results[key]))
```
### Wide and deep learning
diff --git a/tensorflow/docs_src/tutorials/using_gpu.md b/tensorflow/docs_src/tutorials/using_gpu.md
index e4e342adfe..d64cdafdef 100644
--- a/tensorflow/docs_src/tutorials/using_gpu.md
+++ b/tensorflow/docs_src/tutorials/using_gpu.md
@@ -28,7 +28,7 @@ c = tf.matmul(a, b)
# Creates a session with log_device_placement set to True.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
# Runs the op.
-print sess.run(c)
+print(sess.run(c))
```
You should see the following output:
@@ -61,7 +61,7 @@ with tf.device('/cpu:0'):
# Creates a session with log_device_placement set to True.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
# Runs the op.
-print sess.run(c)
+print(sess.run(c))
```
You will see that now `a` and `b` are assigned to `cpu:0`.
@@ -131,7 +131,7 @@ with tf.device('/gpu:2'):
# Creates a session with log_device_placement set to True.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
# Runs the op.
-print sess.run(c)
+print(sess.run(c))
```
If the device you have specified does not exist, you will get
@@ -160,7 +160,7 @@ with tf.device('/gpu:2'):
sess = tf.Session(config=tf.ConfigProto(
allow_soft_placement=True, log_device_placement=True))
# Runs the op.
-print sess.run(c)
+print(sess.run(c))
```
## Using multiple GPUs
@@ -182,7 +182,7 @@ with tf.device('/cpu:0'):
# Creates a session with log_device_placement set to True.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
# Runs the op.
-print sess.run(sum)
+print(sess.run(sum))
```
You will see the following output.
diff --git a/tensorflow/docs_src/tutorials/wide.md b/tensorflow/docs_src/tutorials/wide.md
index 079efb201e..471811ea1a 100644
--- a/tensorflow/docs_src/tutorials/wide.md
+++ b/tensorflow/docs_src/tutorials/wide.md
@@ -188,7 +188,7 @@ def input_fn(df):
categorical_cols = {k: tf.SparseTensor(
indices=[[i, 0] for i in range(df[k].size)],
values=df[k].values,
- shape=[df[k].size, 1])
+ dense_shape=[df[k].size, 1])
for k in CATEGORICAL_COLUMNS}
# Merges the two dictionaries into one.
feature_cols = dict(continuous_cols.items() + categorical_cols.items())
@@ -261,6 +261,8 @@ learned through the model training process we'll go through later.
We'll do the similar trick to define the other categorical features:
```python
+race = tf.contrib.layers.sparse_column_with_hash_bucket("race", hash_bucket_size=100)
+marital_status = tf.contrib.layers.sparse_column_with_hash_bucket("marital_status", hash_bucket_size=100)
relationship = tf.contrib.layers.sparse_column_with_hash_bucket("relationship", hash_bucket_size=100)
workclass = tf.contrib.layers.sparse_column_with_hash_bucket("workclass", hash_bucket_size=100)
occupation = tf.contrib.layers.sparse_column_with_hash_bucket("occupation", hash_bucket_size=1000)
@@ -377,7 +379,7 @@ the labels of the holdout data:
```python
results = m.evaluate(input_fn=eval_input_fn, steps=1)
for key in sorted(results):
- print "%s: %s" % (key, results[key])
+ print("%s: %s" % (key, results[key]))
```
The first line of the output should be something like `accuracy: 0.83557522`,
diff --git a/tensorflow/docs_src/tutorials/wide_and_deep.md b/tensorflow/docs_src/tutorials/wide_and_deep.md
index b5e5981fe1..dd830eeca9 100644
--- a/tensorflow/docs_src/tutorials/wide_and_deep.md
+++ b/tensorflow/docs_src/tutorials/wide_and_deep.md
@@ -255,7 +255,7 @@ After reading in the data, you can train and evaluate the model:
m.fit(input_fn=train_input_fn, steps=200)
results = m.evaluate(input_fn=eval_input_fn, steps=1)
for key in sorted(results):
- print "%s: %s" % (key, results[key])
+ print("%s: %s" % (key, results[key]))
```
The first line of the output should be something like `accuracy: 0.84429705`. We
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java
index a95e93ce69..c1a893e9ee 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java
@@ -432,7 +432,7 @@ public class StylizeActivity extends CameraActivity implements OnImageAvailableL
// Everything else is 0, so just pick a suitable slider to push up when the
// selected one goes down.
if (adapter.items[lastOtherStyle] == slider) {
- lastOtherStyle = lastOtherStyle + 1 % NUM_STYLES;
+ lastOtherStyle = (lastOtherStyle + 1) % NUM_STYLES;
}
adapter.items[lastOtherStyle].setValue(1.0f - value);
}
diff --git a/tensorflow/examples/learn/README.md b/tensorflow/examples/learn/README.md
index b36986855f..37157fc296 100644
--- a/tensorflow/examples/learn/README.md
+++ b/tensorflow/examples/learn/README.md
@@ -1,7 +1,7 @@
# TF Learn Examples
Learn is a high-level API for TensorFlow that allows you to create,
-train, and use deep learning models easily. See the [Quickstart tutorial](../../g3doc/tutorials/tflearn/index.md)
+train, and use deep learning models easily. See the [Quickstart tutorial](https://www.tensorflow.org/get_started/tflearn)
for an introduction to the API.
To run most of these examples, you need to install the `scikit learn` library (`sudo pip install sklearn`).
diff --git a/tensorflow/examples/learn/boston.py b/tensorflow/examples/learn/boston.py
index 2986ff9106..19cfdee513 100644
--- a/tensorflow/examples/learn/boston.py
+++ b/tensorflow/examples/learn/boston.py
@@ -16,19 +16,22 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-from sklearn import cross_validation
+
+from sklearn import datasets
+from sklearn import model_selection
from sklearn import metrics
from sklearn import preprocessing
+
import tensorflow as tf
def main(unused_argv):
# Load dataset
- boston = tf.contrib.learn.datasets.load_dataset('boston')
+ boston = datasets.load_boston()
x, y = boston.data, boston.target
# Split dataset into train / test
- x_train, x_test, y_train, y_test = cross_validation.train_test_split(
+ x_train, x_test, y_train, y_test = model_selection.train_test_split(
x, y, test_size=0.2, random_state=42)
# Scale data (training set) to 0 mean and unit standard deviation.
diff --git a/tensorflow/examples/learn/iris.py b/tensorflow/examples/learn/iris.py
index 7b65eb521a..ec2aa9b573 100644
--- a/tensorflow/examples/learn/iris.py
+++ b/tensorflow/examples/learn/iris.py
@@ -17,7 +17,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-
+from sklearn import datasets
from sklearn import metrics
from sklearn import model_selection
@@ -26,7 +26,7 @@ import tensorflow as tf
def main(unused_argv):
# Load dataset.
- iris = tf.contrib.learn.datasets.load_dataset('iris')
+ iris = datasets.load_iris()
x_train, x_test, y_train, y_test = model_selection.train_test_split(
iris.data, iris.target, test_size=0.2, random_state=42)
diff --git a/tensorflow/examples/learn/text_classification.py b/tensorflow/examples/learn/text_classification.py
index c3d00a11b9..7e10014c39 100644
--- a/tensorflow/examples/learn/text_classification.py
+++ b/tensorflow/examples/learn/text_classification.py
@@ -24,6 +24,7 @@ import numpy as np
import pandas
from sklearn import metrics
import tensorflow as tf
+from tensorflow.contrib.layers.python.layers import encoders
learn = tf.contrib.learn
@@ -37,7 +38,7 @@ n_words = 0
def bag_of_words_model(features, target):
"""A bag-of-words model. Note it disregards the word order in the text."""
target = tf.one_hot(target, 15, 1, 0)
- features = tf.contrib.layers.bow_encoder(
+ features = encoders.bow_encoder(
features, vocab_size=n_words, embed_dim=EMBEDDING_SIZE)
logits = tf.contrib.layers.fully_connected(features, 15, activation_fn=None)
loss = tf.contrib.losses.softmax_cross_entropy(logits, target)
diff --git a/tensorflow/examples/tutorials/deepdream/deepdream.ipynb b/tensorflow/examples/tutorials/deepdream/deepdream.ipynb
index cbcc54ce3c..016b21cd12 100644
--- a/tensorflow/examples/tutorials/deepdream/deepdream.ipynb
+++ b/tensorflow/examples/tutorials/deepdream/deepdream.ipynb
@@ -278,7 +278,7 @@
" tensor = n.attr['value'].tensor\n",
" size = len(tensor.tensor_content)\n",
" if size > max_const_size:\n",
- " tensor.tensor_content = bytes(\"<stripped %d bytes>\"%size, 'utf-8')\n",
+ " tensor.tensor_content = bytes(\"<stripped %d bytes>\"%size)\n",
" return strip_def\n",
" \n",
"def rename_nodes(graph_def, rename_func):\n",
diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
index 25800c109e..f54a7c37a1 100644
--- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
+++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
@@ -62,7 +62,7 @@ print('Data size', len(words))
vocabulary_size = 50000
-def build_dataset(words):
+def build_dataset(words, vocabulary_size):
count = [['UNK', -1]]
count.extend(collections.Counter(words).most_common(vocabulary_size - 1))
dictionary = dict()
@@ -81,7 +81,7 @@ def build_dataset(words):
reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
return data, count, dictionary, reverse_dictionary
-data, count, dictionary, reverse_dictionary = build_dataset(words)
+data, count, dictionary, reverse_dictionary = build_dataset(words, vocabulary_size)
del words # Hint to reduce memory.
print('Most common words (+UNK)', count[:5])
print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]])
@@ -181,7 +181,7 @@ with graph.as_default():
valid_embeddings, normalized_embeddings, transpose_b=True)
# Add variable initializer.
- init = tf.initialize_all_variables()
+ init = tf.global_variables_initializer()
# Step 5: Begin training.
num_steps = 100001
diff --git a/tensorflow/go/genop/generate.sh b/tensorflow/go/genop/generate.sh
index b961f7200a..d791e39c40 100644
--- a/tensorflow/go/genop/generate.sh
+++ b/tensorflow/go/genop/generate.sh
@@ -20,11 +20,17 @@ go get github.com/golang/protobuf/proto
go get github.com/golang/protobuf/protoc-gen-go
cd $(dirname $0)
-TF_DIR=${GOPATH}/src/github.com/tensorflow/tensorflow
-PROTOC="${TF_DIR}/bazel-out/host/bin/external/protobuf/protoc"
+for g in $(echo $GOPATH | sed "s/:/ /g"); do
+ TF_DIR="${g}/src/github.com/tensorflow/tensorflow"
+ PROTOC="${TF_DIR}/bazel-out/host/bin/external/protobuf/protoc"
+ if [ -x "${PROTOC}" ]; then
+ break
+ fi
+done
if [ ! -x "${PROTOC}" ]
then
+ set +e
PATH_PROTOC=$(which protoc)
if [ ! -x "${PATH_PROTOC}" ]
then
@@ -34,6 +40,7 @@ then
exit 1
fi
PROTOC=$PATH_PROTOC
+ set -e
fi
# Ensure that protoc-gen-go is available in $PATH
diff --git a/tensorflow/java/README.md b/tensorflow/java/README.md
index 26377ba0d2..20eb6a8265 100644
--- a/tensorflow/java/README.md
+++ b/tensorflow/java/README.md
@@ -110,7 +110,7 @@ libraries will need to be built from source.
brew install swig
```
-3. [Configure](https://www.tensorflow.org/get_started/os_setup#configure_the_installation)
+3. [Configure](https://www.tensorflow.org/install/install_sources#configure_the_installation)
(e.g., enable GPU support) and build:
```sh
@@ -120,8 +120,8 @@ libraries will need to be built from source.
//tensorflow/java:libtensorflow_jni
```
-The JAR (`libtensorflow.jar`) and native library (`libtensorflow_jni.so`) will
-be in `bazel-bin/tensorflow/java`.
+The JAR (`libtensorflow.jar`) and native library (`libtensorflow_jni.so` on Linux or `libtensorflow_jni.dylib` on OS X) will
+be in `bazel-bin/tensorflow/java`. Using these artifacts, follow steps 3 and 4 in the [quickstart](#quickstart) section to get your application up and running.
### Maven
diff --git a/tensorflow/java/src/main/java/org/tensorflow/SavedModelBundle.java b/tensorflow/java/src/main/java/org/tensorflow/SavedModelBundle.java
index c3938fe23f..b4591dd869 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/SavedModelBundle.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/SavedModelBundle.java
@@ -27,7 +27,8 @@ package org.tensorflow;
public class SavedModelBundle implements AutoCloseable {
/**
- * Load a saved model from an export directory.
+ * Load a saved model from an export directory. The model that is being loaded should be created using
+ * the <a href="https://www.tensorflow.org/api_docs/python/tf/saved_model">Saved Model API</a>.
*
* @param exportDir the directory path containing a saved model.
* @param tags the tags identifying the specific metagraphdef to load.
diff --git a/tensorflow/java/src/main/java/org/tensorflow/Tensor.java b/tensorflow/java/src/main/java/org/tensorflow/Tensor.java
index efd6c81b30..692de2289d 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/Tensor.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/Tensor.java
@@ -172,8 +172,7 @@ public final class Tensor implements AutoCloseable {
*
* <p>Creates a Tensor with the provided shape of any type where the tensor's data has been
* encoded into {@code data} as per the specification of the TensorFlow <a
- * href="https://www.tensorflow.org/code/tensorflow/c/c_api.h">C
- * API</a>.
+ * href="https://www.tensorflow.org/code/tensorflow/c/c_api.h">C API</a>.
*
* @param dataType the tensor datatype.
* @param shape the tensor shape.
diff --git a/tensorflow/java/src/main/java/org/tensorflow/package-info.java b/tensorflow/java/src/main/java/org/tensorflow/package-info.java
index 3b7b8079f9..dd4859e1b1 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/package-info.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/package-info.java
@@ -19,8 +19,8 @@ limitations under the License.
* <p><b>WARNING</b>: The API is currently experimental and is not covered by TensorFlow <a
* href="https://www.tensorflow.org/programmers_guide/version_semantics">API stability
* guarantees</a>. See <a
- * href="https://www.tensorflow.org/code/tensorflow/java/README.md">README.md</a>
- * for installation instructions.
+ * href="https://www.tensorflow.org/code/tensorflow/java/README.md">README.md</a> for installation
+ * instructions.
*
* <p>The <a
* href="https://www.tensorflow.org/code/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java">LabelImage</a>
diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py
index cc31555690..038dc4147a 100644
--- a/tensorflow/python/client/session.py
+++ b/tensorflow/python/client/session.py
@@ -711,14 +711,14 @@ class BaseSession(SessionInterface):
# v is the numpy array [10, 20]
# 'fetches' can be a list.
v = session.run([a, b])
- # v a Python list with 2 numpy arrays: the numpy array [10, 20] and the
+ # v is a Python list with 2 numpy arrays: the 1-D array [10, 20] and the
# 1-D array [1.0, 2.0]
# 'fetches' can be arbitrary lists, tuples, namedtuple, dicts:
MyData = collections.namedtuple('MyData', ['a', 'b'])
v = session.run({'k1': MyData(a, b), 'k2': [b, a]})
# v is a dict with
- # v['k1'] is a MyData namedtuple with 'a' the numpy array [10, 20] and
- # 'b' the numpy array [1.0, 2.0]
+ # v['k1'] is a MyData namedtuple with 'a' (the numpy array [10, 20]) and
+ # 'b' (the numpy array [1.0, 2.0])
# v['k2'] is a list with the numpy array [1.0, 2.0] and the numpy array
# [10, 20].
```
diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index 84bcd8e701..952c4adbfa 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -15,6 +15,7 @@ exports_files(["LICENSE"])
load("//tensorflow:tensorflow.bzl", "cuda_py_test")
load("//tensorflow:tensorflow.bzl", "py_test")
+load("//tensorflow:tensorflow.bzl", "if_not_windows")
py_library(
name = "debug_py",
@@ -33,11 +34,12 @@ py_library(
py_library(
name = "debug_pip",
deps = [
- ":debug_examples",
":debug_py",
":offline_analyzer",
":session_debug_testlib",
- ],
+ ] + if_not_windows([
+ ":debug_examples",
+ ]),
)
py_library(
diff --git a/tensorflow/python/kernel_tests/tensordot_op_test.py b/tensorflow/python/kernel_tests/tensordot_op_test.py
index bce7e30b68..71230ba000 100644
--- a/tensorflow/python/kernel_tests/tensordot_op_test.py
+++ b/tensorflow/python/kernel_tests/tensordot_op_test.py
@@ -84,9 +84,7 @@ class TensordotTest(test_lib.TestCase):
b_ph: b,
axes_ph: axes_value})
- def test_no_partial_shape_inference(self):
- # If one of the shapes is only partially defined, the output shape is
- # unknown.
+ def test_partial_shape_inference(self):
a = array_ops.placeholder(dtypes.float32)
b = array_ops.placeholder(dtypes.float32)
axes = ([1], [0])
@@ -95,13 +93,21 @@ class TensordotTest(test_lib.TestCase):
a.set_shape([None, 2])
b.set_shape([2, 3])
output = math_ops.tensordot(a, b, axes)
- self.assertEqual(output.get_shape().ndims, None)
+ output_shape = output.get_shape()
+ self.assertEqual(output_shape.ndims, 2)
+ output_shape = output_shape.as_list()
+ self.assertEqual(output_shape[0], None)
+ self.assertEqual(output_shape[1], 3)
a = array_ops.placeholder(dtypes.float32)
b = array_ops.placeholder(dtypes.float32)
a.set_shape([2, 2])
b.set_shape([2, None])
output = math_ops.tensordot(a, b, axes)
- self.assertEqual(output.get_shape().ndims, None)
+ output_shape = output.get_shape()
+ self.assertEqual(output_shape.ndims, 2)
+ output_shape = output_shape.as_list()
+ self.assertEqual(output_shape[0], 2)
+ self.assertEqual(output_shape[1], None)
def _get_tensordot_tests(dtype_, rank_a_, rank_b_, num_dims_, dynamic_shape_):
diff --git a/tensorflow/python/layers/pooling.py b/tensorflow/python/layers/pooling.py
index 2601c61c47..3e40423ad6 100644
--- a/tensorflow/python/layers/pooling.py
+++ b/tensorflow/python/layers/pooling.py
@@ -294,7 +294,7 @@ class AveragePooling2D(_Pooling2D):
data_format: A string. The ordering of the dimensions in the inputs.
`channels_last` (default) and `channels_first` are supported.
`channels_last` corresponds to inputs with shape
- `(batch, height, channels, width)` while `channels_first` corresponds to
+ `(batch, height, width, channels)` while `channels_first` corresponds to
inputs with shape `(batch, channels, height, width)`.
name: A string, the name of the layer.
"""
@@ -329,7 +329,7 @@ def average_pooling2d(inputs,
data_format: A string. The ordering of the dimensions in the inputs.
`channels_last` (default) and `channels_first` are supported.
`channels_last` corresponds to inputs with shape
- `(batch, height, channels, width)` while `channels_first` corresponds to
+ `(batch, height, width, channels)` while `channels_first` corresponds to
inputs with shape `(batch, channels, height, width)`.
name: A string, the name of the layer.
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index f9fd5d77c9..c4a27009c3 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -275,7 +275,7 @@ def exit(data, name=None):
def switch(data, pred, dtype=None, name=None):
"""Forwards `data` to an output determined by `pred`.
- If `pred` is true, the `data` input is forwared to the first output.
+ If `pred` is false, the `data` input is forwarded to the first output.
Otherwise, the data goes to the second output.
This op handles `Tensor`s and `IndexedSlices`.
@@ -323,7 +323,7 @@ def switch(data, pred, dtype=None, name=None):
def _SwitchRefOrTensor(data, pred, name="Switch"):
"""Forwards `data` to an output determined by `pred`.
- If `pred` is true, the `data` input is forwared to the first output.
+ If `pred` is false, the `data` input is forwarded to the first output.
Otherwise, the data goes to the second output.
This op handles `Tensor`s and `IndexedSlices`.
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 62072e1279..0a2d4e4792 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -90,22 +90,23 @@ def _is_tensor(x):
return isinstance(x, (ops.Tensor, variables.Variable))
-def _ImageDimensions(image):
+def _ImageDimensions(image, rank):
"""Returns the dimensions of an image tensor.
Args:
- image: A 3-D Tensor of shape `[height, width, channels]`.
+ image: A rank-D Tensor. For 3-D of shape: `[height, width, channels]`.
+ rank: The expected rank of the image
Returns:
- A list of `[height, width, channels]` corresponding to the dimensions of the
+ A list corresponding to the dimensions of the
input image. Dimensions that are statically known are python integers,
otherwise they are integer scalar tensors.
"""
if image.get_shape().is_fully_defined():
return image.get_shape().as_list()
else:
- static_shape = image.get_shape().with_rank(3).as_list()
- dynamic_shape = array_ops.unstack(array_ops.shape(image), 3)
+ static_shape = image.get_shape().with_rank(rank).as_list()
+ dynamic_shape = array_ops.unstack(array_ops.shape(image), rank)
return [s if s is not None else d
for s, d in zip(static_shape, dynamic_shape)]
@@ -144,22 +145,39 @@ def _Check3DImage(image, require_static=True):
return []
-def _CheckAtLeast3DImage(image):
+def _CheckAtLeast3DImage(image, require_static=True):
"""Assert that we are working with properly shaped image.
Args:
image: >= 3-D Tensor of size [*, height, width, depth]
+ require_static: If `True`, requires that all dimensions of `image` are
+ known and non-zero.
Raises:
ValueError: if image.shape is not a [>= 3] vector.
+
+ Returns:
+ An empty list, if `image` has fully defined dimensions. Otherwise, a list
+ containing an assert op is returned.
"""
- if not image.get_shape().is_fully_defined():
+ try:
+ if image.get_shape().ndims is None:
+ image_shape = image.get_shape().with_rank(3)
+ else:
+ image_shape = image.get_shape().with_rank_at_least(3)
+ except ValueError:
+ raise ValueError("'image' must be at least three-dimensional.")
+ if require_static and not image_shape.is_fully_defined():
raise ValueError('\'image\' must be fully defined.')
- if image.get_shape().ndims < 3:
- raise ValueError('\'image\' must be at least three-dimensional.')
- if not all(x > 0 for x in image.get_shape()):
+ if any(x == 0 for x in image_shape):
raise ValueError('all dims of \'image.shape\' must be > 0: %s' %
- image.get_shape())
+ image_shape)
+ if not image_shape.is_fully_defined():
+ return [check_ops.assert_positive(array_ops.shape(image),
+ ["all dims of 'image.shape' "
+ "must be > 0."])]
+ else:
+ return []
def fix_image_flip_shape(image, result):
@@ -397,14 +415,18 @@ def pad_to_bounding_box(image, offset_height, offset_width, target_height,
`target_height` by `target_width`.
Args:
- image: 3-D tensor with shape `[height, width, channels]`
+ image: 4-D Tensor of shape `[batch, height, width, channels]` or
+ 3-D Tensor of shape `[height, width, channels]`.
offset_height: Number of rows of zeros to add on top.
offset_width: Number of columns of zeros to add on the left.
target_height: Height of output image.
target_width: Width of output image.
Returns:
- 3-D tensor of shape `[target_height, target_width, channels]`
+ If `image` was 4-D, a 4-D float Tensor of shape
+ `[batch, target_height, target_width, channels]`
+ If `image` was 3-D, a 3-D float Tensor of shape
+ `[target_height, target_width, channels]`
Raises:
ValueError: If the shape of `image` is incompatible with the `offset_*` or
@@ -414,9 +436,22 @@ def pad_to_bounding_box(image, offset_height, offset_width, target_height,
image = ops.convert_to_tensor(image, name='image')
assert_ops = []
- assert_ops += _Check3DImage(image, require_static=False)
+ assert_ops += _CheckAtLeast3DImage(image, require_static=False)
+
+ is_batch = True
+ image_shape = image.get_shape()
+ if image_shape.ndims == 3:
+ is_batch = False
+ image = array_ops.expand_dims(image, 0)
+ elif image_shape.ndims is None:
+ is_batch = False
+ image = array_ops.expand_dims(image, 0)
+ image.set_shape([None] * 4)
+ elif image_shape.ndims != 4:
+ raise ValueError('\'image\' must have either 3 or 4 dimensions.')
+
+ batch, height, width, depth = _ImageDimensions(image, rank=4)
- height, width, depth = _ImageDimensions(image)
after_padding_width = target_width - offset_width - width
after_padding_height = target_height - offset_height - height
@@ -433,15 +468,18 @@ def pad_to_bounding_box(image, offset_height, offset_width, target_height,
# Do not pad on the depth dimensions.
paddings = array_ops.reshape(
array_ops.stack([
- offset_height, after_padding_height, offset_width,
+ 0, 0, offset_height, after_padding_height, offset_width,
after_padding_width, 0, 0
- ]), [3, 2])
+ ]), [4, 2])
padded = array_ops.pad(image, paddings)
padded_shape = [None if _is_tensor(i) else i
- for i in [target_height, target_width, depth]]
+ for i in [batch, target_height, target_width, depth]]
padded.set_shape(padded_shape)
+ if not is_batch:
+ padded = array_ops.squeeze(padded, squeeze_dims=[0])
+
return padded
@@ -455,7 +493,8 @@ def crop_to_bounding_box(image, offset_height, offset_width, target_height,
`offset_height + target_height, offset_width + target_width`.
Args:
- image: 3-D tensor with shape `[height, width, channels]`
+ image: 4-D Tensor of shape `[batch, height, width, channels]` or
+ 3-D Tensor of shape `[height, width, channels]`.
offset_height: Vertical coordinate of the top-left corner of the result in
the input.
offset_width: Horizontal coordinate of the top-left corner of the result in
@@ -464,7 +503,10 @@ def crop_to_bounding_box(image, offset_height, offset_width, target_height,
target_width: Width of the result.
Returns:
- 3-D tensor of image with shape `[target_height, target_width, channels]`
+ If `image` was 4-D, a 4-D float Tensor of shape
+ `[batch, target_height, target_width, channels]`
+ If `image` was 3-D, a 3-D float Tensor of shape
+ `[target_height, target_width, channels]`
Raises:
ValueError: If the shape of `image` is incompatible with the `offset_*` or
@@ -474,9 +516,21 @@ def crop_to_bounding_box(image, offset_height, offset_width, target_height,
image = ops.convert_to_tensor(image, name='image')
assert_ops = []
- assert_ops += _Check3DImage(image, require_static=False)
+ assert_ops += _CheckAtLeast3DImage(image, require_static=False)
+
+ is_batch = True
+ image_shape = image.get_shape()
+ if image_shape.ndims == 3:
+ is_batch = False
+ image = array_ops.expand_dims(image, 0)
+ elif image_shape.ndims is None:
+ is_batch = False
+ image = array_ops.expand_dims(image, 0)
+ image.set_shape([None] * 4)
+ elif image_shape.ndims != 4:
+ raise ValueError('\'image\' must have either 3 or 4 dimensions.')
- height, width, depth = _ImageDimensions(image)
+ batch, height, width, depth = _ImageDimensions(image, rank=4)
assert_ops += _assert(offset_width >= 0, ValueError,
'offset_width must be >= 0.')
@@ -493,13 +547,16 @@ def crop_to_bounding_box(image, offset_height, offset_width, target_height,
image = control_flow_ops.with_dependencies(assert_ops, image)
cropped = array_ops.slice(image,
- array_ops.stack([offset_height, offset_width, 0]),
- array_ops.stack([target_height, target_width, -1]))
+ array_ops.stack([0, offset_height, offset_width, 0]),
+ array_ops.stack([-1, target_height, target_width, -1]))
cropped_shape = [None if _is_tensor(i) else i
- for i in [target_height, target_width, depth]]
+ for i in [batch, target_height, target_width, depth]]
cropped.set_shape(cropped_shape)
+ if not is_batch:
+ cropped = array_ops.squeeze(cropped, squeeze_dims=[0])
+
return cropped
@@ -516,7 +573,8 @@ def resize_image_with_crop_or_pad(image, target_height, target_width):
dimension.
Args:
- image: 3-D tensor of shape `[height, width, channels]`
+ image: 4-D Tensor of shape `[batch, height, width, channels]` or
+ 3-D Tensor of shape `[height, width, channels]`.
target_height: Target height.
target_width: Target width.
@@ -524,13 +582,27 @@ def resize_image_with_crop_or_pad(image, target_height, target_width):
ValueError: if `target_height` or `target_width` are zero or negative.
Returns:
- Cropped and/or padded image of shape
- `[target_height, target_width, channels]`
+ Cropped and/or padded image.
+ If `image` was 4-D, a 4-D float Tensor of shape
+ `[batch, new_height, new_width, channels]`.
+ If `image` was 3-D, a 3-D float Tensor of shape
+ `[new_height, new_width, channels]`.
"""
image = ops.convert_to_tensor(image, name='image')
+ image_shape = image.get_shape()
+ is_batch = True
+ if image_shape.ndims == 3:
+ is_batch = False
+ image = array_ops.expand_dims(image, 0)
+ elif image_shape.ndims is None:
+ is_batch = False
+ image = array_ops.expand_dims(image, 0)
+ image.set_shape([None] * 4)
+ elif image_shape.ndims != 4:
+ raise ValueError('\'image\' must have either 3 or 4 dimensions.')
assert_ops = []
- assert_ops += _Check3DImage(image, require_static=False)
+ assert_ops += _CheckAtLeast3DImage(image, require_static=False)
assert_ops += _assert(target_width > 0, ValueError,
'target_width must be > 0.')
assert_ops += _assert(target_height > 0, ValueError,
@@ -563,7 +635,7 @@ def resize_image_with_crop_or_pad(image, target_height, target_width):
else:
return x == y
- height, width, _ = _ImageDimensions(image)
+ _, height, width, _ = _ImageDimensions(image, rank=4)
width_diff = target_width - width
offset_crop_width = max_(-width_diff // 2, 0)
offset_pad_width = max_(width_diff // 2, 0)
@@ -585,7 +657,7 @@ def resize_image_with_crop_or_pad(image, target_height, target_width):
if resized.get_shape().ndims is None:
raise ValueError('resized contains no shape.')
- resized_height, resized_width, _ = _ImageDimensions(resized)
+ _, resized_height, resized_width, _ = _ImageDimensions(resized, rank=4)
assert_ops = []
assert_ops += _assert(equal_(resized_height, target_height), ValueError,
@@ -594,6 +666,10 @@ def resize_image_with_crop_or_pad(image, target_height, target_width):
'resized width is not correct.')
resized = control_flow_ops.with_dependencies(assert_ops, resized)
+
+ if not is_batch:
+ resized = array_ops.squeeze(resized, squeeze_dims=[0])
+
return resized
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index c8691f4eb8..799f7e4935 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -299,7 +299,7 @@ class AdjustHueTest(test_util.TensorFlowTestCase):
return y_v.reshape(x_np.shape)
def _adjustHueTf(self, x_np, delta_h):
- with self.test_session(use_gpu=False):
+ with self.test_session(use_gpu=True):
x = constant_op.constant(x_np)
y = image_ops.adjust_hue(x, delta_h)
y_tf = y.eval()
@@ -1185,9 +1185,13 @@ class CropToBoundingBoxTest(test_util.TensorFlowTestCase):
offset_height, offset_width = [0, 0]
target_height, target_width = [2, 2]
- for x_shape in ([1, 3, 5, 1], [3, 5]):
+ for x_shape in ([3, 5],):
+ self._assertRaises(x, x_shape, offset_height, offset_width, target_height,
+ target_width, "'image' must be at least three-dimensional.")
+
+ for x_shape in ([1, 3, 5, 1, 1],):
self._assertRaises(x, x_shape, offset_height, offset_width, target_height,
- target_width, "must be three-dimensional")
+ target_width, "'image' must have either 3 or 4 dimensions.")
def testZeroLengthInput(self):
# Input image has 0-length dimension(s).
@@ -1430,9 +1434,13 @@ class PadToBoundingBoxTest(test_util.TensorFlowTestCase):
offset_height, offset_width = [0, 0]
target_height, target_width = [2, 2]
- for x_shape in ([1, 3, 5, 1], [3, 5]):
+ for x_shape in ([3, 5],):
self._assertRaises(x, x_shape, offset_height, offset_width, target_height,
- target_width, "must be three-dimensional")
+ target_width, "'image' must be at least three-dimensional")
+
+ for x_shape in ([1, 3, 5, 1, 1],):
+ self._assertRaises(x, x_shape, offset_height, offset_width, target_height,
+ target_width, "'image' must have either 3 or 4 dimensions.")
def testZeroLengthInput(self):
# Input image has 0-length dimension(s).
@@ -2220,9 +2228,13 @@ class ResizeImageWithCropOrPadTest(test_util.TensorFlowTestCase):
x = [0] * 15
target_height, target_width = [4, 4]
- for x_shape in ([1, 3, 5, 1], [3, 5]):
+ for x_shape in ([3, 5],):
+ self._assertRaises(x, x_shape, target_height, target_width,
+ "'image' must have either 3 or 4 dimensions.")
+
+ for x_shape in ([1, 3, 5, 1, 1],):
self._assertRaises(x, x_shape, target_height, target_width,
- "must be three-dimensional")
+ "'image' must have either 3 or 4 dimensions.")
def testZeroLengthInput(self):
# Input image has 0-length dimension(s).
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index d3d954f33d..fe4a47b9ae 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -2298,12 +2298,14 @@ def tensordot(a, b, axes, name=None):
assumes that `a` is the second argument in the contraction operation.
Returns:
- A pair `(reshaped_a, free_dims)` where `reshaped_a` is the tensor `a`
- reshaped to allow contraction via `matmul` and `free_dims` is either a
- list of integers or an `int32` `Tensor`, depending on if `axes` is a list
- and the shape of `a` is fully defined.
+ A tuple `(reshaped_a, free_dims, free_dims_static)` where `reshaped_a` is
+ the tensor `a` reshaped to allow contraction via `matmul`, `free_dims` is
+ either a list of integers or an `int32` `Tensor`, depending on whether
+ `axes` is a list and the shape of `a` is fully defined, and
+ `free_dims_static` is either a list of integers and `None` values, or
+ `None`, representing the inferred static shape of the free dimensions.
+
"""
- # TODO(b/33084409): Implement partial shape inference.
if a.get_shape().is_fully_defined() and isinstance(axes, (list, tuple)):
shape_a = a.get_shape().as_list()
axes = [i if i >= 0 else i + len(shape_a) for i in axes]
@@ -2314,8 +2316,15 @@ def tensordot(a, b, axes, name=None):
perm = list(axes) + free if flipped else free + list(axes)
new_shape = [prod_axes, prod_free] if flipped else [prod_free, prod_axes]
reshaped_a = array_ops.reshape(array_ops.transpose(a, perm), new_shape)
- return reshaped_a, free_dims
+ return reshaped_a, free_dims, free_dims
else:
+ if a.get_shape().ndims is not None and isinstance(axes, (list, tuple)):
+ shape_a = a.get_shape().as_list()
+ axes = [i if i >= 0 else i + len(shape_a) for i in axes]
+ free = [i for i in xrange(len(shape_a)) if i not in axes]
+ free_dims_static = [shape_a[i] for i in free]
+ else:
+ free_dims_static = None
shape_a = array_ops.shape(a)
rank_a = array_ops.rank(a)
axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes")
@@ -2334,7 +2343,7 @@ def tensordot(a, b, axes, name=None):
perm = array_ops.concat([free, axes], 0)
new_shape = array_ops.stack([prod_free_dims, prod_axes_dims])
reshaped_a = array_ops.reshape(array_ops.transpose(a, perm), new_shape)
- return reshaped_a, free_dims
+ return reshaped_a, free_dims, free_dims_static
def _tensordot_axes(a, axes):
"""Generates two sets of contraction axes for the two tensor arguments."""
@@ -2366,16 +2375,19 @@ def tensordot(a, b, axes, name=None):
a = ops.convert_to_tensor(a, name="a")
b = ops.convert_to_tensor(b, name="b")
a_axes, b_axes = _tensordot_axes(a, axes)
- a_reshape, a_free_dims = _tensordot_reshape(a, a_axes)
- b_reshape, b_free_dims = _tensordot_reshape(b, b_axes, True)
+ a_reshape, a_free_dims, a_free_dims_static = _tensordot_reshape(a, a_axes)
+ b_reshape, b_free_dims, b_free_dims_static = _tensordot_reshape(b, b_axes, True)
ab_matmul = matmul(a_reshape, b_reshape)
if isinstance(a_free_dims, list) and isinstance(b_free_dims, list):
return array_ops.reshape(ab_matmul, a_free_dims + b_free_dims, name=name)
else:
- a_free_dims = ops.convert_to_tensor(a_free_dims)
- b_free_dims = ops.convert_to_tensor(b_free_dims)
- return array_ops.reshape(
+ a_free_dims = ops.convert_to_tensor(a_free_dims, dtype=dtypes.int32)
+ b_free_dims = ops.convert_to_tensor(b_free_dims, dtype=dtypes.int32)
+ product = array_ops.reshape(
ab_matmul, array_ops.concat([a_free_dims, b_free_dims], 0), name=name)
+ if a_free_dims_static is not None and b_free_dims_static is not None:
+ product.set_shape(a_free_dims_static + b_free_dims_static)
+ return product
# FFT ops were moved to tf.spectral. tf.fft symbols were part of the TensorFlow
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index c267fb8ccd..bdb34dd78e 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -1473,7 +1473,7 @@ def false_negatives(labels, predictions, weights=None,
metrics_collections=None,
updates_collections=None,
name=None):
- """Computes the total number of false positives.
+ """Computes the total number of false negatives.
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 51ec1c313b..4a8ac42161 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -278,7 +278,8 @@ def with_space_to_batch(
For N=3, the valid values are "NDHWC" (default) and "NCDHW".
Returns:
- The output Tensor as described above.
+ The output Tensor as described above; its dimensions will vary based on
+ the op provided.
Raises:
ValueError: if `padding` is invalid or the arguments are incompatible.
@@ -529,17 +530,16 @@ def convolution(input, filter, # pylint: disable=redefined-builtin
of N `strides` (defaulting [1]*N), this computes for each N-D spatial output
position (x[0], ..., x[N-1]):
+ ```
output[b, x[0], ..., x[N-1], k] =
-
sum_{z[0], ..., z[N-1], q}
-
filter[z[0], ..., z[N-1], q, k] *
padded_input[b,
x[0]*strides[0] + dilation_rate[0]*z[0],
...,
x[N-1]*strides[N-1] + dilation_rate[N-1]*z[N-1],
q]
-
+ ```
where `padded_input` is obtained by zero padding the input using an effective
spatial filter shape of `(spatial_filter_shape-1) * dilation_rate + 1` and
output striding `strides` as described in the
@@ -682,6 +682,7 @@ def pool(input, # pylint: disable=redefined-builtin
0 <= x[i] < output_spatial_shape[i],
0 <= c < num_channels:
+ ```
output[b, x[0], ..., x[N-1], c] =
REDUCE_{z[0], ..., z[N-1]}
input[b,
@@ -689,6 +690,7 @@ def pool(input, # pylint: disable=redefined-builtin
...
x[N-1]*strides[N-1] - pad_before[N-1] + dilation_rate[N-1]*z[N-1],
c],
+ ```
where the reduction function REDUCE depends on the value of `pooling_type`,
and pad_before is defined based on the value of `padding` as described in the
@@ -698,10 +700,12 @@ def pool(input, # pylint: disable=redefined-builtin
In the case that `data_format` starts with `"NC"`, the `input` and output are
simply transposed as follows:
+ ```
pool(input, data_format, **kwargs) =
tf.transpose(pool(tf.transpose(input, [0] + range(2,N+2) + [1]),
**kwargs),
[0, N+1] + range(1, N+1))
+ ```
Args:
input: Tensor of rank N+2, of shape
@@ -740,6 +744,7 @@ def pool(input, # pylint: disable=redefined-builtin
If padding = "SAME":
output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides[i])
+
If padding = "VALID":
output_spatial_shape[i] =
ceil((input_spatial_shape[i] - (window_shape[i] - 1) * dilation_rate[i])
@@ -844,9 +849,14 @@ def atrous_conv2d(value, filters, rate, padding, name=None):
More specifically:
- output[b, i, j, k] = sum_{di, dj, q} filters[di, dj, q, k] *
- value[b, i + rate * di, j + rate * dj, q]
-
+ ```
+ output[batch, height, width, out_channel] =
+ sum_{dheight, dwidth, in_channel} (
+ filters[dheight, dwidth, in_channel, out_channel] *
+ value[batch, height + rate * dheight, width + rate * dwidth, in_channel]
+ )
+ ```
+
Atrous convolution allows us to explicitly control how densely to compute
feature responses in fully convolutional networks. Used in conjunction with
bilinear interpolation, it offers an alternative to `conv2d_transpose` in
@@ -932,6 +942,14 @@ def atrous_conv2d(value, filters, rate, padding, name=None):
Returns:
A `Tensor` with the same type as `value`.
+ Output shape with `'VALID'` padding is:
+
+ [batch, height - 2 * (filter_width - 1),
+ width - 2 * (filter_height - 1), out_channels].
+
+ Output shape with `'SAME'` padding is:
+
+ [batch, height, width, out_channels].
Raises:
ValueError: If input/output depth does not match `filters`' shape, or if
diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py
index 076c6d41d9..c3dddf85f3 100644
--- a/tensorflow/python/ops/rnn_cell_impl.py
+++ b/tensorflow/python/ops/rnn_cell_impl.py
@@ -13,7 +13,12 @@
# limitations under the License.
# ==============================================================================
-"""Module implementing RNN Cells."""
+"""Module implementing RNN Cells.
+
+This module contains the abstract definition of an RNN cell: `_RNNCell`.
+Actual implementations of various types of RNN cells are located in
+`tensorflow.contrib`.
+"""
from __future__ import absolute_import
from __future__ import division
@@ -72,10 +77,12 @@ def _zero_state_tensors(state_size, batch_size, dtype):
class _RNNCell(object):
"""Abstract object representing an RNN cell.
- The definition of cell in this package differs from the definition used in the
- literature. In the literature, cell refers to an object with a single scalar
- output. The definition in this package refers to a horizontal array of such
- units.
+ Every `RNNCell` must have the properties below and implement `__call__` with
+ the following signature.
+
+ This definition of cell differs from the definition used in the literature.
+ In the literature, 'cell' refers to an object with a single scalar output.
+ This definition refers to a horizontal array of such units.
An RNN cell, in the most abstract setting, is anything that has
a state and performs some operation that takes a matrix of inputs.
@@ -84,13 +91,6 @@ class _RNNCell(object):
state matrix with `self.state_size` columns. If `self.state_size` is a
tuple of integers, then it results in a tuple of `len(state_size)` state
matrices, each with a column size corresponding to values in `state_size`.
-
- This module provides a number of basic commonly used RNN cells, such as
- LSTM (Long Short Term Memory) or GRU (Gated Recurrent Unit), and a number
- of operators that allow add dropouts, projections, or embeddings for inputs.
- Constructing multi-layer cells is supported by the class `MultiRNNCell`,
- or by calling the `rnn` ops several times. Every `RNNCell` must have the
- properties below and implement `__call__` with the following signature.
"""
def __call__(self, inputs, state, scope=None):
@@ -140,7 +140,7 @@ class _RNNCell(object):
If `state_size` is a nested list or tuple, then the return value is
a nested list or tuple (of the same structure) of `2-D` tensors with
- the shapes `[batch_size x s]` for each s in `state_size`.
+ the shapes `[batch_size x s]` for each s in `state_size`.
"""
with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
state_size = self.state_size
diff --git a/tensorflow/python/platform/tf_logging.py b/tensorflow/python/platform/tf_logging.py
index 70ecda1dda..335fd110e7 100644
--- a/tensorflow/python/platform/tf_logging.py
+++ b/tensorflow/python/platform/tf_logging.py
@@ -37,6 +37,7 @@ from tensorflow.python.util.all_util import remove_undocumented
# Determine whether we are in an interactive environment
+_interactive = False
try:
# This is only defined in interactive shells
if _sys.ps1: _interactive = True
diff --git a/tensorflow/tensorboard/README.md b/tensorflow/tensorboard/README.md
index b9addd4b68..0c21bb508f 100644
--- a/tensorflow/tensorboard/README.md
+++ b/tensorflow/tensorboard/README.md
@@ -91,7 +91,7 @@ produce a consistent history of what happened.
### Runs: Comparing different executions of your model
You may want to visually compare multiple executions of your model; for example,
-suppose you've changed the hyperparameters and want to see if its converging
+suppose you've changed the hyperparameters and want to see if it's converging
faster. TensorBoard enables this through different "runs". When TensorBoard is
passed a `logdir` at startup, it recursively walks the directory tree rooted at
`logdir` looking for subdirectories that contain tfevents data. Every time it
diff --git a/tensorflow/tensorboard/defs.bzl b/tensorflow/tensorboard/defs.bzl
index 7ad97f91f8..bae7078c5b 100644
--- a/tensorflow/tensorboard/defs.bzl
+++ b/tensorflow/tensorboard/defs.bzl
@@ -36,7 +36,7 @@ def tensorboard_typescript_genrule(name, srcs, typings=[], **kwargs):
# data attribute won't be considered when --genrule_strategy=sandboxed. See
# https://github.com/bazelbuild/bazel/issues/1147 and its linked issues.
data = [
- "@org_nodejs//:bin/node",
+ "@org_nodejs",
"@com_microsoft_typescript",
]
native.genrule(
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 471a2173aa..aebdfed837 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -124,6 +124,7 @@ def tf_copts():
"/DLANG_CXX11",
"/D__VERSION__=\\\"MSVC\\\"",
"/DPLATFORM_WINDOWS",
+ "/DTF_COMPILE_LIBRARY",
"/DEIGEN_HAS_C99_MATH",
"/DTENSORFLOW_USE_EIGEN_THREADPOOL",
],
@@ -392,7 +393,7 @@ def tf_cc_tests(srcs, deps, name='', linkstatic=0, tags=[], size="medium",
def tf_cc_test_mkl(srcs, deps, name='', linkstatic=0, tags=[], size="medium",
args=None):
- tf_cc_tests(srcs, deps, linkstatic, tags=tags, size=size, args=args)
+ if_mkl(tf_cc_tests(srcs, deps, linkstatic, tags=tags, size=size, args=args))
def tf_cc_tests_gpu(srcs, deps, name='', linkstatic=0, tags=[], size="medium",
args=None):
diff --git a/tensorflow/tools/benchmark/BUILD b/tensorflow/tools/benchmark/BUILD
index 3b1901fd56..a2ffca97ec 100644
--- a/tensorflow/tools/benchmark/BUILD
+++ b/tensorflow/tools/benchmark/BUILD
@@ -34,6 +34,7 @@ cc_library(
"//tensorflow/core:lib",
"//tensorflow/core:framework",
"//tensorflow/core:framework_internal",
+ "//tensorflow/core:framework_lite",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:tensorflow",
"//tensorflow/core:test",
diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc
index 0fcfaf747b..db2ac31baf 100644
--- a/tensorflow/tools/benchmark/benchmark_model.cc
+++ b/tensorflow/tools/benchmark/benchmark_model.cc
@@ -36,6 +36,7 @@ limitations under the License.
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/init_main.h"
#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/platform.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/util/command_line_flags.h"
@@ -272,7 +273,11 @@ Status TimeMultipleRuns(double sleep_seconds, int num_runs,
// This can be helpful to determine the effect of mobile processor
// scaling and thermal throttling.
if (sleep_seconds > 0.0) {
+#ifdef PLATFORM_WINDOWS
+ Sleep(sleep_seconds * 1000);
+#else
nanosleep(&req, nullptr);
+#endif
}
}
std::stringstream stream;
diff --git a/tensorflow/tools/ci_build/Dockerfile.android b/tensorflow/tools/ci_build/Dockerfile.android
index 887589bc93..4d46c672ab 100644
--- a/tensorflow/tools/ci_build/Dockerfile.android
+++ b/tensorflow/tools/ci_build/Dockerfile.android
@@ -1,4 +1,4 @@
-FROM ubuntu:16.04
+FROM ubuntu:14.04
MAINTAINER Jan Prach <jendap@google.com>
@@ -10,9 +10,8 @@ RUN add-apt-repository -y ppa:openjdk-r/ppa && \
RUN /install/install_deb_packages.sh
RUN /install/install_bazel.sh
-# Set up bazelrc.
-COPY install/.bazelrc /root/.bazelrc
-ENV BAZELRC /root/.bazelrc
+# Set up the master bazelrc configuration file.
+COPY install/.bazelrc /etc/bazel.bazelrc
# Install extra libraries for android sdk.
RUN apt-get update && apt-get install -y \
diff --git a/tensorflow/tools/ci_build/Dockerfile.cmake b/tensorflow/tools/ci_build/Dockerfile.cmake
index 8a28fe6cdf..22eaf11b91 100644
--- a/tensorflow/tools/ci_build/Dockerfile.cmake
+++ b/tensorflow/tools/ci_build/Dockerfile.cmake
@@ -7,9 +7,10 @@ COPY install/*.sh /install/
RUN /install/install_bootstrap_deb_packages.sh
RUN /install/install_deb_packages.sh
+RUN apt-get update
+RUN apt-get install -y --no-install-recommends python-pip
RUN pip install --upgrade numpy
# Install golang
RUN add-apt-repository -y ppa:ubuntu-lxc/lxd-stable
-RUN apt-get update
RUN apt-get install -y golang
diff --git a/tensorflow/tools/ci_build/Dockerfile.cpu b/tensorflow/tools/ci_build/Dockerfile.cpu
index 8e0be14ca6..206108930a 100644
--- a/tensorflow/tools/ci_build/Dockerfile.cpu
+++ b/tensorflow/tools/ci_build/Dockerfile.cpu
@@ -1,4 +1,4 @@
-FROM ubuntu:16.04
+FROM ubuntu:14.04
MAINTAINER Jan Prach <jendap@google.com>
@@ -15,6 +15,5 @@ RUN /install/install_buildifier.sh
RUN /install/install_auditwheel.sh
RUN /install/install_golang.sh
-# Set up bazelrc.
-COPY install/.bazelrc /root/.bazelrc
-ENV BAZELRC /root/.bazelrc
+# Set up the master bazelrc configuration file.
+COPY install/.bazelrc /etc/bazel.bazelrc
diff --git a/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu b/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu
index 79cf1844f2..b914f51918 100644
--- a/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu
+++ b/tensorflow/tools/ci_build/Dockerfile.debian.jessie.cpu
@@ -22,6 +22,5 @@ RUN /install/install_golang.sh
# Fix a virtualenv install issue specific to Debian Jessie.
RUN pip install --upgrade virtualenv
-# Set up bazelrc.
-COPY install/.bazelrc /root/.bazelrc
-ENV BAZELRC /root/.bazelrc
+# Set up the master bazelrc configuration file.
+COPY install/.bazelrc /etc/bazel.bazelrc
diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu b/tensorflow/tools/ci_build/Dockerfile.gpu
index 1cf1e40404..68493965fa 100644
--- a/tensorflow/tools/ci_build/Dockerfile.gpu
+++ b/tensorflow/tools/ci_build/Dockerfile.gpu
@@ -1,7 +1,12 @@
-FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04
+FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu14.04
MAINTAINER Jan Prach <jendap@google.com>
+# In the Ubuntu 14.04 images, the cudnn files are placed in system paths. Copy
+# them to /usr/local/cuda.
+RUN cp /usr/include/cudnn.h /usr/local/cuda/include
+RUN cp /usr/lib/x86_64-linux-gnu/libcudnn* /usr/local/cuda/lib64
+
# Copy and run the install scripts.
COPY install/*.sh /install/
RUN /install/install_bootstrap_deb_packages.sh
@@ -12,9 +17,8 @@ RUN /install/install_pip_packages.sh
RUN /install/install_bazel.sh
RUN /install/install_golang.sh
-# Set up bazelrc.
-COPY install/.bazelrc /root/.bazelrc
-ENV BAZELRC /root/.bazelrc
+# Set up the master bazelrc configuration file.
+COPY install/.bazelrc /etc/bazel.bazelrc
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
# Configure the build for our CUDA configuration.
diff --git a/tensorflow/tools/ci_build/Dockerfile.hadoop b/tensorflow/tools/ci_build/Dockerfile.hadoop
index 7af9f38708..489493c26e 100644
--- a/tensorflow/tools/ci_build/Dockerfile.hadoop
+++ b/tensorflow/tools/ci_build/Dockerfile.hadoop
@@ -1,4 +1,4 @@
-FROM ubuntu:16.04
+FROM ubuntu:14.04
MAINTAINER Jonathan Hseu <jhseu@google.com>
@@ -14,6 +14,5 @@ RUN /install/install_proto3.sh
RUN /install/install_buildifier.sh
RUN /install/install_hadoop.sh
-# Set up bazelrc.
-COPY install/.bazelrc /root/.bazelrc
-ENV BAZELRC /root/.bazelrc
+# Set up the master bazelrc configuration file.
+COPY install/.bazelrc /etc/bazel.bazelrc
diff --git a/tensorflow/tools/ci_build/Dockerfile.tensorboard b/tensorflow/tools/ci_build/Dockerfile.tensorboard
index 12b8aa18da..9795872e2c 100644
--- a/tensorflow/tools/ci_build/Dockerfile.tensorboard
+++ b/tensorflow/tools/ci_build/Dockerfile.tensorboard
@@ -1,4 +1,4 @@
-FROM ubuntu:16.04
+FROM ubuntu:14.04
MAINTAINER Jan Prach <jendap@google.com>
diff --git a/tensorflow/tools/ci_build/README.md b/tensorflow/tools/ci_build/README.md
index 4b7858ca89..1fa618e698 100644
--- a/tensorflow/tools/ci_build/README.md
+++ b/tensorflow/tools/ci_build/README.md
@@ -20,20 +20,20 @@ run continuous integration [ci.tensorflow.org](https://ci.tensorflow.org).
2. Clone tensorflow repository.
```bash
-git clone https://github.com/tensorflow/tensorflow.git
-```
+ git clone https://github.com/tensorflow/tensorflow.git
+ ```
3. Go to tensorflow directory
```bash
-cd tensorflow
-```
+ cd tensorflow
+ ```
4. Build what you want, for example
```bash
-tensorflow/tools/ci_build/ci_build.sh CPU bazel test //tensorflow/...
-```
+ tensorflow/tools/ci_build/ci_build.sh CPU bazel test //tensorflow/...
+ ```
diff --git a/tensorflow/tools/ci_build/builds/run_pip_tests.sh b/tensorflow/tools/ci_build/builds/run_pip_tests.sh
index be076cd4c0..10bed0b786 100755
--- a/tensorflow/tools/ci_build/builds/run_pip_tests.sh
+++ b/tensorflow/tools/ci_build/builds/run_pip_tests.sh
@@ -104,28 +104,26 @@ export TF_NEED_CUDA=$IS_GPU
yes "" | ./configure
# Figure out how many concurrent tests we can run and do run the tests.
+BAZEL_PARALLEL_TEST_FLAGS=""
if [[ $IS_GPU == 1 ]]; then
# Number of test threads is the number of GPU cards available.
if [[ $IS_MAC == 1 ]]; then
- PAR_TEST_JOBS=1
+ BAZEL_PARALLEL_TEST_FLAGS="--local_test_jobs=1"
else
PAR_TEST_JOBS=$TF_GPU_COUNT
+ BAZEL_PARALLEL_TEST_FLAGS="--local_test_jobs=${TF_GPU_COUNT} \
+ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute"
fi
-
- # Actually run the tests.
- bazel test ${BAZEL_FLAGS} --local_test_jobs=${PAR_TEST_JOBS} \
- --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
- -- ${BAZEL_TEST_TARGETS}
-
else
# Number of test threads is the number of physical CPUs.
if [[ $IS_MAC == 1 ]]; then
- PAR_TEST_JOBS=$(sysctl -n hw.ncpu)
+ BAZEL_PARALLEL_TEST_FLAGS="--local_test_jobs=$(sysctl -n hw.ncpu)"
else
- PAR_TEST_JOBS=$(grep -c ^processor /proc/cpuinfo)
+ BAZEL_PARALLEL_TEST_FLAGS="--local_test_jobs=$(grep -c ^processor /proc/cpuinfo)"
fi
-
- # Actually run the tests.
- bazel test ${BAZEL_FLAGS} --local_test_jobs=${PAR_TEST_JOBS} \
- -- ${BAZEL_TEST_TARGETS}
fi
+
+# Actually run the tests.
+bazel test ${BAZEL_FLAGS} ${BAZEL_PARALLEL_TEST_FLAGS} -- \
+ ${BAZEL_TEST_TARGETS}
+
diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index e1a312b858..cb204bc25f 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -19,7 +19,7 @@
#
# The script obeys the following required environment variables:
# TF_BUILD_CONTAINER_TYPE: (CPU | GPU | ANDROID | ANDROID_FULL)
-# TF_BUILD_PYTHON_VERSION: (PYTHON2 | PYTHON3)
+# TF_BUILD_PYTHON_VERSION: (PYTHON2 | PYTHON3 | PYTHON3.5)
# TF_BUILD_IS_PIP: (NO_PIP | PIP | BOTH)
#
# The below environment variable is required, but will be deprecated together
@@ -33,7 +33,8 @@
# ANDROID & PIP (Android and PIP builds are mutually exclusive)
#
# 2) TF_BUILD_PYTHON_VERSION is set to PYTHON3, the build will use the version
-# pointed to by "which python3" on the system.
+# pointed to by "which python3" on the system, which is typically python3.4. To
+# build for python3.5, set the environment variable to PYTHON3.5.
#
#
# Additionally, the script follows the directions of optional environment
@@ -426,7 +427,9 @@ fi
# Process Python version
if [[ ${TF_BUILD_PYTHON_VERSION} == "python2" ]]; then
:
-elif [[ ${TF_BUILD_PYTHON_VERSION} == "python3" ]]; then
+elif [[ ${TF_BUILD_PYTHON_VERSION} == "python3" || \
+ ${TF_BUILD_PYTHON_VERSION} == "python3.4" || \
+ ${TF_BUILD_PYTHON_VERSION} == "python3.5" ]]; then
# Supply proper environment variable to select Python 3
if [[ "${DO_DOCKER}" == "1" ]]; then
EXTRA_PARAMS="${EXTRA_PARAMS} -e CI_BUILD_PYTHON=${TF_BUILD_PYTHON_VERSION}"
@@ -493,6 +496,30 @@ echo ""
TMP_DIR=""
DOCKERFILE_FLAG=""
+if [[ "${TF_BUILD_PYTHON_VERSION}" == "python3.5" ]]; then
+ # Modify Dockerfile for Python3.5 build
+ TMP_DIR=$(mktemp -d)
+ echo "Docker build will occur in temporary directory: ${TMP_DIR}"
+
+ # Copy the files required for the docker build
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ cp -r "${SCRIPT_DIR}/install" "${TMP_DIR}/install" || \
+ die "ERROR: Failed to copy directory ${SCRIPT_DIR}/install"
+
+ DOCKERFILE="${SCRIPT_DIR}/Dockerfile.${TF_BUILD_CONTAINER_TYPE}"
+ cp "${DOCKERFILE}" "${TMP_DIR}/" || \
+ die "ERROR: Failed to copy Dockerfile at ${DOCKERFILE}"
+ DOCKERFILE="${TMP_DIR}/Dockerfile.${TF_BUILD_CONTAINER_TYPE}"
+
+ # Replace a line in the Dockerfile
+ sed -i \
+ 's/RUN \/install\/install_pip_packages.sh/RUN \/install\/install_python3.5_pip_packages.sh/g' \
+ "${DOCKERFILE}" && \
+ echo "Copied and modified Dockerfile for Python 3.5 build: ${DOCKERFILE}" || \
+ die "ERROR: Faild to copy and modify Dockerfile: ${DOCKERFILE}"
+
+ DOCKERFILE_FLAG="--dockerfile ${DOCKERFILE}"
+fi
chmod +x ${TMP_SCRIPT}
diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh
index 44aaed8ae9..9ecf16c46f 100755
--- a/tensorflow/tools/ci_build/ci_sanity.sh
+++ b/tensorflow/tools/ci_build/ci_sanity.sh
@@ -105,7 +105,7 @@ do_pylint() {
if [[ $1 == "PYTHON2" ]]; then
PYLINT_BIN="python /usr/local/lib/python2.7/dist-packages/pylint/lint.py"
elif [[ $1 == "PYTHON3" ]]; then
- PYLINT_BIN="python3 /usr/local/lib/python3.5/dist-packages/pylint/lint.py"
+ PYLINT_BIN="python3 /usr/local/lib/python3.4/dist-packages/pylint/lint.py"
else
echo "Unrecognized python version (PYTHON2 | PYTHON3): $1"
return 1
diff --git a/tensorflow/tools/ci_build/install/install_deb_packages.sh b/tensorflow/tools/ci_build/install/install_deb_packages.sh
index 23dc6d42c4..a62a6f8a3c 100755
--- a/tensorflow/tools/ci_build/install/install_deb_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_deb_packages.sh
@@ -42,14 +42,14 @@ apt-get install -y --no-install-recommends \
openjdk-8-jre-headless \
pkg-config \
python-dev \
- python-pip \
+ python-setuptools \
+ python-virtualenv \
python3-dev \
- python3-pip \
+ python3-setuptools \
rsync \
sudo \
swig \
unzip \
- virtualenv \
wget \
zip \
zlib1g-dev
diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index 19c46bbcd4..8011f8de24 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -16,56 +16,64 @@
set -e
+# We don't apt-get install pip so that we can install a newer version of pip.
+# This is not needed after we upgrade to Ubuntu 16.04.
+easy_install -U pip
+easy_install3 -U pip
+
# Install pip packages from whl files to avoid the time-consuming process of
# building from source.
-pip install wheel
+pip2 install wheel
pip3 install wheel
# Install six.
-pip install --upgrade six==1.10.0
+pip2 install --upgrade six==1.10.0
pip3 install --upgrade six==1.10.0
# Install werkzeug.
-pip install --upgrade werkzeug==0.11.10
+pip2 install --upgrade werkzeug==0.11.10
pip3 install --upgrade werkzeug==0.11.10
# Install protobuf.
-pip install --upgrade protobuf==3.2.0
+pip2 install --upgrade protobuf==3.2.0
pip3 install --upgrade protobuf==3.2.0
# Remove obsolete version of six, which can sometimes confuse virtualenv.
rm -rf /usr/lib/python3/dist-packages/six*
-pip install --upgrade numpy==1.12.0
-pip3 install --upgrade numpy==1.12.0
+# numpy needs to be installed from source to fix segfaults. See:
+# https://github.com/tensorflow/tensorflow/issues/6968
+# This workaround isn't needed for Ubuntu 16.04 or later.
+pip2 install --no-binary=:all: --upgrade numpy==1.12.0
+pip3 install --no-binary=:all: --upgrade numpy==1.12.0
-pip install scipy==0.18.1
+pip2 install scipy==0.18.1
pip3 install scipy==0.18.1
-pip install scikit-learn==0.18.1
+pip2 install scikit-learn==0.18.1
pip3 install scikit-learn==0.18.1
# pandas required by tf.learn/inflow
-pip install pandas==0.19.2
+pip2 install pandas==0.19.2
pip3 install pandas==0.19.2
# Benchmark tests require the following:
-pip install psutil
+pip2 install psutil
pip3 install psutil
-pip install py-cpuinfo
+pip2 install py-cpuinfo
pip3 install py-cpuinfo
# pylint tests require the following:
-pip install pylint
+pip2 install pylint
pip3 install pylint
# pep8 tests require the following:
-pip install pep8
+pip2 install pep8
pip3 install pep8
# tf.mock requires the following for python2:
-pip install mock
+pip2 install mock
-pip install portpicker
+pip2 install portpicker
pip3 install portpicker
diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
new file mode 100755
index 0000000000..e7e2d256cd
--- /dev/null
+++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# Install packages required by Python3.5 build
+
+# TODO(cais): Remove this file once we upgrade to ubuntu:16.04 docker images for
+# Python 3.5 builds.
+
+# fkrull/deadsnakes is for Python3.5
+add-apt-repository -y ppa:fkrull/deadsnakes
+apt-get update
+
+set +e
+# Upgrade swig to 3.0.8
+SWIG_VERSION="3.0.8"
+swig_ver_flat=$(echo $SWIG_VERSION | sed 's/\.//g' | sed 's/^0*//g')
+local_swig_ver=$(swig -version | grep -i version | awk '{print $3}')
+local_swig_ver_flat=$(echo $local_swig_ver | sed 's/\.//g' | sed 's/^0*//g')
+if [[ -z $local_swig_ver_flat ]]; then
+ local_swig_ver_flat=0
+fi
+if (( $local_swig_ver_flat < $swig_ver_flat )); then
+ set -e
+ wget -q http://downloads.sourceforge.net/swig/swig-3.0.8.tar.gz
+ tar xzf swig-3.0.8.tar.gz
+ pushd swig-3.0.8
+ apt-get install -y --no-install-recommends libpcre3-dev
+ ./configure
+ make
+ make install
+ rm -f /usr/bin/swig
+ ln -s /usr/local/bin/swig /usr/bin/swig
+ popd
+ rm -rf swig-3.0.8 swig-3.0.8.tar.gz
+fi
+set -e
+# Install Python 3.5 and dev library
+apt-get install -y --no-install-recommends python3.5 libpython3.5-dev
+
+# Install pip3.5
+set +e
+pip35_version=$(pip3.5 --version | grep "python 3.5")
+if [[ -z $pip35_version ]]; then
+ set -e
+ wget -q https://bootstrap.pypa.io/get-pip.py
+ python3.5 get-pip.py
+ rm -f get-pip.py
+fi
+
+set -e
+# Install six.
+pip3.5 install --upgrade six==1.10.0
+
+# Install protobuf.
+pip3.5 install --upgrade protobuf==3.2.0
+
+# Remove obsolete version of six, which can sometimes confuse virtualenv.
+rm -rf /usr/lib/python3/dist-packages/six*
+
+# Install numpy, scipy and scikit-learn required by the builds
+
+# numpy needs to be installed from source to fix segfaults. See:
+# https://github.com/tensorflow/tensorflow/issues/6968
+# This workaround isn't needed for Ubuntu 16.04 or later.
+pip3.5 install --no-binary=:all: --upgrade numpy==1.12.0
+
+pip3.5 install scipy==0.18.1
+
+pip3.5 install scikit-learn==0.18.1
+
+# pandas required by tf.learn/inflow
+pip3 install pandas==0.19.2
+
+# Install recent-enough version of wheel for Python 3.5 wheel builds
+pip3.5 install wheel==0.29.0
+
+pip3.5 install portpicker
+
+pip3.5 install werkzeug
diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
index 73c08e5d0b..1488e8d78c 100644
--- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
@@ -101,11 +101,8 @@ exclude_gpu_cc_tests="${extra_failing_gpu_cc_tests} + ${exclude_cpu_cc_tests}"
function get_failing_cpu_py_tests() {
echo "
//$1/tensorflow/python:basic_session_run_hooks_test + \
- //$1/tensorflow/python:bigquery_reader_ops_test + \
//$1/tensorflow/python:contrib_test + \
//$1/tensorflow/python:dequantize_op_test + \
- //$1/tensorflow/python:directory_watcher_test + \
- //$1/tensorflow/python:event_multiplexer_test + \
//$1/tensorflow/python:file_io_test + \
//$1/tensorflow/python:file_system_test + \
//$1/tensorflow/python:framework_meta_graph_test + \
diff --git a/tensorflow/tools/compatibility/README.md b/tensorflow/tools/compatibility/README.md
index 9dba070a4f..aabc7b253d 100644
--- a/tensorflow/tools/compatibility/README.md
+++ b/tensorflow/tools/compatibility/README.md
@@ -11,7 +11,10 @@ It will print a list of errors it finds that it can't fix. You can also run
it on a directory tree:
```
+# just upgrade the .py files
tf_upgrade.py --intree coolcode --outtree coolcode-upgraded
+# upgrade the .py files and also copy all the other files to the outtree
+tf_upgrade.py --intree coolcode --outtree coolcode-upgraded --copyotherfiles True
```
In either case, it will also dump out a report e.g. which will detail changes
diff --git a/tensorflow/tools/compatibility/tf_upgrade.py b/tensorflow/tools/compatibility/tf_upgrade.py
index 26bf117256..80439f835a 100644
--- a/tensorflow/tools/compatibility/tf_upgrade.py
+++ b/tensorflow/tools/compatibility/tf_upgrade.py
@@ -140,6 +140,7 @@ class APIChangeSpec(object):
"tf.batch_svd": "tf.svd",
"tf.batch_fft": "tf.fft",
"tf.batch_ifft": "tf.ifft",
+ "tf.batch_fft2d": "tf.fft2d",
"tf.batch_ifft2d": "tf.ifft2d",
"tf.batch_fft3d": "tf.fft3d",
"tf.batch_ifft3d": "tf.ifft3d",
@@ -566,7 +567,7 @@ class TensorFlowCodeUpgrader(object):
return 1, text, process_errors
# pylint: enable=broad-except
- def process_tree(self, root_directory, output_root_directory):
+ def process_tree(self, root_directory, output_root_directory, copy_other_files):
"""Processes upgrades on an entire tree of python files in place.
Note that only Python files. If you have custom code in other languages,
@@ -596,13 +597,21 @@ class TensorFlowCodeUpgrader(object):
# Collect list of files to process (we do this to correctly handle if the
# user puts the output directory in some sub directory of the input dir)
files_to_process = []
+ files_to_copy = []
for dir_name, _, file_list in os.walk(root_directory):
py_files = [f for f in file_list if f.endswith(".py")]
+ copy_files = [f for f in file_list if not f.endswith(".py")]
for filename in py_files:
fullpath = os.path.join(dir_name, filename)
fullpath_output = os.path.join(
output_root_directory, os.path.relpath(fullpath, root_directory))
files_to_process.append((fullpath, fullpath_output))
+ if copy_other_files:
+ for filename in copy_files:
+ fullpath = os.path.join(dir_name, filename)
+ fullpath_output = os.path.join(
+ output_root_directory, os.path.relpath(fullpath, root_directory))
+ files_to_copy.append((fullpath, fullpath_output))
file_count = 0
tree_errors = []
@@ -619,6 +628,11 @@ class TensorFlowCodeUpgrader(object):
_, l_report, l_errors = self.process_file(input_path, output_path)
tree_errors += l_errors
report += l_report
+ for input_path, output_path in files_to_copy:
+ output_directory = os.path.dirname(output_path)
+ if not os.path.isdir(output_directory):
+ os.makedirs(output_directory)
+ shutil.copy(input_path, output_path)
return file_count, report, tree_errors
@@ -651,6 +665,13 @@ Simple usage:
help="If converting a whole tree of files, the output "
"directory (relative or absolute).")
parser.add_argument(
+ "--copyotherfiles",
+ dest="copy_other_files",
+ help=("If converting a whole tree of files, whether to "
+ "copy the other files."),
+ type=bool,
+ default=False)
+ parser.add_argument(
"--reportfile",
dest="report_filename",
help=("The name of the file where the report log is "
@@ -669,7 +690,7 @@ Simple usage:
files_processed = 1
elif args.input_tree:
files_processed, report_text, errors = upgrade.process_tree(
- args.input_tree, args.output_tree)
+ args.input_tree, args.output_tree, args.copy_other_files)
else:
parser.print_help()
if report_text:
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index a67f1af2bd..dd18b61017 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -61,12 +61,11 @@ RUN add-apt-repository -y ppa:openjdk-r/ppa && \
# Running bazel inside a `docker build` command causes trouble, cf:
# https://github.com/bazelbuild/bazel/issues/134
# The easiest solution is to set up a bazelrc file forcing --batch.
-RUN echo "startup --batch" >>/root/.bazelrc
+RUN echo "startup --batch" >>/etc/bazel.bazelrc
# Similarly, we need to workaround sandboxing issues:
# https://github.com/bazelbuild/bazel/issues/418
RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
- >>/root/.bazelrc
-ENV BAZELRC /root/.bazelrc
+ >>/etc/bazel.bazelrc
# Install the most recent bazel release.
ENV BAZEL_VERSION 0.4.5
WORKDIR /
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index d1a733458d..8ead2f15ae 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -61,12 +61,11 @@ RUN add-apt-repository -y ppa:openjdk-r/ppa && \
# Running bazel inside a `docker build` command causes trouble, cf:
# https://github.com/bazelbuild/bazel/issues/134
# The easiest solution is to set up a bazelrc file forcing --batch.
-RUN echo "startup --batch" >>/root/.bazelrc
+RUN echo "startup --batch" >>/etc/bazel.bazelrc
# Similarly, we need to workaround sandboxing issues:
# https://github.com/bazelbuild/bazel/issues/418
RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
- >>/root/.bazelrc
-ENV BAZELRC /root/.bazelrc
+ >>/etc/bazel.bazelrc
# Install the most recent bazel release.
ENV BAZEL_VERSION 0.4.5
WORKDIR /
diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py
index c97ce7561f..299d50c359 100755
--- a/tensorflow/tools/git/gen_git_source.py
+++ b/tensorflow/tools/git/gen_git_source.py
@@ -76,7 +76,11 @@ def configure(src_base_path, debug=False):
# Remove and recreate the path
if os.path.exists(gen_path):
if os.path.isdir(gen_path):
- shutil.rmtree(gen_path)
+ try:
+ shutil.rmtree(gen_path)
+ except PermissionError:
+ raise RuntimeError("Cannot delete directory %s due to permission "
+ "error, inspect and remove manually" % gen_path)
else:
raise RuntimeError("Cannot delete non-directory %s, inspect ",
"and remove manually" % gen_path)
diff --git a/tensorflow/tools/graph_transforms/summarize_graph_main.cc b/tensorflow/tools/graph_transforms/summarize_graph_main.cc
index 8c23ae7a74..f45dfbba0c 100644
--- a/tensorflow/tools/graph_transforms/summarize_graph_main.cc
+++ b/tensorflow/tools/graph_transforms/summarize_graph_main.cc
@@ -109,7 +109,7 @@ Status SummarizeGraph(const GraphDef& graph, const string& graph_path) {
if (node.op() == "Placeholder") {
placeholders.push_back(&node);
}
- if (node.op() == "Variable") {
+ if (node.op() == "Variable" || node.op() == "VariableV2") {
variables.push_back(&node);
}
}
@@ -168,7 +168,8 @@ Status SummarizeGraph(const GraphDef& graph, const string& graph_path) {
if (node.device() != "") {
++device_counts[node.device()];
}
- if ((node.op() == "Const") || (node.op() == "Variable")) {
+ if ((node.op() == "Const") || (node.op() == "Variable") ||
+ (node.op() == "VariableV2")) {
Tensor tensor;
if (node.attr().count("value") &&
tensor.FromProto(node.attr().at("value").tensor())) {
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 74a7818967..d9c67862e7 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -66,23 +66,21 @@ py_binary(
"README",
"setup.py",
":included_headers",
- "//tensorflow/contrib/ndlstm",
"//tensorflow/contrib/nn:nn_py",
"//tensorflow/contrib/session_bundle:session_bundle_pip",
- "//tensorflow/contrib/slim",
"//tensorflow/contrib/slim/python/slim/data:data_pip",
- "//tensorflow/contrib/slim/python/slim/nets:nets_pip",
- "//tensorflow/contrib/specs",
- "//tensorflow/contrib/tensor_forest:init_py",
- "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip",
"//tensorflow/python:util_example_parser_configuration",
"//tensorflow/python/debug:debug_pip",
"//tensorflow/python/saved_model",
"//tensorflow/python/tools:tools_pip",
- # The following target has an issue when archiving them into the python
- # zip, exclude them for now.
- # "//tensorflow/tensorboard",
- # This package does not build. Exclude it in windows for now.
+ "//tensorflow/tensorboard",
+ # These targets don't build on Windows yet. Exclude them for now.
+ # "//tensorflow/contrib/ndlstm",
+ # "//tensorflow/contrib/slim",
+ # "//tensorflow/contrib/slim/python/slim/nets:nets_pip",
+ # "//tensorflow/contrib/specs",
+ # "//tensorflow/contrib/tensor_forest:init_py",
+ # "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip",
# "//tensorflow/examples/tutorials/mnist:package",
],
srcs_version = "PY2AND3",
diff --git a/tensorflow/tools/pip_package/MANIFEST.in b/tensorflow/tools/pip_package/MANIFEST.in
index 22b00c4284..fe21f221b1 100644
--- a/tensorflow/tools/pip_package/MANIFEST.in
+++ b/tensorflow/tools/pip_package/MANIFEST.in
@@ -1,4 +1,6 @@
include README
recursive-include * *.py
recursive-include * *.so
+recursive-include * *.dll
+recursive-include * *.lib
recursive-include * *.csv
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 25aecb5707..4c4973080f 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -64,6 +64,10 @@ temp_workaround_http_archive = repository_rule(
# If TensorFlow is linked as a submodule.
# path_prefix and tf_repo_name are no longer used.
def tf_workspace(path_prefix = "", tf_repo_name = ""):
+ # We must check the bazel version before trying to parse any other BUILD
+ # files, in case the parsing of those build files depends on the bazel
+ # version we require here.
+ check_version("0.4.5")
cuda_configure(name = "local_config_cuda")
sycl_configure(name = "local_config_sycl")
if path_prefix:
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index 01e070f2be..a2b3e7d79e 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -826,8 +826,17 @@ def _cuda_autoconf_impl(repository_ctx):
cuda_configure = repository_rule(
implementation = _cuda_autoconf_impl,
- local = True,
+ environ = [
+ _GCC_HOST_COMPILER_PATH,
+ "TF_NEED_CUDA",
+ _CUDA_TOOLKIT_PATH,
+ _CUDNN_INSTALL_PATH,
+ _TF_CUDA_VERSION,
+ _TF_CUDNN_VERSION,
+ _TF_CUDA_COMPUTE_CAPABILITIES,
+ ],
)
+
"""Detects and configures the local CUDA toolchain.
Add the following to your WORKSPACE FILE:
diff --git a/third_party/sycl/crosstool/computecpp.tpl b/third_party/sycl/crosstool/computecpp.tpl
index 66dd9aea7b..595e7136a6 100755
--- a/third_party/sycl/crosstool/computecpp.tpl
+++ b/third_party/sycl/crosstool/computecpp.tpl
@@ -65,7 +65,7 @@ def main():
# strip asan for the device
computecpp_device_compiler_flags = ['-sycl-compress-name', '-DTENSORFLOW_USE_SYCL', '-Wno-unused-variable', '-I', COMPUTECPP_INCLUDE, '-isystem',
COMPUTECPP_INCLUDE, '-std=c++11', '-sycl', '-emit-llvm', '-no-serial-memop', '-Xclang', '-cl-denorms-are-zero', '-Xclang', '-cl-fp32-correctly-rounded-divide-sqrt']
- computecpp_device_compiler_flags += [flag for flag in compiler_flags if not flag.startswith(('-fsanitize'))]
+ computecpp_device_compiler_flags += [flag for flag in compiler_flags if not flag.startswith(('-fsanitize', '-march=native', '-mavx'))]
x = subprocess.call([COMPUTECPP_DRIVER] + computecpp_device_compiler_flags )
if(x == 0):
diff --git a/util/python/python_config.sh b/util/python/python_config.sh
index 789c4b35b3..4b18bf3578 100755
--- a/util/python/python_config.sh
+++ b/util/python/python_config.sh
@@ -181,7 +181,7 @@ function setup_python {
# Write tools/bazel.rc
echo "# Autogenerated by configure: DO NOT EDIT" > tools/bazel.rc
sed -e "s/\$PYTHON_MAJOR_VERSION/$python_major_version/g" \
- -e "s[\$PYTHON_BINARY[\"$PYTHON_BIN_PATH\"[g" \
+ -e "s|\$PYTHON_BINARY|\"$PYTHON_BIN_PATH\"|g" \
tools/bazel.rc.template >> tools/bazel.rc
# Write tools/python_bin_path.sh
echo "export PYTHON_BIN_PATH=\"$PYTHON_BIN_PATH\"" > tools/python_bin_path.sh