aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow
diff options
context:
space:
mode:
authorGravatar Akshay Modi <nareshmodi@google.com>2018-06-18 09:57:19 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-06-18 09:59:59 -0700
commite80732c9895d1283af9b98d6277ad1a1015e2e9a (patch)
tree14895657394f9cdfed8435460e37fe89a45ba599 /tensorflow
parent8ecf506fb8464dd273ce59f512f5e20d37dd5cfd (diff)
Merge changes from github.
PiperOrigin-RevId: 201011811
Diffstat (limited to 'tensorflow')
-rw-r--r--tensorflow/BUILD4
-rwxr-xr-xtensorflow/c/generate-pc.sh11
-rw-r--r--tensorflow/cc/gradients/math_grad.cc1
-rw-r--r--tensorflow/cc/gradients/nn_grad.cc47
-rw-r--r--tensorflow/cc/gradients/nn_grad_test.cc84
-rw-r--r--tensorflow/compiler/aot/codegen_test_h.golden4
-rw-r--r--tensorflow/compiler/aot/embedded_protocol_buffers.h2
-rw-r--r--tensorflow/compiler/aot/runtime.h4
-rw-r--r--tensorflow/compiler/aot/runtime_test.cc16
-rw-r--r--tensorflow/compiler/xla/service/cpu/BUILD18
-rw-r--r--tensorflow/compiler/xla/service/cpu/cpu_runtime.cc2
-rw-r--r--tensorflow/compiler/xla/service/cpu/cpu_runtime.h1
-rw-r--r--tensorflow/compiler/xla/service/cpu/ir_emitter.cc8
-rw-r--r--tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h20
-rw-r--r--tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc32
-rw-r--r--tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h31
-rw-r--r--tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc2
-rw-r--r--tensorflow/compiler/xla/service/pattern_matcher.h2
-rw-r--r--tensorflow/compiler/xla/service/tuple_simplifier.cc7
-rw-r--r--tensorflow/compiler/xla/service/tuple_simplifier.h9
-rw-r--r--tensorflow/compiler/xla/service/tuple_simplifier_test.cc77
-rw-r--r--tensorflow/contrib/autograph/__init__.py3
-rw-r--r--tensorflow/contrib/cmake/tf_c.cmake22
-rw-r--r--tensorflow/contrib/cmake/tf_cc_ops.cmake2
-rwxr-xr-xtensorflow/contrib/cmake/tf_python.cmake3
-rw-r--r--tensorflow/contrib/cmake/tools/create_def_file.py9
-rw-r--r--tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py28
-rw-r--r--tensorflow/contrib/eager/python/datasets.py3
-rw-r--r--tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb4
-rw-r--r--tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py22
-rw-r--r--tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py41
-rw-r--r--tensorflow/contrib/ffmpeg/__init__.py1
-rw-r--r--tensorflow/contrib/ffmpeg/ffmpeg_ops.py1
-rw-r--r--tensorflow/contrib/framework/__init__.py3
-rw-r--r--tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py11
-rw-r--r--tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c2
-rwxr-xr-xtensorflow/contrib/lite/download_dependencies.sh4
-rw-r--r--tensorflow/contrib/lite/examples/minimal/minimal.cc2
-rw-r--r--tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md14
-rw-r--r--tensorflow/contrib/lite/java/ovic/README.md4
-rw-r--r--tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h4
-rw-r--r--tensorflow/contrib/lite/python/interpreter.py2
-rw-r--r--tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc9
-rw-r--r--tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h3
-rw-r--r--tensorflow/contrib/lite/python/lite.py11
-rw-r--r--tensorflow/contrib/lite/toco/import_tensorflow.cc2
-rw-r--r--tensorflow/contrib/lite/toco/toco_port.cc6
-rw-r--r--tensorflow/contrib/lite/toco/toco_port.h18
-rwxr-xr-xtensorflow/contrib/makefile/compile_nsync.sh2
-rwxr-xr-xtensorflow/contrib/makefile/download_dependencies.sh4
-rw-r--r--tensorflow/contrib/metrics/python/ops/metric_ops.py2
-rw-r--r--tensorflow/contrib/mpi_collectives/kernels/ring.h2
-rw-r--r--tensorflow/contrib/opt/python/training/adamax_test.py6
-rw-r--r--tensorflow/contrib/opt/python/training/model_average_optimizer.py2
-rw-r--r--tensorflow/contrib/periodic_resample/BUILD20
-rw-r--r--tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc5
-rw-r--r--tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h415
-rw-r--r--tensorflow/contrib/periodic_resample/ops/array_ops.cc53
-rw-r--r--tensorflow/contrib/periodic_resample/ops/array_ops_test.cc41
-rw-r--r--tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py27
-rw-r--r--tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py8
-rw-r--r--tensorflow/contrib/predictor/contrib_estimator_predictor.py5
-rw-r--r--tensorflow/contrib/predictor/core_estimator_predictor.py5
-rw-r--r--tensorflow/contrib/predictor/predictor_factories.py24
-rw-r--r--tensorflow/contrib/predictor/predictor_factories_test.py19
-rw-r--r--tensorflow/contrib/predictor/saved_model_predictor.py6
-rw-r--r--tensorflow/contrib/quantize/README.md2
-rw-r--r--tensorflow/contrib/slim/python/slim/evaluation_test.py25
-rw-r--r--tensorflow/contrib/summary/summary.py5
-rw-r--r--tensorflow/contrib/tensor_forest/client/eval_metrics.py45
-rw-r--r--tensorflow/contrib/tensor_forest/python/tensor_forest.py34
-rw-r--r--tensorflow/contrib/tensor_forest/python/tensor_forest_test.py45
-rw-r--r--tensorflow/contrib/tensorrt/convert/convert_graph.cc66
-rw-r--r--tensorflow/contrib/tensorrt/convert/convert_nodes.cc97
-rw-r--r--tensorflow/contrib/tpu/python/tpu/datasets.py16
-rw-r--r--tensorflow/contrib/tpu/python/tpu/datasets_test.py26
-rw-r--r--tensorflow/core/BUILD9
-rw-r--r--tensorflow/core/api_def/base_api/api_def_Selu.pbtxt4
-rw-r--r--tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt48
-rw-r--r--tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt4
-rw-r--r--tensorflow/core/common_runtime/bfc_allocator.cc8
-rw-r--r--tensorflow/core/common_runtime/bfc_allocator.h3
-rw-r--r--tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc16
-rw-r--r--tensorflow/core/common_runtime/mkl_threadpool_device_test.cc53
-rw-r--r--tensorflow/core/common_runtime/process_util.cc11
-rw-r--r--tensorflow/core/common_runtime/threadpool_device.cc25
-rw-r--r--tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc4
-rw-r--r--tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc10
-rw-r--r--tensorflow/core/framework/allocator.h5
-rw-r--r--tensorflow/core/framework/op_gen_lib.cc1
-rw-r--r--tensorflow/core/framework/remote_fused_graph_execute_info.proto2
-rw-r--r--tensorflow/core/framework/tensor_test.cc24
-rw-r--r--tensorflow/core/graph/mkl_layout_pass.cc148
-rw-r--r--tensorflow/core/graph/mkl_layout_pass_test.cc31
-rw-r--r--tensorflow/core/grappler/costs/graph_properties.cc1
-rw-r--r--tensorflow/core/grappler/optimizers/BUILD2
-rw-r--r--tensorflow/core/grappler/optimizers/remapper.cc4
-rw-r--r--tensorflow/core/kernels/as_string_op.cc2
-rw-r--r--tensorflow/core/kernels/cwise_op_clip.cc43
-rw-r--r--tensorflow/core/kernels/dense_update_functor_gpu.cu.cc1
-rw-r--r--tensorflow/core/kernels/gather_functor.cc1
-rw-r--r--tensorflow/core/kernels/gather_functor_gpu.cu.cc1
-rw-r--r--tensorflow/core/kernels/gather_nd_op.cc4
-rw-r--r--tensorflow/core/kernels/gather_nd_op_gpu.cu.cc2
-rw-r--r--tensorflow/core/kernels/gather_op.cc1
-rw-r--r--tensorflow/core/kernels/mkl_concat_op.cc213
-rw-r--r--tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc2
-rw-r--r--tensorflow/core/kernels/mkl_pooling_ops_common.h6
-rw-r--r--tensorflow/core/kernels/scatter_nd_op.cc4
-rw-r--r--tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc1
-rw-r--r--tensorflow/core/kernels/scoped_allocator_ops_test.cc9
-rw-r--r--tensorflow/core/kernels/segment_reduction_ops.h10
-rw-r--r--tensorflow/core/kernels/sparse_matmul_op.cc2
-rw-r--r--tensorflow/core/kernels/string_split_op.cc130
-rw-r--r--tensorflow/core/ops/candidate_sampling_ops.cc5
-rw-r--r--tensorflow/core/ops/dataset_ops.cc24
-rw-r--r--tensorflow/core/ops/image_ops.cc4
-rw-r--r--tensorflow/core/ops/math_ops.cc2
-rw-r--r--tensorflow/core/ops/nn_ops.cc1
-rw-r--r--tensorflow/core/ops/string_ops.cc20
-rw-r--r--tensorflow/core/platform/cpu_info.cc23
-rw-r--r--tensorflow/core/platform/cpu_info.h7
-rw-r--r--tensorflow/core/platform/default/build_config.bzl2
-rw-r--r--tensorflow/core/platform/hadoop/hadoop_file_system.cc21
-rw-r--r--tensorflow/core/platform/posix/port.cc5
-rw-r--r--tensorflow/core/public/version.h4
-rw-r--r--tensorflow/core/util/mkl_util.h50
-rw-r--r--tensorflow/docs_src/community/groups.md29
-rw-r--r--tensorflow/docs_src/get_started/eager.md2
-rw-r--r--tensorflow/docs_src/get_started/index.md4
-rw-r--r--tensorflow/docs_src/install/install_c.md2
-rw-r--r--tensorflow/docs_src/install/install_go.md2
-rw-r--r--tensorflow/docs_src/install/install_java.md24
-rw-r--r--tensorflow/docs_src/install/install_linux.md24
-rw-r--r--tensorflow/docs_src/install/install_mac.md10
-rw-r--r--tensorflow/docs_src/install/install_sources.md17
-rw-r--r--tensorflow/docs_src/mobile/linking_libs.md2
-rw-r--r--tensorflow/docs_src/mobile/prepare_models.md4
-rw-r--r--tensorflow/docs_src/performance/quantization.md2
-rw-r--r--tensorflow/docs_src/programmers_guide/estimators.md19
-rw-r--r--tensorflow/docs_src/programmers_guide/feature_columns.md4
-rw-r--r--tensorflow/examples/learn/iris.py7
-rw-r--r--tensorflow/go/op/wrappers.go12
-rw-r--r--tensorflow/java/src/gen/cc/op_generator.cc11
-rw-r--r--tensorflow/java/src/gen/cc/op_specs.cc1
-rw-r--r--tensorflow/python/eager/backprop.py4
-rw-r--r--tensorflow/python/estimator/BUILD5
-rw-r--r--tensorflow/python/estimator/exporter.py4
-rw-r--r--tensorflow/python/estimator/inputs/numpy_io.py8
-rw-r--r--tensorflow/python/estimator/inputs/numpy_io_test.py5
-rw-r--r--tensorflow/python/estimator/inputs/pandas_io.py7
-rw-r--r--tensorflow/python/estimator/inputs/pandas_io_test.py5
-rw-r--r--tensorflow/python/estimator/inputs/queues/feeding_functions.py2
-rw-r--r--tensorflow/python/estimator/keras.py4
-rw-r--r--tensorflow/python/estimator/keras_test.py14
-rw-r--r--tensorflow/python/grappler/layout_optimizer_test.py4
-rw-r--r--tensorflow/python/keras/activations.py2
-rw-r--r--tensorflow/python/keras/callbacks.py21
-rw-r--r--tensorflow/python/keras/callbacks_test.py2
-rw-r--r--tensorflow/python/keras/engine/network.py2
-rw-r--r--tensorflow/python/keras/engine/saving_test.py4
-rw-r--r--tensorflow/python/keras/engine/training.py7
-rw-r--r--tensorflow/python/keras/engine/training_eager.py2
-rw-r--r--tensorflow/python/keras/initializers_test.py26
-rw-r--r--tensorflow/python/keras/layers/core.py26
-rw-r--r--tensorflow/python/keras/models_test.py14
-rw-r--r--tensorflow/python/kernel_tests/as_string_op_test.py10
-rw-r--r--tensorflow/python/kernel_tests/betainc_op_test.py4
-rw-r--r--tensorflow/python/kernel_tests/clip_ops_test.py13
-rw-r--r--tensorflow/python/kernel_tests/conv_ops_test.py32
-rw-r--r--tensorflow/python/kernel_tests/gather_nd_op_test.py32
-rw-r--r--tensorflow/python/kernel_tests/gather_op_test.py20
-rw-r--r--tensorflow/python/kernel_tests/init_ops_test.py27
-rw-r--r--tensorflow/python/kernel_tests/pooling_ops_test.py4
-rw-r--r--tensorflow/python/kernel_tests/py_func_test.py31
-rw-r--r--tensorflow/python/kernel_tests/scatter_nd_ops_test.py6
-rw-r--r--tensorflow/python/kernel_tests/scatter_ops_test.py14
-rw-r--r--tensorflow/python/kernel_tests/segment_reduction_ops_test.py4
-rw-r--r--tensorflow/python/kernel_tests/string_split_op_test.py96
-rw-r--r--tensorflow/python/ops/array_ops.py4
-rw-r--r--tensorflow/python/ops/gradient_checker.py8
-rw-r--r--tensorflow/python/ops/image_ops_impl.py74
-rw-r--r--tensorflow/python/ops/image_ops_test.py261
-rw-r--r--tensorflow/python/ops/init_ops.py3
-rw-r--r--tensorflow/python/ops/logging_ops.py5
-rw-r--r--tensorflow/python/ops/math_ops.py28
-rw-r--r--tensorflow/python/ops/nn_impl.py5
-rw-r--r--tensorflow/python/ops/nn_ops.py4
-rw-r--r--tensorflow/python/ops/nn_test.py10
-rw-r--r--tensorflow/python/ops/script_ops.py35
-rw-r--r--tensorflow/python/ops/sparse_ops.py4
-rw-r--r--tensorflow/python/ops/string_ops.py53
-rw-r--r--tensorflow/python/ops/variable_scope.py21
-rw-r--r--[-rwxr-xr-x]tensorflow/python/tools/import_pb_to_tensorboard.py0
-rw-r--r--tensorflow/tensorflow.bzl2
-rw-r--r--tensorflow/tools/api/generator/create_python_api.py8
-rw-r--r--tensorflow/tools/api/golden/tensorflow.image.pbtxt2
-rw-r--r--tensorflow/tools/api/golden/tensorflow.pbtxt4
-rw-r--r--tensorflow/tools/api/golden/tensorflow.strings.pbtxt4
-rwxr-xr-xtensorflow/tools/ci_build/builds/pip.sh4
-rwxr-xr-xtensorflow/tools/ci_build/builds/with_the_same_user2
-rwxr-xr-xtensorflow/tools/ci_build/ci_build.sh7
-rwxr-xr-xtensorflow/tools/ci_build/copy_binary.py3
-rwxr-xr-xtensorflow/tools/ci_build/install/install_pip_packages.sh4
-rwxr-xr-xtensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh4
-rwxr-xr-xtensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh5
-rwxr-xr-xtensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh29
-rwxr-xr-xtensorflow/tools/ci_build/pi/build_raspberry_pi.sh8
-rw-r--r--tensorflow/tools/def_file_filter/def_file_filter_configure.bzl6
-rwxr-xr-xtensorflow/tools/dist_test/local_test.sh12
-rwxr-xr-xtensorflow/tools/dist_test/remote_test.sh11
-rw-r--r--tensorflow/tools/docker/Dockerfile.devel2
-rw-r--r--tensorflow/tools/docker/Dockerfile.devel-cpu-mkl2
-rw-r--r--tensorflow/tools/docker/Dockerfile.devel-gpu6
-rw-r--r--tensorflow/tools/docker/Dockerfile.gpu2
-rw-r--r--tensorflow/tools/pip_package/BUILD1
-rwxr-xr-xtensorflow/tools/pip_package/build_pip_package.sh160
-rw-r--r--tensorflow/tools/pip_package/setup.py3
-rw-r--r--tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc3
-rw-r--r--tensorflow/tools/quantization/quantize_graph_test.py12
-rw-r--r--tensorflow/tools/test/upload_test_benchmarks.py1
-rw-r--r--tensorflow/workspace.bzl40
222 files changed, 3241 insertions, 894 deletions
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index a73c4ca3aa..6d134dbb80 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -475,7 +475,7 @@ tf_cc_shared_object(
# excludes all but a subset of function names.
# On MacOS, the linker does not support version_script, but has an
# an "-exported_symbols_list" command. -z defs disallows undefined
-# symbols in object files and -s strips the output.
+# symbols in object files.
tf_cc_shared_object(
name = "libtensorflow.so",
@@ -489,7 +489,6 @@ tf_cc_shared_object(
"//tensorflow:windows_msvc": [],
"//conditions:default": [
"-z defs",
- "-s",
"-Wl,--version-script", # This line must be directly followed by the version_script.lds file
"$(location //tensorflow/c:version_script.lds)",
],
@@ -515,7 +514,6 @@ tf_cc_shared_object(
"//tensorflow:windows_msvc": [],
"//conditions:default": [
"-z defs",
- "-s",
"-Wl,--version-script", # This line must be directly followed by the version_script.lds file
"$(location //tensorflow:tf_version_script.lds)",
],
diff --git a/tensorflow/c/generate-pc.sh b/tensorflow/c/generate-pc.sh
index 02a6a58b61..7184ad68fb 100755
--- a/tensorflow/c/generate-pc.sh
+++ b/tensorflow/c/generate-pc.sh
@@ -15,10 +15,12 @@
# ==============================================================================
TF_PREFIX='/usr/local'
+LIBDIR='lib'
usage() {
echo "Usage: $0 OPTIONS"
echo -e "-p, --prefix\tset installation prefix (default: /usr/local)"
+ echo -e "-l, --libdir\tset lib directory (default: lib)"
echo -e "-v, --version\tset TensorFlow version"
echo -e "-h, --help\tdisplay this message"
}
@@ -26,7 +28,7 @@ usage() {
[ $# == 0 ] && usage && exit 0
# read the options
-ARGS=$(getopt -o p:v:h --long prefix:,version:,help -n $0 -- "$@")
+ARGS=$(getopt -o p:l:v:h --long prefix:,libdir:,version:,help -n $0 -- "$@")
eval set -- "$ARGS"
# extract options and their arguments into variables.
@@ -38,6 +40,11 @@ while true ; do
"") shift 2 ;;
*) TF_PREFIX=$2 ; shift 2 ;;
esac ;;
+ -l|--libdir)
+ case "$2" in
+ "") shift 2 ;;
+ *) LIBDIR=$2 ; shift 2 ;;
+ esac ;;
-v|--version)
case "$2" in
"") shift 2 ;;
@@ -55,7 +62,7 @@ echo "Generating pkgconfig file for TensorFlow $TF_VERSION in $TF_PREFIX"
cat << EOF > tensorflow.pc
prefix=${TF_PREFIX}
exec_prefix=\${prefix}
-libdir=\${exec_prefix}/lib
+libdir=\${exec_prefix}/${LIBDIR}
includedir=\${prefix}/include
Name: TensorFlow
diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc
index 52c177212a..35a01e0341 100644
--- a/tensorflow/cc/gradients/math_grad.cc
+++ b/tensorflow/cc/gradients/math_grad.cc
@@ -38,6 +38,7 @@ REGISTER_NO_GRADIENT_OP("NotEqual");
REGISTER_NO_GRADIENT_OP("LogicalAnd");
REGISTER_NO_GRADIENT_OP("LogicalOr");
REGISTER_NO_GRADIENT_OP("LogicalNot");
+REGISTER_NO_GRADIENT_OP("Floor");
// Conjugate helper function returns the conjugate of an Output if it
// is complex valued.
diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc
index 0cb3132e94..c73482d5f4 100644
--- a/tensorflow/cc/gradients/nn_grad.cc
+++ b/tensorflow/cc/gradients/nn_grad.cc
@@ -255,6 +255,53 @@ Status LRNGradHelper(const Scope& scope, const Operation& op,
}
REGISTER_GRADIENT_OP("LRN", LRNGradHelper);
+Status SoftplusGradHelper(const Scope& scope, const Operation& op,
+ const std::vector<Output>& grad_inputs,
+ std::vector<Output>* grad_outputs) {
+ auto dx = internal::SoftplusGrad(scope, grad_inputs[0], op.input(0));
+ grad_outputs->push_back(dx);
+ return scope.status();
+}
+REGISTER_GRADIENT_OP("Softplus", SoftplusGradHelper);
+
+Status SoftsignGradHelper(const Scope& scope, const Operation& op,
+ const std::vector<Output>& grad_inputs,
+ std::vector<Output>* grad_outputs) {
+ auto dx = internal::SoftsignGrad(scope, grad_inputs[0], op.input(0));
+ grad_outputs->push_back(dx);
+ return scope.status();
+}
+REGISTER_GRADIENT_OP("Softsign", SoftsignGradHelper);
+
+Status FractionalAvgPoolGradHelper(const Scope& scope, const Operation& op,
+ const std::vector<Output>& grad_inputs,
+ std::vector<Output>* grad_outputs) {
+ bool overlapping;
+ TF_RETURN_IF_ERROR(
+ GetNodeAttr(op.output(0).node()->attrs(), "overlapping", &overlapping));
+ auto dx = internal::FractionalAvgPoolGrad(
+ scope, Shape(scope, op.input(0), Shape::OutType(DT_INT64)),
+ grad_inputs[0], op.output(1), op.output(2),
+ internal::FractionalAvgPoolGrad::Overlapping(overlapping));
+ grad_outputs->push_back(dx);
+ return scope.status();
+}
+REGISTER_GRADIENT_OP("FractionalAvgPool", FractionalAvgPoolGradHelper);
+
+Status FractionalMaxPoolGradHelper(const Scope& scope, const Operation& op,
+ const std::vector<Output>& grad_inputs,
+ std::vector<Output>* grad_outputs) {
+ bool overlapping;
+ TF_RETURN_IF_ERROR(
+ GetNodeAttr(op.output(0).node()->attrs(), "overlapping", &overlapping));
+ auto dx = internal::FractionalMaxPoolGrad(
+ scope, op.input(0), op.output(0), grad_inputs[0], op.output(1),
+ op.output(2), internal::FractionalMaxPoolGrad::Overlapping(overlapping));
+ grad_outputs->push_back(dx);
+ return scope.status();
+}
+REGISTER_GRADIENT_OP("FractionalMaxPool", FractionalMaxPoolGradHelper);
+
} // anonymous namespace
} // namespace ops
} // namespace tensorflow
diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index c4eba7ecb0..b4d457a9d1 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -28,6 +28,8 @@ namespace {
using ops::BiasAdd;
using ops::Conv2D;
using ops::Elu;
+using ops::FractionalAvgPool;
+using ops::FractionalMaxPool;
using ops::L2Loss;
using ops::LogSoftmax;
using ops::LRN;
@@ -41,6 +43,8 @@ using ops::Relu;
using ops::Relu6;
using ops::Selu;
using ops::Softmax;
+using ops::Softplus;
+using ops::Softsign;
class NNGradTest : public ::testing::Test {
protected:
@@ -71,22 +75,30 @@ class NNGradTest : public ::testing::Test {
EXPECT_LT(max_error, 1e-3);
}
- // Sets tensor with random values, ensuring that the max value is largest by
- // a reasonable amount.
- // This is an issue for MaxPool, MaxPoolV2 and MaxPool3D, in which
- // perturbations by the numeric gradient computation in the gradient checker
- // can change the max value if values are too close together.
+ // Sets tensor with random values, ensuring that every pair of elements are at
+ // least a reasonable amount apart.
+ // This is an issue for max pooling operations, in which perturbations by the
+ // numeric gradient computation in the gradient checker can change the max
+ // value if a pool has values that are too close together.
template <typename T>
- void SetRandomValuesWithBumpedMax(Tensor* tensor) {
+ void SetRandomValuesForMaxPooling(Tensor* tensor) {
auto tensor_flat = tensor->flat<T>();
- tensor_flat.setRandom();
- int32 max_index = 0;
- for (size_t i = 1; i < tensor->NumElements(); i++) {
- if (tensor_flat(i) > tensor_flat(max_index)) {
- max_index = i;
- }
+ // First set the array to an increasing sequence of values spaced
+ // a reasonable amount apart
+ T cur = 0;
+ for (size_t i = 0; i < tensor->NumElements(); i++) {
+ tensor_flat(i) = cur;
+ cur += 5e-2;
+ }
+ // Fischer-Yates shuffle the array
+ for (size_t i = tensor->NumElements() - 1; i >= 1; i--) {
+ // j <- random integer 0 <= j <= i
+ size_t j = random::New64() % (i + 1);
+ // swap values at i, j
+ T tmp = tensor_flat(i);
+ tensor_flat(i) = tensor_flat(j);
+ tensor_flat(j) = tmp;
}
- tensor_flat(max_index) += 1e-2;
}
Scope scope_;
@@ -189,7 +201,7 @@ TEST_F(NNGradTest, MaxPoolGradHelper) {
const std::vector<int> strides{1, 2, 2, 1};
auto y = MaxPool(scope_, x, ksize, strides, "VALID");
Tensor x_init_value = Tensor(DT_FLOAT, x_shape);
- SetRandomValuesWithBumpedMax<float>(&x_init_value);
+ SetRandomValuesForMaxPooling<float>(&x_init_value);
RunTest(x, x_init_value, y, y_shape);
}
@@ -202,7 +214,7 @@ TEST_F(NNGradTest, MaxPoolGradV2Helper) {
Tensor strides = test::AsTensor<int>({1, 2, 2, 1}, {4});
auto y = MaxPoolV2(scope_, x, ksize, strides, "VALID");
Tensor x_init_value = Tensor(DT_FLOAT, x_shape);
- SetRandomValuesWithBumpedMax<float>(&x_init_value);
+ SetRandomValuesForMaxPooling<float>(&x_init_value);
RunTest(x, x_init_value, y, y_shape);
}
@@ -215,7 +227,7 @@ TEST_F(NNGradTest, MaxPool3DGradHelper) {
const std::vector<int> strides{1, 3, 3, 3, 1};
auto y = MaxPool3D(scope_, x, ksize, strides, "VALID");
Tensor x_init_value = Tensor(DT_FLOAT, x_shape);
- SetRandomValuesWithBumpedMax<float>(&x_init_value);
+ SetRandomValuesForMaxPooling<float>(&x_init_value);
RunTest(x, x_init_value, y, y_shape);
}
@@ -248,5 +260,45 @@ TEST_F(NNGradTest, LRN){
RunTest(x, x_shape, y, x_shape);
}
+TEST_F(NNGradTest, SoftplusGrad) {
+ TensorShape shape({3, 7});
+ auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
+ auto y = Softplus(scope_, x);
+ RunTest(x, shape, y, shape);
+}
+
+TEST_F(NNGradTest, SoftsignGrad) {
+ TensorShape shape({3, 7});
+ auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
+ auto y = Softsign(scope_, x);
+ RunTest(x, shape, y, shape);
+}
+
+TEST_F(NNGradTest, FractionalAvgPoolGradHelper) {
+ TensorShape x_shape({1, 3, 7, 1});
+ auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape));
+ // Force consistent pooling regions for unit testing.
+ auto y = FractionalAvgPool(
+ scope_, x, {1, 1.2, 1.9, 1},
+ FractionalAvgPool::Deterministic(true).Overlapping(true).Seed(1).Seed2(
+ 2));
+ TensorShape y_shape({1, 2, 3, 1});
+ RunTest(x, x_shape, y.output, y_shape);
+}
+
+TEST_F(NNGradTest, FractionalMaxPoolGradHelper) {
+ TensorShape x_shape({1, 3, 7, 1});
+ auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape));
+ // Force consistent pooling regions for unit testing.
+ auto y = FractionalMaxPool(
+ scope_, x, {1, 1.2, 1.9, 1},
+ FractionalMaxPool::Deterministic(true).Overlapping(true).Seed(1).Seed2(
+ 2));
+ Tensor x_init_value = Tensor(DT_FLOAT, x_shape);
+ SetRandomValuesForMaxPooling<float>(&x_init_value);
+ TensorShape y_shape({1, 2, 3, 1});
+ RunTest(x, x_init_value, y.output, y_shape);
+}
+
} // namespace
} // namespace tensorflow
diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden
index 6e050cf564..6641d45e83 100644
--- a/tensorflow/compiler/aot/codegen_test_h.golden
+++ b/tensorflow/compiler/aot/codegen_test_h.golden
@@ -56,9 +56,9 @@ namespace bar {
//
// Memory stats:
// arg bytes total: 104
-// arg bytes aligned: 128
+// arg bytes aligned: 192
// temp bytes total: 126
-// temp bytes aligned: 224
+// temp bytes aligned: 320
class MyClass : public tensorflow::XlaCompiledCpuFunction {
public:
// Number of input arguments for the compiled computation.
diff --git a/tensorflow/compiler/aot/embedded_protocol_buffers.h b/tensorflow/compiler/aot/embedded_protocol_buffers.h
index ebfe4806c2..4e194a6aba 100644
--- a/tensorflow/compiler/aot/embedded_protocol_buffers.h
+++ b/tensorflow/compiler/aot/embedded_protocol_buffers.h
@@ -71,7 +71,7 @@ struct ProtobufToEmbed {
const ::tensorflow::protobuf::MessageLite* message;
};
-// Embeds a a sequence of protocol buffers into an object file.
+// Embeds a sequence of protocol buffers into an object file.
//
// `target_triple` is the target triple for the target architecture for the
// generated object file.
diff --git a/tensorflow/compiler/aot/runtime.h b/tensorflow/compiler/aot/runtime.h
index d085864f00..d1a669ceb1 100644
--- a/tensorflow/compiler/aot/runtime.h
+++ b/tensorflow/compiler/aot/runtime.h
@@ -25,8 +25,8 @@ namespace tensorflow {
namespace tfcompile {
namespace runtime {
-// Align to 32-bytes, to mimic tensorflow::Allocator::kAllocatorAlignment.
-static constexpr size_t kAlign = 32;
+// Align to 64-bytes, to mimic tensorflow::Allocator::kAllocatorAlignment.
+static constexpr size_t kAlign = 64;
// aligned_buffer_bytes returns the sum of each size in `sizes`, skipping -1
// values. There are `n` entries in `sizes`. Each buffer is aligned to kAlign
diff --git a/tensorflow/compiler/aot/runtime_test.cc b/tensorflow/compiler/aot/runtime_test.cc
index 6d603a02eb..06ec623eb2 100644
--- a/tensorflow/compiler/aot/runtime_test.cc
+++ b/tensorflow/compiler/aot/runtime_test.cc
@@ -24,7 +24,7 @@ namespace runtime {
namespace {
TEST(Runtime, AlignmentValue) {
- // We've chosen 32 byte alignment for the tfcompile runtime to mimic the
+ // We've chosen 64 byte alignment for the tfcompile runtime to mimic the
// regular tensorflow allocator, which was chosen to play nicely with Eigen.
// The tfcompile runtime also has a requirement that comes from the xla
// generated code, on the relation: buffer_size >= 16 ? 2 * sizeof(void*) : 8
@@ -39,13 +39,13 @@ TEST(Runtime, AlignedBufferBytes) {
EXPECT_EQ(aligned_buffer_bytes(sizesA, 1), 0);
static constexpr intptr_t sizesB[1] = {3};
- EXPECT_EQ(aligned_buffer_bytes(sizesB, 1), 32);
+ EXPECT_EQ(aligned_buffer_bytes(sizesB, 1), 64);
static constexpr intptr_t sizesC[1] = {32};
- EXPECT_EQ(aligned_buffer_bytes(sizesC, 1), 32);
+ EXPECT_EQ(aligned_buffer_bytes(sizesC, 1), 64);
static constexpr intptr_t sizesD[7] = {1, -1, 32, -1, 64, 2, 3};
- EXPECT_EQ(aligned_buffer_bytes(sizesD, 7), 192);
+ EXPECT_EQ(aligned_buffer_bytes(sizesD, 7), 320);
}
void* add_ptr(void* base, uintptr_t delta) {
@@ -101,11 +101,11 @@ TEST(Runtime, MallocFreeContiguousBuffers) {
EXPECT_NE(base, nullptr);
EXPECT_EQ(bufD[0], add_ptr(base, 0));
EXPECT_EQ(bufD[1], nullptr);
- EXPECT_EQ(bufD[2], add_ptr(base, 32));
+ EXPECT_EQ(bufD[2], add_ptr(base, 64));
EXPECT_EQ(bufD[3], nullptr);
- EXPECT_EQ(bufD[4], add_ptr(base, 64));
- EXPECT_EQ(bufD[5], add_ptr(base, 128));
- EXPECT_EQ(bufD[6], add_ptr(base, 160));
+ EXPECT_EQ(bufD[4], add_ptr(base, 128));
+ EXPECT_EQ(bufD[5], add_ptr(base, 192));
+ EXPECT_EQ(bufD[6], add_ptr(base, 256));
for (int i = 0; i < 7; ++i) {
const intptr_t size = sizesD[i];
if (size != -1) {
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index d82922a359..1067b38f93 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -178,6 +178,7 @@ cc_library(
":runtime_matmul",
":runtime_matmul_mkl",
":runtime_single_threaded_conv2d",
+ ":runtime_single_threaded_fft",
":runtime_single_threaded_matmul",
"@llvm//:execution_engine",
"@llvm//:core",
@@ -516,7 +517,6 @@ cc_library(
deps = [
"//tensorflow/compiler/xla:executable_run_options",
"//tensorflow/compiler/xla:xla_data_proto",
- "//tensorflow/core:framework",
"//tensorflow/core:framework_lite",
"//third_party/eigen3",
],
@@ -579,6 +579,22 @@ cc_library(
)
cc_library(
+ name = "runtime_single_threaded_fft",
+ srcs = [
+ "runtime_fft_impl.h",
+ "runtime_single_threaded_fft.cc",
+ ],
+ hdrs = ["runtime_single_threaded_fft.h"],
+ copts = runtime_copts(),
+ visibility = ["//visibility:public"],
+ deps = [
+ "//tensorflow/compiler/xla:xla_data_proto",
+ "//tensorflow/core:framework_lite",
+ "//third_party/eigen3",
+ ],
+)
+
+cc_library(
name = "runtime_single_threaded_matmul",
srcs = ["runtime_single_threaded_matmul.cc"],
hdrs = ["runtime_single_threaded_matmul.h"],
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
index 215405f680..54c52bc08f 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
@@ -51,6 +51,8 @@ extern const char* const kEigenConvF16SymbolName =
extern const char* const kEigenConvF32SymbolName =
"__xla_cpu_runtime_EigenConvF32";
extern const char* const kEigenFftSymbolName = "__xla_cpu_runtime_EigenFft";
+extern const char* const kEigenSingleThreadedFftSymbolName =
+ "__xla_cpu_runtime_EigenSingleThreadedFft";
extern const char* const kEigenSingleThreadedMatMulF16SymbolName =
"__xla_cpu_runtime_EigenSingleThreadedMatMulF16";
extern const char* const kEigenSingleThreadedMatMulF32SymbolName =
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h
index 1dce6efa5c..aa0e967123 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h
@@ -52,6 +52,7 @@ extern const char* const kMKLSingleThreadedMatMulF64SymbolName;
extern const char* const kEigenConvF16SymbolName;
extern const char* const kEigenConvF32SymbolName;
extern const char* const kEigenFftSymbolName;
+extern const char* const kEigenSingleThreadedFftSymbolName;
extern const char* const kEigenSingleThreadedMatMulF16SymbolName;
extern const char* const kEigenSingleThreadedMatMulF32SymbolName;
extern const char* const kEigenSingleThreadedMatMulF64SymbolName;
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 2c20be155f..758b8c62b4 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -1172,7 +1172,13 @@ Status IrEmitter::HandleFft(HloInstruction* fft) {
{int8_ptr_type, int8_ptr_type, int8_ptr_type, int32_type, int32_type,
int64_type, int64_type, int64_type, int64_type},
/*isVarArg=*/false);
- const char* fn_name = runtime::kEigenFftSymbolName;
+
+ bool multi_threaded_eigen =
+ hlo_module_config_.debug_options().xla_cpu_multi_thread_eigen();
+ const char* fn_name = multi_threaded_eigen
+ ? runtime::kEigenFftSymbolName
+ : runtime::kEigenSingleThreadedFftSymbolName;
+
llvm::Function* fft_func = llvm::cast<llvm::Function>(
module_->getOrInsertFunction(fn_name, fft_type));
fft_func->setCallingConv(llvm::CallingConv::C);
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h b/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h
index 984cb0616e..0bf693edd0 100644
--- a/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h
+++ b/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h
@@ -21,8 +21,6 @@ limitations under the License.
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/framework/numeric_types.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/platform/types.h"
// 'tensorflow' namespace is used so that int64 and other types don't require
@@ -71,11 +69,9 @@ void EigenFftR2C(const EigenDevice& device, complex64* out, float* operand,
in_dims[0] = input_batch;
Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> out_dims;
out_dims[0] = input_batch;
- TensorShape temp_shape{input_batch};
for (int i = 0; i < FFTRank; i++) {
in_dims[i + 1] = fft_shape[i];
out_dims[i + 1] = i == FFTRank - 1 ? fft_shape[i] / 2 + 1 : fft_shape[i];
- temp_shape.AddDim(fft_shape[i]);
}
const Eigen::TensorMap<Eigen::Tensor<float, FFTRank + 1, Eigen::RowMajor>,
Eigen::Aligned>
@@ -88,8 +84,8 @@ void EigenFftR2C(const EigenDevice& device, complex64* out, float* operand,
const auto axes = Eigen::ArrayXi::LinSpaced(FFTRank, 1, FFTRank);
// Compute the full FFT using a temporary tensor.
- Tensor temp(DataTypeToEnum<complex64>::v(), temp_shape);
- auto full_fft = temp.flat_inner_dims<complex64, FFTRank + 1>();
+ Eigen::Tensor<complex64, FFTRank + 1, Eigen::RowMajor> full_fft(in_dims);
+
const Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> zero_start_indices;
full_fft.device(device) =
input.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(axes);
@@ -112,11 +108,9 @@ void EigenFftC2R(const EigenDevice& device, float* out, complex64* operand,
in_dims[0] = input_batch;
Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> out_dims;
out_dims[0] = input_batch;
- TensorShape temp_shape{input_batch};
for (int i = 0; i < FFTRank; i++) {
in_dims[i + 1] = i == FFTRank - 1 ? fft_shape[i] / 2 + 1 : fft_shape[i];
out_dims[i + 1] = fft_shape[i];
- temp_shape.AddDim(fft_shape[i]);
}
const Eigen::TensorMap<Eigen::Tensor<complex64, FFTRank + 1, Eigen::RowMajor>,
Eigen::Aligned>
@@ -129,8 +123,7 @@ void EigenFftC2R(const EigenDevice& device, float* out, complex64* operand,
// region we will slice from input given fft_shape. We slice input to
// fft_shape on its inner-most dimensions, except the last (which we
// slice to fft_shape[-1] / 2 + 1).
- Tensor temp(DataTypeToEnum<complex64>::v(), temp_shape);
- auto full_fft = temp.flat_inner_dims<complex64, FFTRank + 1>();
+ Eigen::Tensor<complex64, FFTRank + 1, Eigen::RowMajor> full_fft(out_dims);
// Calculate the starting point and range of the source of
// negative frequency part.
@@ -179,7 +172,6 @@ template <int FFTRank, typename EigenDevice>
void EigenFftWithRank(const EigenDevice& device, void* out, void* operand,
int32 fft_type, int64 input_batch, int64 fft_length0,
int64 fft_length1, int64 fft_length2) {
- CHECK(::xla::FftType_IsValid(fft_type)) << fft_type;
switch (fft_type) {
case ::xla::FftType::FFT:
EigenFftC2C<true, FFTRank, EigenDevice>(
@@ -204,7 +196,8 @@ void EigenFftWithRank(const EigenDevice& device, void* out, void* operand,
input_batch, fft_length0, fft_length1, fft_length2);
break;
default:
- LOG(FATAL) << "Unsupported FFT type: " << fft_type;
+ // Unsupported FFT type
+ abort();
}
}
@@ -230,7 +223,8 @@ void EigenFftImpl(const EigenDevice& device, void* out, void* operand,
fft_length1, fft_length2);
break;
default:
- LOG(FATAL) << "Unsupported FFT rank " << fft_rank;
+ // Unsupported FFT rank
+ abort();
}
}
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc
new file mode 100644
index 0000000000..2613ddb127
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc
@@ -0,0 +1,32 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h"
+
+#include "tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h"
+#include "tensorflow/core/platform/dynamic_annotations.h"
+#include "tensorflow/core/platform/types.h"
+
+using tensorflow::int32;
+using tensorflow::int64;
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenSingleThreadedFft(
+ const void* run_options_ptr, void* out, void* operand, int32 fft_type,
+ int32 fft_rank, int64 input_batch, int64 fft_length0, int64 fft_length1,
+ int64 fft_length2) {
+ tensorflow::xla::EigenFftImpl(Eigen::DefaultDevice(), out, operand, fft_type,
+ fft_rank, input_batch, fft_length0, fft_length1,
+ fft_length2);
+}
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h
new file mode 100644
index 0000000000..dcd133d012
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h
@@ -0,0 +1,31 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_SINGLE_THREADED_FFT_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_SINGLE_THREADED_FFT_H_
+
+#include "tensorflow/core/platform/types.h"
+
+extern "C" {
+
+extern void __xla_cpu_runtime_EigenSingleThreadedFft(
+ const void* /* xla::ExecutableRunOptions* */ run_options_ptr, void* out,
+ void* operand, tensorflow::int32 fft_type, tensorflow::int32 fft_rank,
+ tensorflow::int64 input_batch, tensorflow::int64 fft_length0,
+ tensorflow::int64 fft_length1, tensorflow::int64 fft_length2);
+
+} // extern "C"
+
+#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_SINGLE_THREADED_FFT_H_
diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
index 8d8c5e4c44..c4c90515ac 100644
--- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
+++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
@@ -38,6 +38,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h"
#include "tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h"
#include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h"
+#include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h"
#include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h"
#include "tensorflow/compiler/xla/service/cpu/windows_compatibility.h"
#include "tensorflow/compiler/xla/types.h"
@@ -202,6 +203,7 @@ bool RegisterKnownJITSymbols() {
REGISTER_CPU_RUNTIME_SYMBOL(MKLSingleThreadedMatMulF64);
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF16);
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32);
+ REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedFft);
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF16);
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32);
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64);
diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h
index d3bc47e61e..2515222cf2 100644
--- a/tensorflow/compiler/xla/service/pattern_matcher.h
+++ b/tensorflow/compiler/xla/service/pattern_matcher.h
@@ -204,7 +204,7 @@ class LayoutPattern {
// Modifies the pattern to match only if the layout equals the given proto.
// The layout must outlive the returned pattern.
constexpr LayoutPattern<LayoutType, LayoutPatternEqualImpl<Impl>> EqualTo(
- const Layout* layout) const {
+ const ::xla::Layout* layout) const {
return LayoutPattern<LayoutType, LayoutPatternEqualImpl<Impl>>(
LayoutPatternEqualImpl<Impl>(impl_, layout), matched_layout_);
}
diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.cc b/tensorflow/compiler/xla/service/tuple_simplifier.cc
index e536c8afbf..77bdcc9de0 100644
--- a/tensorflow/compiler/xla/service/tuple_simplifier.cc
+++ b/tensorflow/compiler/xla/service/tuple_simplifier.cc
@@ -30,10 +30,17 @@ limitations under the License.
namespace xla {
+TupleSimplifier::TupleSimplifier(bool exclude_entry_computation) :
+ exclude_entry_computation_(exclude_entry_computation) {}
+
StatusOr<bool> TupleSimplifier::Run(HloModule* module) {
// Initially add all GTE and Tuple instructions to the worklist.
std::queue<HloInstruction*> worklist;
for (auto* computation : module->computations()) {
+ if (exclude_entry_computation_ &&
+ computation == module->entry_computation()) {
+ continue;
+ }
for (auto* instruction : computation->instructions()) {
if (instruction->opcode() == HloOpcode::kTuple ||
instruction->opcode() == HloOpcode::kGetTupleElement) {
diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.h b/tensorflow/compiler/xla/service/tuple_simplifier.h
index e5e9b10b5b..7509501883 100644
--- a/tensorflow/compiler/xla/service/tuple_simplifier.h
+++ b/tensorflow/compiler/xla/service/tuple_simplifier.h
@@ -27,13 +27,20 @@ namespace xla {
// the module.
class TupleSimplifier : public HloPassInterface {
public:
- TupleSimplifier() {}
+ TupleSimplifier() : TupleSimplifier(/*exclude_entry_computation=*/false) {}
+ explicit TupleSimplifier(bool exclude_entry_computation);
~TupleSimplifier() override {}
tensorflow::StringPiece name() const override { return "tuple-simplifier"; }
// Run tuple simplification on the given computation. Returns whether the
// computation was changed.
StatusOr<bool> Run(HloModule* module) override;
+
+ private:
+ // When set, this pipeline stage will perform optimization of all computations
+ // apart from the module's entry computation. This is used by Graphcore's
+ // backend.
+ bool exclude_entry_computation_;
};
} // namespace xla
diff --git a/tensorflow/compiler/xla/service/tuple_simplifier_test.cc b/tensorflow/compiler/xla/service/tuple_simplifier_test.cc
index ca9ae91281..d3635eae81 100644
--- a/tensorflow/compiler/xla/service/tuple_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/tuple_simplifier_test.cc
@@ -42,6 +42,12 @@ class TupleSimplifierTest : public HloTestBase {
TF_ASSERT_OK(changed_status.status());
EXPECT_EQ(change_expected, changed_status.ValueOrDie());
}
+ void Run(HloModule* module, bool change_expected, bool exclude_entry) {
+ TupleSimplifier simplifier(exclude_entry);
+ auto changed_status = simplifier.Run(module);
+ TF_ASSERT_OK(changed_status.status());
+ EXPECT_EQ(change_expected, changed_status.ValueOrDie());
+ }
const Shape scalar_shape_ = ShapeUtil::MakeShape(F32, {});
const Shape tuple_shape_ = ShapeUtil::MakeTupleShape(
@@ -211,5 +217,76 @@ TEST_F(TupleSimplifierTest, IncompatibleTuples) {
EXPECT_THAT(computation->root_instruction(), tuple);
}
+TEST_F(TupleSimplifierTest, CanExcludeEntryComputation) {
+ // Verify that the entry computation can be excluded.
+ auto module = CreateNewModule();
+
+ HloInstruction* p0;
+ HloInstruction* p1;
+ HloComputation* c0;
+ HloComputation* c1;
+ HloComputation* entry;
+
+ {
+ HloComputation::Builder builder(TestName() + "_1");
+ p0 = builder.AddInstruction(
+ HloInstruction::CreateParameter(0, tuple_shape_, "param"));
+ HloInstruction* gte0 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(scalar_shape_, p0, 0));
+ HloInstruction* gte1 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(scalar_shape_, p0, 1));
+ HloInstruction* gte2 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(scalar_shape_, p0, 2));
+
+ builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1, gte2}));
+
+ c0 = module->AddEmbeddedComputation(builder.Build());
+ }
+ {
+ HloComputation::Builder builder(TestName() + "_2");
+ p1 = builder.AddInstruction(
+ HloInstruction::CreateParameter(0, tuple_shape_, "param"));
+ HloInstruction* gte0 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(scalar_shape_, p1, 0));
+ HloInstruction* gte1 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(scalar_shape_, p1, 1));
+ HloInstruction* gte2 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(scalar_shape_, p1, 2));
+
+ builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1, gte2}));
+
+ c1 = module->AddEmbeddedComputation(builder.Build());
+ }
+ {
+ HloComputation::Builder builder(TestName() + "_Entry");
+ HloInstruction* tuple_param = builder.AddInstruction(
+ HloInstruction::CreateParameter(0, tuple_shape_, "param"));
+ HloInstruction* call0 = builder.AddInstruction(
+ HloInstruction::CreateCall(tuple_shape_, {tuple_param}, c0));
+ HloInstruction* call1 = builder.AddInstruction(
+ HloInstruction::CreateCall(tuple_shape_, {tuple_param}, c1));
+ HloInstruction* gte0 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(scalar_shape_, call0, 0));
+ HloInstruction* gte1 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(scalar_shape_, call1, 1));
+ HloInstruction* tuple0 =
+ builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
+ HloInstruction* gte2 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(scalar_shape_, tuple0, 0));
+ HloInstruction* gte3 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(scalar_shape_, tuple0, 1));
+
+ builder.AddInstruction(HloInstruction::CreateTuple({gte2, gte3}));
+
+ entry = module->AddEntryComputation(builder.Build());
+ }
+
+ Run(module.get(), /*change_expected=*/true, /*exclude_entry=*/ true);
+
+ EXPECT_THAT(c0->root_instruction(), p0);
+ EXPECT_THAT(c1->root_instruction(), p1);
+ EXPECT_THAT(entry->instruction_count(), 9);
+}
+
} // namespace
} // namespace xla
diff --git a/tensorflow/contrib/autograph/__init__.py b/tensorflow/contrib/autograph/__init__.py
index 637e49c082..dbdbad8f4c 100644
--- a/tensorflow/contrib/autograph/__init__.py
+++ b/tensorflow/contrib/autograph/__init__.py
@@ -23,6 +23,7 @@ from __future__ import print_function
# TODO(mdan): Bring only the relevant symbols to the top level.
from tensorflow.contrib.autograph import utils
+from tensorflow.contrib.autograph import operators
from tensorflow.contrib.autograph.impl.api import convert
from tensorflow.contrib.autograph.impl.api import converted_call
from tensorflow.contrib.autograph.impl.api import do_not_convert
@@ -43,6 +44,8 @@ _allowed_symbols = [
'do_not_convert',
'to_code',
'to_graph',
+ # Overloaded operators
+ 'operators',
# Special functions and directives
'set_element_type',
'set_loop_options',
diff --git a/tensorflow/contrib/cmake/tf_c.cmake b/tensorflow/contrib/cmake/tf_c.cmake
index bda5e26f43..2e0a2fcef4 100644
--- a/tensorflow/contrib/cmake/tf_c.cmake
+++ b/tensorflow/contrib/cmake/tf_c.cmake
@@ -37,13 +37,15 @@ add_dependencies(
tf_core_lib
tf_protos_cc)
-add_library(tf_c_python_api OBJECT
- "${tensorflow_source_dir}/tensorflow/c/python_api.cc"
- "${tensorflow_source_dir}/tensorflow/c/python_api.h"
-)
-add_dependencies(
- tf_c_python_api
- tf_c
- tf_core_lib
- tf_core_framework
- tf_protos_cc)
+if(tensorflow_BUILD_PYTHON_BINDINGS)
+ add_library(tf_c_python_api OBJECT
+ "${tensorflow_source_dir}/tensorflow/c/python_api.cc"
+ "${tensorflow_source_dir}/tensorflow/c/python_api.h"
+ )
+ add_dependencies(
+ tf_c_python_api
+ tf_c
+ tf_core_lib
+ tf_core_framework
+ tf_protos_cc)
+endif()
diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake
index f73da0b8ab..6c90cf398c 100644
--- a/tensorflow/contrib/cmake/tf_cc_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake
@@ -155,7 +155,7 @@ if (WIN32)
set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/pywrap_tensorflow_internal.lib")
endif()
else (WIN32)
- set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so")
+ set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal${CMAKE_SHARED_LIBRARY_SUFFIX}")
endif (WIN32)
add_custom_target(tf_extension_ops)
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index a0c3ddd28b..9244604489 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -715,7 +715,7 @@ if(WIN32)
endif()
else()
add_custom_command(TARGET pywrap_tensorflow_internal POST_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so
+ COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal${CMAKE_SHARED_LIBRARY_SUFFIX}
${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.so)
endif()
@@ -832,7 +832,6 @@ add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/contrib/testing/python/framework/util_test.py
${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/testing/python/framework/)
-
add_custom_command(TARGET tf_python_build_pip_package POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/tools/pip_package/README
${CMAKE_CURRENT_BINARY_DIR}/tf_python/)
diff --git a/tensorflow/contrib/cmake/tools/create_def_file.py b/tensorflow/contrib/cmake/tools/create_def_file.py
index cffe069aa3..4f957f1e0b 100644
--- a/tensorflow/contrib/cmake/tools/create_def_file.py
+++ b/tensorflow/contrib/cmake/tools/create_def_file.py
@@ -44,7 +44,8 @@ UNDNAME = "undname.exe"
DUMPBIN = "dumpbin.exe"
# Exclude if matched
-EXCLUDE_RE = re.compile(r"RTTI|deleting destructor|::internal::")
+EXCLUDE_RE = re.compile(r"RTTI|deleting destructor|::internal::|Internal|"
+ r"python_op_gen_internal|grappler")
# Include if matched before exclude
INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|"
@@ -56,6 +57,10 @@ INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|"
r"tensorflow::ops::internal::Enter|"
r"tensorflow::strings::internal::AppendPieces|"
r"tensorflow::strings::internal::CatPieces|"
+ r"tensorflow::errors::Internal|"
+ r"tensorflow::Tensor::CopyFromInternal|"
+ r"tensorflow::kernel_factory::"
+ r"OpKernelRegistrar::InitInternal|"
r"tensorflow::io::internal::JoinPathImpl")
# Include if matched after exclude
@@ -64,7 +69,7 @@ INCLUDE_RE = re.compile(r"^(TF_\w*)$|"
r"tensorflow::|"
r"functor::|"
r"\?nsync_|"
- r"perftools::gputools")
+ r"stream_executor::")
# We want to identify data members explicitly in the DEF file, so that no one
# can implicitly link against the DLL if they use one of the variables exported
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py
index 45760a29ee..795f1993ba 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py
@@ -151,16 +151,24 @@ class SinhArcsinhBijectorTest(test.TestCase):
self.assertAllClose(y, bijector.forward(x).eval(), rtol=1e-4, atol=0.)
self.assertAllClose(x, bijector.inverse(y).eval(), rtol=1e-4, atol=0.)
- # Do the numpy calculation in float128 to avoid inf/nan.
- y_float128 = np.float128(y)
- self.assertAllClose(
- np.log(np.cosh(
- np.arcsinh(y_float128) / tailweight - skewness) / np.sqrt(
- y_float128**2 + 1)) -
- np.log(tailweight),
- bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(),
- rtol=1e-4,
- atol=0.)
+ # On IBM PPC systems, longdouble (np.float128) is the same as double except
+ # that it can have more precision. An 8-byte double cannot hold the square of
+ # the float64 maximum, so the calculation below overflows to inf and the test
+ # fails. This check avoids that by skipping the square calculation and its assert.
+
+ if np.amax(y) <= np.sqrt(np.finfo(np.float128).max) and \
+ np.fabs(np.amin(y)) <= np.sqrt(np.fabs(np.finfo(np.float128).min)):
+
+ # Do the numpy calculation in float128 to avoid inf/nan.
+ y_float128 = np.float128(y)
+ self.assertAllClose(
+ np.log(np.cosh(
+ np.arcsinh(y_float128) / tailweight - skewness) / np.sqrt(
+ y_float128**2 + 1)) -
+ np.log(tailweight),
+ bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(),
+ rtol=1e-4,
+ atol=0.)
self.assertAllClose(
-bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(),
bijector.forward_log_det_jacobian(x, event_ndims=0).eval(),
diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py
index d7909dd5a2..adf92c27ea 100644
--- a/tensorflow/contrib/eager/python/datasets.py
+++ b/tensorflow/contrib/eager/python/datasets.py
@@ -106,7 +106,8 @@ class Iterator(iterator_ops.EagerIterator, checkpointable.CheckpointableBase):
target_device=target,
buffer_size=10,
container="",
- shared_name=_generate_shared_name("function_buffer_resource"))
+ shared_name=_generate_shared_name(
+ "contrib_eager_iterator_function_buffer_resource"))
self._buffer_resource_deleter = resource_variable_ops.EagerResourceDeleter( # pylint: disable=line-too-long
handle=self._buffer_resource_handle,
handle_device=self._device)
diff --git a/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb
index 4fe3a0e3f3..5749f22ac5 100644
--- a/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb
+++ b/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb
@@ -68,7 +68,7 @@
"# simply construct the object. Most layers take as a first argument the number\n",
"# of output dimensions / channels.\n",
"layer = tf.keras.layers.Dense(100)\n",
- "# The number of input dimensionss is often unnecessary, as it can be inferred\n",
+ "# The number of input dimensions is often unnecessary, as it can be inferred\n",
"# the first time the layer is used, but it can be provided if you want to \n",
"# specify it manually, which is useful in some complex models.\n",
"layer = tf.keras.layers.Dense(10, input_shape=(None, 5))"
@@ -267,7 +267,7 @@
" * `build`, where you know the shapes of the input tensors and can do the rest of the initialization\n",
" * `call`, where you do the forward computation\n",
"\n",
- "Note that you don't have to wait until `build` is called to create your variables, you can also create them in `__init__`. However, the advantage of creating them in `build` is that it enables late variable creation based on the shape of the inputs the layer will operate on. On the other hand, creating variables in `__init__` would mean that shapes requires to create the variables will need to be explicitly specified."
+ "Note that you don't have to wait until `build` is called to create your variables, you can also create them in `__init__`. However, the advantage of creating them in `build` is that it enables late variable creation based on the shape of the inputs the layer will operate on. On the other hand, creating variables in `__init__` would mean that shapes required to create the variables will need to be explicitly specified."
]
},
{
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py
index 84a413c791..05bcdac2ca 100644
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py
+++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py
@@ -346,7 +346,8 @@ def sequence_numeric_column(
key,
shape=(1,),
default_value=0.,
- dtype=dtypes.float32):
+ dtype=dtypes.float32,
+ normalizer_fn=None):
"""Returns a feature column that represents sequences of numeric data.
Example:
@@ -370,6 +371,12 @@ def sequence_numeric_column(
default_value: A single value compatible with `dtype` that is used for
padding the sparse data into a dense `Tensor`.
dtype: The type of values.
+ normalizer_fn: If not `None`, a function that can be used to normalize the
+ value of the tensor after `default_value` is applied for parsing.
+ Normalizer function takes the input `Tensor` as its argument, and returns
+ the output `Tensor`. (e.g. lambda x: (x - 3.0) / 4.2). Please note that
+ even though the most common use case of this function is normalization, it
+ can be used for any kind of TensorFlow transformation.
Returns:
A `_SequenceNumericColumn`.
@@ -383,12 +390,16 @@ def sequence_numeric_column(
if not (dtype.is_integer or dtype.is_floating):
raise ValueError('dtype must be convertible to float. '
'dtype: {}, key: {}'.format(dtype, key))
+ if normalizer_fn is not None and not callable(normalizer_fn):
+ raise TypeError(
+ 'normalizer_fn must be a callable. Given: {}'.format(normalizer_fn))
return _SequenceNumericColumn(
key,
shape=shape,
default_value=default_value,
- dtype=dtype)
+ dtype=dtype,
+ normalizer_fn=normalizer_fn)
def _assert_all_equal_and_return(tensors, name=None):
@@ -407,7 +418,7 @@ class _SequenceNumericColumn(
fc._SequenceDenseColumn,
collections.namedtuple(
'_SequenceNumericColumn',
- ['key', 'shape', 'default_value', 'dtype'])):
+ ['key', 'shape', 'default_value', 'dtype', 'normalizer_fn'])):
"""Represents sequences of numeric data."""
@property
@@ -419,7 +430,10 @@ class _SequenceNumericColumn(
return {self.key: parsing_ops.VarLenFeature(self.dtype)}
def _transform_feature(self, inputs):
- return inputs.get(self.key)
+ input_tensor = inputs.get(self.key)
+ if self.normalizer_fn is not None:
+ input_tensor = self.normalizer_fn(input_tensor)
+ return input_tensor
@property
def _variable_shape(self):
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
index ee74cf56dc..45d7b74046 100644
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
+++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
@@ -28,6 +28,7 @@ from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import sparse_ops
from tensorflow.python.platform import test
from tensorflow.python.training import monitored_session
@@ -947,6 +948,7 @@ class SequenceNumericColumnTest(test.TestCase):
self.assertEqual((1,), a.shape)
self.assertEqual(0., a.default_value)
self.assertEqual(dtypes.float32, a.dtype)
+ self.assertIsNone(a.normalizer_fn)
def test_shape_saved_as_tuple(self):
a = sfc.sequence_numeric_column('aaa', shape=[1, 2])
@@ -965,6 +967,10 @@ class SequenceNumericColumnTest(test.TestCase):
ValueError, 'dtype must be convertible to float'):
sfc.sequence_numeric_column('aaa', dtype=dtypes.string)
+ def test_normalizer_fn_must_be_callable(self):
+ with self.assertRaisesRegexp(TypeError, 'must be a callable'):
+ sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable')
+
def test_get_sequence_dense_tensor(self):
sparse_input = sparse_tensor.SparseTensorValue(
# example 0, values [[0.], [1]]
@@ -985,6 +991,41 @@ class SequenceNumericColumnTest(test.TestCase):
self.assertAllEqual(
expected_dense_tensor, dense_tensor.eval(session=sess))
+ def test_get_sequence_dense_tensor_with_normalizer_fn(self):
+
+ def _increment_two(input_sparse_tensor):
+ return sparse_ops.sparse_add(
+ input_sparse_tensor,
+ sparse_tensor.SparseTensor(((0, 0), (1, 1)), (2.0, 2.0), (2, 2))
+ )
+
+ sparse_input = sparse_tensor.SparseTensorValue(
+ # example 0, values [[0.], [1]]
+ # example 1, [[10.]]
+ indices=((0, 0), (0, 1), (1, 0)),
+ values=(0., 1., 10.),
+ dense_shape=(2, 2))
+
+ # Before _increment_two:
+ # [[0.], [1.]],
+ # [[10.], [0.]],
+ # After _increment_two:
+ # [[2.], [1.]],
+ # [[10.], [2.]],
+ expected_dense_tensor = [
+ [[2.], [1.]],
+ [[10.], [2.]],
+ ]
+ numeric_column = sfc.sequence_numeric_column(
+ 'aaa', normalizer_fn=_increment_two)
+
+ dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
+ _LazyBuilder({'aaa': sparse_input}))
+
+ with monitored_session.MonitoredSession() as sess:
+ self.assertAllEqual(
+ expected_dense_tensor, dense_tensor.eval(session=sess))
+
def test_get_sequence_dense_tensor_with_shape(self):
"""Tests get_sequence_dense_tensor with shape !=(1,)."""
sparse_input = sparse_tensor.SparseTensorValue(
diff --git a/tensorflow/contrib/ffmpeg/__init__.py b/tensorflow/contrib/ffmpeg/__init__.py
index daba965a98..484ffee3e7 100644
--- a/tensorflow/contrib/ffmpeg/__init__.py
+++ b/tensorflow/contrib/ffmpeg/__init__.py
@@ -28,7 +28,6 @@ from __future__ import print_function
from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_audio
from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video
from tensorflow.contrib.ffmpeg.ffmpeg_ops import encode_audio
-from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video
from tensorflow.python.util.all_util import remove_undocumented
diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
index 020b5c99c6..b1b5126d9e 100644
--- a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
+++ b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
@@ -21,7 +21,6 @@ from __future__ import print_function
from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py
from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py
from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py
-from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py
from tensorflow.contrib.util import loader
from tensorflow.python.framework import ops
from tensorflow.python.platform import resource_loader
diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py
index 10d1ecc738..dc49383c5c 100644
--- a/tensorflow/contrib/framework/__init__.py
+++ b/tensorflow/contrib/framework/__init__.py
@@ -119,14 +119,13 @@ from tensorflow.python.framework.smart_cond import smart_cond
from tensorflow.python.framework.smart_cond import smart_constant_value
from tensorflow.python.framework.tensor_spec import BoundedTensorSpec
from tensorflow.python.framework.tensor_spec import TensorSpec
-from tensorflow.python.ops.array_ops import broadcast_to
from tensorflow.python.ops.init_ops import convolutional_delta_orthogonal
from tensorflow.python.ops.init_ops import convolutional_orthogonal_1d
from tensorflow.python.ops.init_ops import convolutional_orthogonal_2d
from tensorflow.python.ops.init_ops import convolutional_orthogonal_3d
from tensorflow.python.util.all_util import remove_undocumented
-_allowed_symbols = ['nest', 'broadcast_to']
+_allowed_symbols = ['nest']
_nest_allowed_symbols = [
'assert_same_structure',
'is_sequence',
diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
index 65cb94b5a4..a955e21b72 100644
--- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
+++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
@@ -301,8 +301,8 @@ class FusedConv2DBiasActivationTest(test.TestCase):
conv = tensors[i]
value = values[i]
ref_value = ref_values[i]
- print("expected = ", ref_value)
- print("actual = ", value)
+ tf_logging.info("expected = %s", ref_value)
+ tf_logging.info("actual = %s", value)
tol = 1e-5
if value.dtype == np.float16:
tol = 1e-3
@@ -843,7 +843,8 @@ class FusedConvInt8Tests(test.TestCase):
vertical_stride, padding_type)
output_width = CalculateConvolvedOutputDim(input_width, filter_width,
horizontal_stride, padding_type)
- print("output_height=", output_height, ", output_width=", output_width)
+ tf_logging.info("output_height=%s, output_width=%s", output_height,
+ output_width)
side_input, _, _ = gen_array_ops.quantize_v2(
random_ops.random_uniform(
@@ -880,8 +881,8 @@ class FusedConvInt8Tests(test.TestCase):
with self.test_session(
use_gpu=True, config=NoMemoryOptimizationConfig()) as sess:
actual_y, expected_y = sess.run([actual, expected])
- print("actual_y = ", actual_y)
- print("expected_y = ", expected_y)
+ tf_logging.info("actual_y = %s", actual_y)
+ tf_logging.info("expected_y = %s", expected_y)
self.assertTrue(np.array_equal(actual_y, expected_y))
def testFusedConvInt8(self):
diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c
index 6a5d982dc8..2e5c84704f 100644
--- a/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c
+++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c
@@ -19,7 +19,7 @@ limitations under the License.
#include "hexagon_controller.h"
-#include <malloc.h>
+#include <stdlib.h>
#include <stdio.h>
#include "adspmsgd.h"
diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh
index 436c3e1d4c..840015a7fa 100755
--- a/tensorflow/contrib/lite/download_dependencies.sh
+++ b/tensorflow/contrib/lite/download_dependencies.sh
@@ -30,9 +30,7 @@ if [ ! -f $BZL_FILE_PATH ]; then
fi
EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)"
-# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once
-# the archive has been propagated in mirror.bazel.build.
-GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)"
+GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)"
GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz"
ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)"
NEON_2_SSE_URL="https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip"
diff --git a/tensorflow/contrib/lite/examples/minimal/minimal.cc b/tensorflow/contrib/lite/examples/minimal/minimal.cc
index 106e3b0270..8b0ace96cc 100644
--- a/tensorflow/contrib/lite/examples/minimal/minimal.cc
+++ b/tensorflow/contrib/lite/examples/minimal/minimal.cc
@@ -38,7 +38,7 @@ using namespace tflite;
int main(int argc, char *argv[]) {
if(argc != 2) {
- fprintf(stderr, "Usage: %s <model>\n");
+ fprintf(stderr, "minimal <tflite model>\n");
return 1;
}
const char* filename = argv[1];
diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md
index bb2e615eac..965273f0f0 100644
--- a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md
+++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md
@@ -128,7 +128,6 @@ TensorFlow operation not listed above are likely unsupported. Notably, the
following common ops are not supported at the moment:
* [tf.depth_to_space](https://www.tensorflow.org/api_docs/python/tf/depth_to_space)
-* [tf.gather](https://www.tensorflow.org/api_docs/python/tf/gather)
* [tf.image.resize_bilinear](https://www.tensorflow.org/api_docs/python/tf/image/resize_bilinear)
* [tf.tanh](https://www.tensorflow.org/api_docs/python/tf/tanh)
@@ -306,6 +305,19 @@ Options {
}
```
+**GATHER**
+
+```
+Inputs {
+ 0: params tensor
+ 1: indices tensor
+ 2: axis tensor (optional)
+}
+Outputs {
+ 0: a tensor with same type as the params tensor.
+}
+```
+
**GREATER**
```
diff --git a/tensorflow/contrib/lite/java/ovic/README.md b/tensorflow/contrib/lite/java/ovic/README.md
index 5efa70987e..26349347fa 100644
--- a/tensorflow/contrib/lite/java/ovic/README.md
+++ b/tensorflow/contrib/lite/java/ovic/README.md
@@ -2,7 +2,7 @@
This folder contains building code for track one of the [Low Power ImageNet Recognition Challenge workshop at CVPR 2018.](https://rebootingcomputing.ieee.org/home/sitemap/14-lpirc/80-low-power-image-recognition-challenge-lpirc-2018)
-## Pre-requesits
+## Pre-requisite
Follow the steps [here](https://www.tensorflow.org/mobile/tflite/demo_android) to install Tensorflow, Bazel, and the Android NDK and SDK.
@@ -49,7 +49,7 @@ Once you have a submission that follows the instructions from the [competition s
You can call the validator binary below to verify that your model fits the format requirements. This often helps you to catch size mismatches (e.g. output should be [1, 1001] instead of [1,1,1,1001]). Let say the submission file is located at `/path/to/my_model.lite`, then call:
```sh
-bazel build --cxxopt--std=c++11 //tensorflow/contrib/lite/java/ovic:ovic_validator --cxxopt=-Wno-all
+bazel build --cxxopt=--std=c++11 //tensorflow/contrib/lite/java/ovic:ovic_validator --cxxopt=-Wno-all
bazel-bin/tensorflow/contrib/lite/java/ovic/ovic_validator /path/to/my_model.lite
```
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index a2f192bbc2..1908f7fa6c 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -1934,7 +1934,7 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
// The quantization of the input, output arrays is as follows:
// - The input activations are quantized as uint8 on the interval
// [-1, 127/128].
-// The rationale for that is that that is the natural interval for output
+// The rationale is that this is the natural interval for output
// activations (see next point) and these need to be concatenated together.
// We could accommodate different ranges by re-scaling, but we empirically
// found that setting the input activations range to be [-1, 127/128] in the
@@ -1999,7 +1999,7 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
// However, for a fixed-point implementation in 16-bit integers, using 5
// integer bits to represent the [-16, 16] range would leave only 11
// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
-// representable values. Notice that that is higher than the
+// representable values. Notice that this is higher than the
// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
// Using [-8, 8] thus seems like the better compromise overall, enjoying
// an increment of 2.4e-4 between representable values and a worst-case
diff --git a/tensorflow/contrib/lite/python/interpreter.py b/tensorflow/contrib/lite/python/interpreter.py
index 9400e757b9..fd90823425 100644
--- a/tensorflow/contrib/lite/python/interpreter.py
+++ b/tensorflow/contrib/lite/python/interpreter.py
@@ -55,7 +55,7 @@ class Interpreter(object):
elif model_content and not model_path:
self._interpreter = (
_interpreter_wrapper.InterpreterWrapper_CreateWrapperCPPFromBuffer(
- model_content, len(model_content)))
+ model_content))
if not self._interpreter:
raise ValueError(
'Failed to create model from {} bytes'.format(len(model_content)))
diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc
index f705551fcb..b283551c45 100644
--- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc
+++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc
@@ -397,9 +397,14 @@ InterpreterWrapper* InterpreterWrapper::CreateWrapperCPPFromFile(
}
InterpreterWrapper* InterpreterWrapper::CreateWrapperCPPFromBuffer(
- const char* data, size_t len) {
+ PyObject* data) {
+ char * buf = nullptr;
+ Py_ssize_t length;
+ if (PY_TO_CPPSTRING(data, &buf, &length) == -1) {
+ return nullptr;
+ }
std::unique_ptr<tflite::FlatBufferModel> model =
- tflite::FlatBufferModel::BuildFromBuffer(data, len);
+ tflite::FlatBufferModel::BuildFromBuffer(buf, length);
return model ? new InterpreterWrapper(std::move(model)) : nullptr;
}
diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h
index b0ed7c4559..cbeb53bee7 100644
--- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h
+++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h
@@ -40,8 +40,7 @@ class InterpreterWrapper {
static InterpreterWrapper* CreateWrapperCPPFromFile(const char* model_path);
// SWIG caller takes ownership of pointer.
- static InterpreterWrapper* CreateWrapperCPPFromBuffer(const char* data,
- size_t len);
+ static InterpreterWrapper* CreateWrapperCPPFromBuffer(PyObject* data);
~InterpreterWrapper();
bool AllocateTensors();
diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py
index 0913cd2c5c..88dda7290b 100644
--- a/tensorflow/contrib/lite/python/lite.py
+++ b/tensorflow/contrib/lite/python/lite.py
@@ -34,6 +34,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+from six import PY3
+
from google.protobuf import text_format as _text_format
from google.protobuf.message import DecodeError
from tensorflow.contrib.lite.python import lite_constants as constants
@@ -54,6 +56,7 @@ from tensorflow.python.framework.importer import import_graph_def
from tensorflow.python.ops.variables import global_variables_initializer
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import tag_constants
+# from tensorflow.python.util.all_util import remove_undocumented
class TocoConverter(object):
@@ -203,6 +206,12 @@ class TocoConverter(object):
except (_text_format.ParseError, DecodeError):
try:
print("Ignore 'tcmalloc: large alloc' warnings.")
+
+ if not isinstance(file_content, str):
+ if PY3:
+ file_content = file_content.decode('utf-8')
+ else:
+ file_content = file_content.encode('utf-8')
_text_format.Merge(file_content, graph_def)
except (_text_format.ParseError, DecodeError):
raise ValueError(
@@ -382,3 +391,5 @@ def _freeze_graph(sess, output_tensors):
output_arrays)
else:
return sess.graph_def
+
+# remove_undocumented(__name__)
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index e33b430937..5c7fa09891 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -178,7 +178,7 @@ ArrayDataType ConvertDataType(tensorflow::DataType dtype) {
else if (dtype == DT_STRING)
return ArrayDataType::kString;
else
- LOG(INFO) << "Unsupported data type in placehoder op: " << dtype;
+ LOG(INFO) << "Unsupported data type in placeholder op: " << dtype;
return ArrayDataType::kNone;
}
diff --git a/tensorflow/contrib/lite/toco/toco_port.cc b/tensorflow/contrib/lite/toco/toco_port.cc
index 1b21c8bc60..de76fd4032 100644
--- a/tensorflow/contrib/lite/toco/toco_port.cc
+++ b/tensorflow/contrib/lite/toco/toco_port.cc
@@ -20,6 +20,12 @@ limitations under the License.
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/logging.h"
+#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__)
+namespace std {  // NOTE(review): presumably the target toolchain lacks std::round - confirm
+double round(double x) { return ::round(x); }  // forwards to the global-namespace ::round
+} // namespace std
+#endif
+
namespace toco {
namespace port {
void CopyToBuffer(const string& src, char* dest) {
diff --git a/tensorflow/contrib/lite/toco/toco_port.h b/tensorflow/contrib/lite/toco/toco_port.h
index 5c019cb2bf..17f82b9dd7 100644
--- a/tensorflow/contrib/lite/toco/toco_port.h
+++ b/tensorflow/contrib/lite/toco/toco_port.h
@@ -34,6 +34,24 @@ limitations under the License.
#define TFLITE_PROTO_NS google::protobuf
#endif
+#ifdef __ANDROID__  // NOTE(review): presumably works around an Android toolchain lacking std::to_string/std::round - confirm
+#include <sstream>
+namespace std {
+
+template <typename T>
+std::string to_string(T value)  // formats |value| by streaming it into an ostringstream
+{
+ std::ostringstream os ;
+ os << value ;
+ return os.str() ;
+}
+
+#ifdef __ARM_ARCH_7A__
+double round(double x);  // declaration only; no definition appears in this header
+#endif
+}
+#endif
+
namespace toco {
namespace port {
diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh
index e8c6edd7ba..a28fc3a87f 100755
--- a/tensorflow/contrib/makefile/compile_nsync.sh
+++ b/tensorflow/contrib/makefile/compile_nsync.sh
@@ -270,7 +270,7 @@ for arch in $archs; do
PLATFORM_LDFLAGS=-pthread
MKDEP=${CC} -M -std=c++11
PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \
- ../../platform/c++11/src/per_thread_waiter.cc \
+ ../../platform/posix/src/per_thread_waiter.c \
../../platform/c++11/src/yield.cc \
../../platform/c++11/src/time_rep_timespec.cc \
../../platform/c++11/src/nsync_panic.cc
diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh
index eff9081e35..48953e2e38 100755
--- a/tensorflow/contrib/makefile/download_dependencies.sh
+++ b/tensorflow/contrib/makefile/download_dependencies.sh
@@ -27,9 +27,7 @@ if [ ! -f $BZL_FILE_PATH ]; then
fi
EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)"
-# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once
-# the archive has been propagated in mirror.bazel.build.
-GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)"
+GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)"
GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz"
NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py
index 2ed99d50a4..a6be2084aa 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py
@@ -2503,7 +2503,7 @@ def _compute_recall_at_precision(tp, fp, fn, precision, name):
name: An optional variable_scope name.
Returns:
- The recall at a the given `precision`.
+ The recall at a given `precision`.
"""
precisions = math_ops.div(tp, tp + fp + _EPSILON)
tf_index = math_ops.argmin(
diff --git a/tensorflow/contrib/mpi_collectives/kernels/ring.h b/tensorflow/contrib/mpi_collectives/kernels/ring.h
index 1d56d588bc..c001615d3f 100644
--- a/tensorflow/contrib/mpi_collectives/kernels/ring.h
+++ b/tensorflow/contrib/mpi_collectives/kernels/ring.h
@@ -129,7 +129,7 @@ cudaStream_t CudaStreamForMPI();
* has the fully accumulated Segment 1; and so on. The scatter-reduce is
* complete.
*
- * Next, the allgather distributes these fully accumululated chunks across all
+ * Next, the allgather distributes these fully accumulated chunks across all
* nodes. Communication proceeds in the same ring, once again in N-1 steps. At
* the ith step, node j will send chunk (j - i + 1) and receive chunk (j - i).
* For example, at the first iteration, the following transfers will occur:
diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py
index 21bf3f5313..915e6504e1 100644
--- a/tensorflow/contrib/opt/python/training/adamax_test.py
+++ b/tensorflow/contrib/opt/python/training/adamax_test.py
@@ -224,8 +224,10 @@ class AdaMaxOptimizerTest(test.TestCase):
var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1)
# Validate updated params
- self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
- self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
+ self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0),
+ rtol=1e-2)
+ self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1),
+ rtol=1e-2)
if use_resource:
self.assertEqual("var0_%d/AdaMax:0" % (i,),
opt.get_slot(var=var0, name="m").name)
diff --git a/tensorflow/contrib/opt/python/training/model_average_optimizer.py b/tensorflow/contrib/opt/python/training/model_average_optimizer.py
index a7c97a1da2..b6b10e500b 100644
--- a/tensorflow/contrib/opt/python/training/model_average_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/model_average_optimizer.py
@@ -62,7 +62,7 @@ class ModelAverageCustomGetter(object):
"""
def __init__(self, worker_device):
- """Create a new `ElasticAverageCustomGetter`.
+ """Create a new `ModelAverageCustomGetter`.
Args:
worker_device: String. Name of the `worker` job.
diff --git a/tensorflow/contrib/periodic_resample/BUILD b/tensorflow/contrib/periodic_resample/BUILD
index 6ca7fe8b6e..aad1ca04c5 100644
--- a/tensorflow/contrib/periodic_resample/BUILD
+++ b/tensorflow/contrib/periodic_resample/BUILD
@@ -6,12 +6,13 @@ exports_files(["LICENSE"])
load(
"//tensorflow:tensorflow.bzl",
- "py_test",
+ "tf_cc_test",
"tf_gen_op_libs",
"tf_custom_op_library",
"tf_custom_op_py_library",
"tf_gen_op_wrapper_py",
)
+load("//tensorflow:tensorflow.bzl", "py_test")
cc_library(
name = "all_ops",
@@ -84,6 +85,23 @@ py_test(
":init_py",
"//tensorflow/contrib/util:util_py",
"//tensorflow/python:framework_test_lib",
+ "//tensorflow/python:gradient_checker",
+ ],
+)
+
+tf_cc_test(
+ name = "periodic_resample_op_cc_test",
+ size = "small",
+ srcs = [
+ "ops/array_ops_test.cc",
+ ],
+ deps = [
+ ":all_ops",
+ "//tensorflow/core:framework",
+ "//tensorflow/core:protos_all_proto",
+ "//tensorflow/core:test",
+ "//tensorflow/core:test_main",
+ "//tensorflow/core:testlib",
],
)
diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc
index e18923c8aa..514689cf45 100644
--- a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc
+++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc
@@ -22,4 +22,9 @@ namespace tensorflow {
REGISTER_KERNEL_BUILDER(Name("PeriodicResample").Device(DEVICE_CPU),
PeriodicResampleOp);
+
+REGISTER_KERNEL_BUILDER(Name("PeriodicResampleOpGrad")
+ .Device(DEVICE_CPU),
+ PeriodicResampleOpGrad);
+
} // namespace tensorflow
diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h
index 3ab588c458..42fba81a5c 100644
--- a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h
+++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h
@@ -25,92 +25,202 @@
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/util/work_sharder.h"
namespace {
-template <class IndexVecT, class IndexT>
-IndexT compute_input_index(
- IndexVecT* target_dimensions, const IndexT& output_index,
- const IndexVecT& original_dimensions, const int& adjustable_dimension,
- const std::vector<tensorflow::int64>& dimension_ceiling,
- const std::vector<tensorflow::int64>& cumulative_dimensions, IndexT* result,
- std::vector<IndexT>* output_indices, const int& rank) {
- *result = 0;
- output_indices->clear();
+// Computes the linear input tensor index for a given linear output index
+// during forward propagation through the periodic_resample operation.
+class InputIndexer {  // position with MoveToOutputIndex(), then step with IncrementOutputIndex()
+ public:
+ InputIndexer(const std::vector<tensorflow::int64>& output_dimensions,
+ const tensorflow::TensorShape& input_shape,  // supplies rank_ and the input dimension sizes
+ int adjustable_dimension)
+ : output_dimensions_(output_dimensions),
+ adjustable_dimension_(adjustable_dimension),
+ rank_(input_shape.dims()),
+ linear_output_index_(0),
+ linear_input_index_(0),
+ adjustable_dimension_carriage_sum_(0) {
+ auto input_dimensions = TensorShapeToVector(input_shape);
+ // per-dimension ceil(output_dimensions[i] / input_dimensions[i]); see
+ // ComputeDimensionCeiling
+ dimension_ceiling_ =
+ ComputeDimensionCeiling(output_dimensions, input_dimensions);
+ cumulative_dimensions_ = ComputeCumulativeDimensions();  // reads dimension_ceiling_, so must follow it
+
+ output_indices_.resize(output_dimensions_.size());
+ input_indices_.resize(output_dimensions_.size());
+
+ // Compute index_factors_: row-major strides of the input (last dimension has stride 1)
+ index_factors_.resize(rank_);
+ tensorflow::int64 last_index_factor = 1;
+ for (auto r = rank_ - 1; r >= 0; --r) {
+ index_factors_[r] = last_index_factor;
+ last_index_factor *= input_dimensions[r];
+ }
+ }
+
+ tensorflow::int64 linear_input_index() const { return linear_input_index_; }  // input index for the current output position
+
+ void MoveToOutputIndex(tensorflow::int64 output_index);  // recomputes all state for an arbitrary linear output index
+ void IncrementOutputIndex();  // advances to the next linear output index, updating state incrementally
+
+ private:
+ void RecomputeInputAdjustableDimensionIndex() {  // input index along the adjustable dimension
+ tensorflow::int64 index = adjustable_dimension_carriage_sum_;
+ index *= output_dimensions_[adjustable_dimension_];
+ index += output_indices_[adjustable_dimension_];
+ input_indices_[adjustable_dimension_] = index;  // = carriage_sum * output_dim + output_index
+ }
+
+ std::vector<tensorflow::int64> TensorShapeToVector(
+ const tensorflow::TensorShape& tensor_shape);
+
+ std::vector<tensorflow::int64> ComputeDimensionCeiling(
+ const std::vector<tensorflow::int64>& output_dimensions,
+ const std::vector<tensorflow::int64>& input_dimensions);
+
+ std::vector<tensorflow::int64> ComputeCumulativeDimensions();
+
+ const std::vector<tensorflow::int64> output_dimensions_;
+ std::vector<tensorflow::int64> dimension_ceiling_;  // (output_dim + input_dim - 1) / input_dim per dimension
+ std::vector<tensorflow::int64> index_factors_;  // multiplier applied to input_indices_[r] in the linear input index
+ std::vector<tensorflow::int64> cumulative_dimensions_;  // running product of dimension_ceiling_ over preceding dimensions
+ std::vector<tensorflow::int64> output_indices_;  // per-dimension coordinates of the current output position
+ std::vector<tensorflow::int64> input_indices_;  // per-dimension coordinates of the matching input position
+
+ const int adjustable_dimension_;
+ const int rank_;
+ tensorflow::int64 linear_output_index_;
+ tensorflow::int64 linear_input_index_;
+ tensorflow::int64 adjustable_dimension_carriage_sum_;  // sum over non-adjustable dims of cumulative_dimensions_[d] * (output_indices_[d] % dimension_ceiling_[d])
+};
+
+void InputIndexer::MoveToOutputIndex(tensorflow::int64 output_index) {
+ linear_output_index_ = output_index;
+ linear_input_index_ = 0;
// un-rasterize the output index
auto last_reduced_i = output_index;
- for (auto r = rank - 1; r >= 0; --r) {
- (*output_indices)[r] = last_reduced_i % (*target_dimensions)[r];
+ for (auto r = rank_ - 1; r >= 0; --r) {
+ output_indices_[r] = last_reduced_i % output_dimensions_[r];
last_reduced_i =
- (last_reduced_i - (*output_indices)[r]) / (*target_dimensions)[r];
+ (last_reduced_i - output_indices_[r]) / output_dimensions_[r];
}
+ tensorflow::int64 carriage_sum = 0;
+ for (int qi = 0; qi < rank_; ++qi) {
+ if (qi == adjustable_dimension_) continue;
+ carriage_sum += cumulative_dimensions_[qi] *
+ (output_indices_[qi] % dimension_ceiling_[qi]);
+ }
+ adjustable_dimension_carriage_sum_ = carriage_sum;
+
// rasterize the input index
- IndexT last_index_factor = 1;
- for (auto r = rank - 1; r >= 0; --r) {
- IndexT index = 0;
- if (r != adjustable_dimension)
- index = (*output_indices)[r] / dimension_ceiling[r];
- else {
- for (int qi = 0; qi < rank; ++qi) {
- if (qi == adjustable_dimension) continue;
- index += cumulative_dimensions[qi] *
- ((*output_indices)[qi] % dimension_ceiling[qi]);
- }
- index *= (*target_dimensions)[adjustable_dimension];
- index += (*output_indices)[r];
+ for (auto r = rank_ - 1; r >= 0; --r) {
+ if (r != adjustable_dimension_) {
+ input_indices_[r] = output_indices_[r] / dimension_ceiling_[r];
+ } else {
+ RecomputeInputAdjustableDimensionIndex();
}
- *result += last_index_factor * index;
- last_index_factor *= original_dimensions[r];
}
+ for (auto r = rank_ - 1; r >= 0; --r) {
+ linear_input_index_ += index_factors_[r] * input_indices_[r];
+ }
+}
+
+void InputIndexer::IncrementOutputIndex() {
+ linear_output_index_++;
+ for (auto r = rank_ - 1; r >= 0; --r) {
+ auto old_carriage_sum_increment =
+ cumulative_dimensions_[r] *
+ (output_indices_[r] % dimension_ceiling_[r]);
+ output_indices_[r] = (output_indices_[r] + 1) % output_dimensions_[r];
+ if (r != adjustable_dimension_) {
+ auto new_input_index = output_indices_[r] / dimension_ceiling_[r];
+ linear_input_index_ +=
+ (new_input_index - input_indices_[r]) * index_factors_[r];
+
+ input_indices_[r] = new_input_index;
+
+ auto new_carriage_sum_increment =
+ cumulative_dimensions_[r] *
+ (output_indices_[r] % dimension_ceiling_[r]);
- return *result;
+ adjustable_dimension_carriage_sum_ = adjustable_dimension_carriage_sum_ -
+ old_carriage_sum_increment +
+ new_carriage_sum_increment;
+ }
+
+ if (output_indices_[r] != 0) {
+ // No more carries to higher indices.
+ break;
+ }
+ }
+ auto old_adjustable_dimension_input_index =
+ input_indices_[adjustable_dimension_];
+ RecomputeInputAdjustableDimensionIndex();
+ linear_input_index_ += (input_indices_[adjustable_dimension_] -
+ old_adjustable_dimension_input_index) *
+ index_factors_[adjustable_dimension_];
}
-template <class InputDataT,
- class IndexVecT> // both types are needed here b/c IndexVecT and
- // InputDataT are not related
- void
- fill_periodic_tensor(
- tensorflow::OpKernelContext* context,
- const IndexVecT& desired_shape,
- const tensorflow::Tensor& input_tensor) {
- // input is a strided array (last index is fastest, C-ordered)
- auto input = input_tensor.flat<InputDataT>();
- const int rank = input_tensor.dims();
- // original and target dimensions
- std::vector<tensorflow::int64> original_dimensions(rank),
- target_dimensions(rank);
- tensorflow::int64 total_size(input_tensor.NumElements()), new_sliced_size(1);
- // factors by which original_dimensions increases/decreases w.r.t.
- // target_dimensions
- std::vector<tensorflow::int64> dimension_ceiling(rank),
- cumulative_dimensions(rank);
- // index of adjustable dimension
- int adjustable_dimension;
- tensorflow::TensorShape output_shape;
+// Copies the size of every dimension of |tensor_shape| into a vector, in
+// dimension order.
+std::vector<tensorflow::int64> InputIndexer::TensorShapeToVector(
+    const tensorflow::TensorShape& tensor_shape) {
+  std::vector<tensorflow::int64> dim_sizes(tensor_shape.dims());
+  auto next_slot = dim_sizes.begin();
+  for (const auto dim : tensor_shape) {
+    *next_slot++ = dim.size;
+  }
+  return dim_sizes;
+}
- // requires that the rank of the input tensor and length of the desired shape
- // are equal
- OP_REQUIRES(context, rank == desired_shape.size(),
- tensorflow::errors::InvalidArgument(
- "periodic_resample expects the rank of the input tensor, ",
- rank, ", to be the same as the length of the desired shape, ",
- desired_shape.size(), "."));
+// Returns, for each dimension d, output_dimensions[d] / input_dimensions[d]
+// rounded up (for positive sizes), using integer arithmetic only.
+std::vector<tensorflow::int64> InputIndexer::ComputeDimensionCeiling(
+    const std::vector<tensorflow::int64>& output_dimensions,
+    const std::vector<tensorflow::int64>& input_dimensions) {
+  const size_t num_dims = input_dimensions.size();
+  std::vector<tensorflow::int64> ceilings(num_dims);
+  for (size_t d = 0; d < num_dims; ++d) {
+    const tensorflow::int64 in_dim = input_dimensions[d];
+    const tensorflow::int64 out_dim = output_dimensions[d];
+    ceilings[d] = (out_dim + in_dim - 1) / in_dim;
+  }
+  return ceilings;
+}
- bool found = false;
- const auto& input_tensor_shape = input_tensor.shape();
+// Builds the running product of dimension_ceiling_: entry 0 is 1 and entry d
+// is entry d-1 multiplied by dimension_ceiling_[d-1].
+std::vector<tensorflow::int64> InputIndexer::ComputeCumulativeDimensions() {
+  std::vector<tensorflow::int64> running_product(rank_);
+  for (int d = 0; d < rank_; ++d) {
+    running_product[d] =
+        (d == 0) ? 1 : running_product[d - 1] * dimension_ceiling_[d - 1];
+  }
+  return running_product;
+}
+template <typename IndexVecT>
+void process_desired_shape(tensorflow::OpKernelContext* context,
+ const tensorflow::TensorShape& input_tensor_shape,
+ const IndexVecT& desired_shape,
+ int* adjustable_dimension,
+ std::vector<tensorflow::int64>* target_dimensions,
+ tensorflow::int64* output_size) {
+ tensorflow::int64 new_sliced_size = 1;
+ bool found = false;
+ const int rank = input_tensor_shape.dims();
for (int i = 0; i < rank; ++i) {
- // if (desired_shape(i) < 1) {
if (desired_shape[i] < 1) {
// only one index can be adjustable
OP_REQUIRES(context, !found,
tensorflow::errors::InvalidArgument(
"periodic_resample expects only "
"one index to be marked as adjustable."));
- adjustable_dimension = i;
+ *adjustable_dimension = i;
found = true;
} else {
OP_REQUIRES(
@@ -122,9 +232,8 @@ template <class InputDataT,
i, " input tensor has size ", input_tensor_shape.dim_size(i),
", desired shape has size ", desired_shape[i], "."));
- // target_dimensions[i] = desired_shape(i);
- target_dimensions[i] = desired_shape[i];
- new_sliced_size *= target_dimensions[i];
+ (*target_dimensions)[i] = desired_shape[i];
+ new_sliced_size *= (*target_dimensions)[i];
}
}
// at least one index needs to be adjustable
@@ -132,26 +241,50 @@ template <class InputDataT,
tensorflow::errors::InvalidArgument(
"periodic_resample expects at least "
"one index to be marked as adjustable."));
+ (*target_dimensions)[*adjustable_dimension] =
+ input_tensor_shape.num_elements() / new_sliced_size;
- int count = 0;
- for (const auto dim_info : input_tensor.shape()) {
- original_dimensions[count] = dim_info.size;
- ++count;
- }
+ *output_size = new_sliced_size * (*target_dimensions)[*adjustable_dimension];
+}
- target_dimensions[adjustable_dimension] = total_size / new_sliced_size;
+// Heuristic number based on measurements on
+// Intel(R) Core(TM) i7-4930K CPU @ 3.40GHz
+const tensorflow::int64 costPerFillIndex = 35;
- count = 0;
- for (int i = 0; i < input_tensor.shape().dims(); ++i) {
- dimension_ceiling[count] = tensorflow::int64(std::ceil(
- float(target_dimensions[count]) / float(original_dimensions[count])));
- if (count == 0)
- cumulative_dimensions[count] = 1;
- else
- cumulative_dimensions[count] =
- cumulative_dimensions[count - 1] * dimension_ceiling[count - 1];
- ++count;
- }
+enum class Mode {
+ kForward,
+ kGradient
+};
+
+// Computes either periodic_resample operation output or gradients for it,
+// depending on |mode|.
+// |original_shape| is always shape of input to periodic_resample operation.
+// |source_tensor| is either source for periodic_resample (for forward mode)
+// or gradients tensor.
+// |desired_shape| is always the shape, provided by the user, to which forward
+// propagation attempts to resample the input tensor.
+template <class InputDataT, Mode mode>
+void
+do_periodic_resample_op(tensorflow::OpKernelContext* context,
+ const tensorflow::TensorShape& original_shape,
+ const tensorflow::PartialTensorShape& desired_shape,
+ const tensorflow::Tensor& source_tensor) {
+ const int rank = source_tensor.dims();
+
+ // requires that the rank of the input tensor and length of the desired shape
+ // are equal
+ OP_REQUIRES(context, rank == desired_shape.dims(),
+ tensorflow::errors::InvalidArgument(
+ "periodic_resample expects the rank of the input tensor, ",
+ rank, ", to be the same as the length of the desired shape, ",
+ desired_shape.dims(), "."));
+
+ std::vector<tensorflow::int64> target_dimensions(rank);
+ tensorflow::int64 new_size = 0;
+ // index of adjustable dimension
+ int adjustable_dimension = 0;
+ process_desired_shape(context, original_shape, desired_shape.dim_sizes(),
+ &adjustable_dimension, &target_dimensions, &new_size);
// ensure that the new dimension is greater than zero
OP_REQUIRES(context, target_dimensions[adjustable_dimension] > 0,
@@ -160,11 +293,14 @@ template <class InputDataT,
"adjustable dimension, ",
adjustable_dimension, ", isn't greater than zero, ",
target_dimensions[adjustable_dimension], "."));
- for (int i = 0; i < rank; ++i) {
- output_shape.AddDim(target_dimensions[i]);
+ tensorflow::TensorShape output_shape;
+ if (mode == Mode::kForward) {
+ for (int i = 0; i < rank; ++i) {
+ output_shape.AddDim(target_dimensions[i]);
+ }
+ } else {
+ output_shape = original_shape;
}
- const auto new_size =
- new_sliced_size * target_dimensions[adjustable_dimension];
// Create an output tensor and attach it to the current context
tensorflow::Tensor* output_tensor = nullptr;
@@ -172,47 +308,73 @@ template <class InputDataT,
context->allocate_output(0, output_shape, &output_tensor));
auto output = output_tensor->flat<InputDataT>();
- // memory is allocated for these variables outside the inner loop for
- // efficiency (although, I could create a separate class scope for
- // this purpose instead)
- tensorflow::int64 result = 0;
- std::vector<tensorflow::int64> output_indices(target_dimensions.size());
+ // input is a strided array (last index is fastest, C-ordered)
+ auto input = source_tensor.flat<InputDataT>();
// Fill output tensor with periodically resampled input tensor values
- for (tensorflow::int64 output_index = 0; output_index < new_size;
- ++output_index) {
- output(output_index) = input(compute_input_index(
- &target_dimensions, output_index, original_dimensions,
- adjustable_dimension, dimension_ceiling, cumulative_dimensions, &result,
- &output_indices, rank));
- }
+ InputIndexer input_indexer(target_dimensions, original_shape,
+ adjustable_dimension);
+
+ auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads());
+ auto fill_output_tensor = [&input_indexer, &output, &input](
+ tensorflow::int64 start, tensorflow::int64 limit) {
+ InputIndexer local_indexer(input_indexer);
+ local_indexer.MoveToOutputIndex(start);
+ for (tensorflow::int64 output_index = start; output_index < limit;
+ ++output_index) {
+ if (mode == Mode::kForward) {
+ output(output_index) = input(local_indexer.linear_input_index());
+ } else {
+ output(local_indexer.linear_input_index()) = input(output_index);
+ }
+ local_indexer.IncrementOutputIndex();
+ }
+ };
+ ::tensorflow::Shard(worker_threads.num_threads, worker_threads.workers,
+ new_size, costPerFillIndex, fill_output_tensor);
}
+#define DATA_TYPE_SWITCH(data_type, context, CASE) \
+ switch (data_type) { \
+ CASE(float) \
+ CASE(double) \
+ CASE(tensorflow::int32) \
+ CASE(tensorflow::int64) \
+ default: \
+ context->CtxFailure(__FILE__, __LINE__, \
+ tensorflow::errors::InvalidArgument( \
+ "Unsuppored tensor elements type")); \
+ break; \
+ }
+
void create_output_tensor(
tensorflow::OpKernelContext* context,
const tensorflow::Tensor& input_tensor,
const tensorflow::DataType& input_tensor_type,
- const tensorflow::PartialTensorShape& desired_shape_tensor) {
- auto desired_shape = desired_shape_tensor.dim_sizes();
-
- // obligatory type switch
- switch (input_tensor_type) {
- case tensorflow::DataTypeToEnum<float>::value:
- fill_periodic_tensor<float>(context, desired_shape, input_tensor);
+ const tensorflow::PartialTensorShape& desired_shape) {
+#define CASE(type) \
+ case tensorflow::DataTypeToEnum<type>::value: \
+ do_periodic_resample_op<type, Mode::kForward>( \
+ context, input_tensor.shape(), desired_shape, input_tensor); \
break;
- case tensorflow::DataTypeToEnum<double>::value:
- fill_periodic_tensor<double>(context, desired_shape, input_tensor);
- break;
- case tensorflow::DataTypeToEnum<tensorflow::int32>::value:
- fill_periodic_tensor<tensorflow::int32>(context, desired_shape,
- input_tensor);
- break;
- case tensorflow::DataTypeToEnum<tensorflow::int64>::value:
- fill_periodic_tensor<tensorflow::int64>(context, desired_shape,
- input_tensor);
+
+ DATA_TYPE_SWITCH(input_tensor_type, context, CASE);
+#undef CASE
+}
+
+void create_grad_tensor(tensorflow::OpKernelContext* context,
+ const tensorflow::Tensor& grad_tensor,
+ const tensorflow::DataType& grad_tensor_type,
+ const tensorflow::TensorShape& original_shape,
+ const tensorflow::PartialTensorShape& desired_shape) {
+#define CASE(type) \
+ case tensorflow::DataTypeToEnum<type>::value: \
+ do_periodic_resample_op<type, Mode::kGradient>( \
+ context, original_shape, desired_shape, grad_tensor); \
break;
- default:;
- }
+
+ DATA_TYPE_SWITCH(grad_tensor_type, context, CASE);
+#undef CASE
}
} // namespace
@@ -238,4 +400,25 @@ class PeriodicResampleOp : public tensorflow::OpKernel {
tensorflow::PartialTensorShape desired_shape;
};
+class PeriodicResampleOpGrad : public tensorflow::OpKernel {
+ public:
+ explicit PeriodicResampleOpGrad(tensorflow::OpKernelConstruction* context)
+ : tensorflow::OpKernel(context) {
+ OP_REQUIRES_OK(context,
+ context->GetAttr("original_shape", &original_shape));
+ OP_REQUIRES_OK(context, context->GetAttr("desired_shape", &desired_shape));
+ }
+
+ void Compute(tensorflow::OpKernelContext* context) override {
+ const tensorflow::Tensor& grad_tensor = context->input(0);
+ const tensorflow::DataType grad_tensor_type = context->input_dtype(0);
+ create_grad_tensor(context, grad_tensor, grad_tensor_type, original_shape,
+ desired_shape);
+ }
+
+ private:
+ tensorflow::TensorShape original_shape;
+ tensorflow::PartialTensorShape desired_shape;
+};
+
#endif // TENSORFLOW_KERNELS_PERIODICRESAMPLE_OP_H_
diff --git a/tensorflow/contrib/periodic_resample/ops/array_ops.cc b/tensorflow/contrib/periodic_resample/ops/array_ops.cc
index 82bd796956..fd38cd09b4 100644
--- a/tensorflow/contrib/periodic_resample/ops/array_ops.cc
+++ b/tensorflow/contrib/periodic_resample/ops/array_ops.cc
@@ -26,7 +26,42 @@ REGISTER_OP("PeriodicResample")
.Input("values: T")
.Attr("shape: shape")
.Output("output: T")
- .SetShapeFn(shape_inference::ExplicitShape)
+ .SetShapeFn([](shape_inference::InferenceContext* c) {
+ tensorflow::PartialTensorShape desired_shape;
+ TF_RETURN_IF_ERROR(c->GetAttr("shape", &desired_shape));
+ shape_inference::ShapeHandle input_tensor_shape = c->input(0);
+ shape_inference::DimensionHandle num_input_elements =
+ c->NumElements(input_tensor_shape);
+ shape_inference::ShapeHandle result_shape_handle;
+ if (!shape_inference::InferenceContext::ValueKnown(num_input_elements)) {
+ TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(
+ desired_shape, &result_shape_handle));
+ } else {
+ const int rank = c->Rank(input_tensor_shape);
+ std::vector<tensorflow::int64> target_dimensions(rank);
+ tensorflow::int64 new_sliced_size = 1;
+ int adjustable_dimension = 0;
+ for (int i = 0; i < rank; ++i) {
+ if (desired_shape.dim_size(i) < 1) {
+ adjustable_dimension = i;
+ } else {
+ target_dimensions[i] = desired_shape.dim_size(i);
+ new_sliced_size *= target_dimensions[i];
+ }
+ }
+ target_dimensions[adjustable_dimension] =
+ shape_inference::InferenceContext::Value(
+ num_input_elements) / new_sliced_size;
+ tensorflow::TensorShape result_shape;
+ for (int i = 0; i < rank; ++i) {
+ result_shape.AddDim(target_dimensions[i]);
+ }
+ TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape(
+ result_shape, &result_shape_handle));
+ }
+ c->set_output(0, result_shape_handle);
+ return Status::OK();
+ })
.Doc(R"doc(
Periodically resample elements of a tensor to conform to `shape`.
@@ -101,4 +136,20 @@ output: Periodically resampled tensor that has dimensions specified as in
)doc");
+
+REGISTER_OP("PeriodicResampleOpGrad")
+ .Attr("T: numbertype")
+ .Input("grad: T")
+ .Attr("original_shape: shape")
+ .Attr("desired_shape: shape")
+ .Output("grad_values: T")
+ .SetShapeFn([](shape_inference::InferenceContext* c) {
+ tensorflow::TensorShape original_shape;
+ TF_RETURN_IF_ERROR(c->GetAttr("original_shape", &original_shape));
+ shape_inference::ShapeHandle s;
+ TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape(original_shape, &s));
+ c->set_output(0, s);
+ return Status::OK();
+});
+
} // namespace tensorflow
diff --git a/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc b/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc
new file mode 100644
index 0000000000..43b7c1799f
--- /dev/null
+++ b/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc
@@ -0,0 +1,41 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/shape_inference_testutil.h"
+#include "tensorflow/core/framework/tensor_shape.pb.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+
+TEST(ArrayOpsTest, PeriodicResample_ShapeFn) {
+ ShapeInferenceTestOp op("PeriodicResample");
+ // Case 1: output shape can be fully inferred.
+ PartialTensorShape shape({4, 4, -1});
+ TensorShapeProto shape_proto;
+ shape.AsProto(&shape_proto);
+
+ TF_ASSERT_OK(NodeDefBuilder("test", "PeriodicResample")
+ .Input({"values", 0, DT_INT32})
+ .Attr("shape", shape_proto)
+ .Finalize(&op.node_def));
+ INFER_OK(op, "[2,2,4]", "[4,4,1]");
+ // Case 2: output shape can not be inferred - report desired shape.
+ INFER_OK(op, "[2,2,?]", "[4,4,?]");
+}
+
+} // end namespace tensorflow
diff --git a/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py
index a25de55e18..31a6fe1d94 100644
--- a/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py
+++ b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py
@@ -21,8 +21,11 @@ from __future__ import print_function
import numpy
from tensorflow.contrib.periodic_resample import periodic_resample
+from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import variables
from tensorflow.python.platform import googletest
@@ -93,7 +96,6 @@ class PeriodicResampleTest(test_util.TensorFlowTestCase):
def testPeriodicResampleErrors(self):
input_tensor = numpy.zeros(shape=[1, 2, 2, 4])
with self.test_session():
- variables.global_variables_initializer().run()
with self.assertRaisesWithPredicateMatch(
errors_impl.InvalidArgumentError,
'Dimension 3 input tensor has size 4, desired shape has size 1'):
@@ -103,6 +105,29 @@ class PeriodicResampleTest(test_util.TensorFlowTestCase):
'4, to be the same as the length of the desired shape, 3'):
periodic_resample(input_tensor, [None, 4, 4]).eval()
+ def testPeriodicResampleGradient(self):
+ desired_shape = numpy.array([4, 4, None])
+ result_shape = (4, 4, 1)
+ input_shape = (2, 2, 4)
+ with self.test_session() as sess:
+ x = array_ops.placeholder(dtypes.float32, shape=input_shape)
+ output = periodic_resample(x, desired_shape)
+ error = gradient_checker.compute_gradient_error(
+ x, input_shape, output, result_shape)
+ self.assertLess(error, 1e-4)
+
+ def testPeriodicResampleShapeInference(self):
+ with self.test_session() as sess:
+ # Case 1: output shape can be fully inferred.
+ x = array_ops.placeholder(dtypes.float32, shape=(2, 2, 4))
+ output = periodic_resample(x, [4, 4, None])
+ self.assertEqual(output.shape, [4, 4, 1])
+ # Case 2: output shape can not be inferred - report desired shape.
+ x = array_ops.placeholder(dtypes.float32, shape=(2, 2, None))
+ output = periodic_resample(x, [4, 4, None])
+ self.assertTrue(output.shape.is_compatible_with([4, 4, None]))
+ self.assertEqual(output.shape[2].value, None)
+
if __name__ == '__main__':
googletest.main()
diff --git a/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py
index 348623d8f8..470e300ccb 100644
--- a/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py
+++ b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py
@@ -21,11 +21,17 @@ from __future__ import print_function
# pylint: disable=unused-import
from tensorflow.contrib.periodic_resample.python.ops import gen_periodic_resample_op
-from tensorflow.contrib.periodic_resample.python.ops.gen_periodic_resample_op import periodic_resample
+from tensorflow.contrib.periodic_resample.python.ops.gen_periodic_resample_op import periodic_resample, periodic_resample_op_grad
from tensorflow.contrib.util import loader
+from tensorflow.python.framework import ops
from tensorflow.python.platform import resource_loader
# pylint: enable=unused-import
_periodic_resample_op = loader.load_op_library(
resource_loader.get_path_to_datafile('_periodic_resample_op.so'))
+
+@ops.RegisterGradient("PeriodicResample")
+def _periodic_resample_grad_cc(op, grad):
+ return periodic_resample_op_grad(
+ grad, op.inputs[0].shape, op.get_attr('shape'))
diff --git a/tensorflow/contrib/predictor/contrib_estimator_predictor.py b/tensorflow/contrib/predictor/contrib_estimator_predictor.py
index b7a98c68e2..af3b2ad1b5 100644
--- a/tensorflow/contrib/predictor/contrib_estimator_predictor.py
+++ b/tensorflow/contrib/predictor/contrib_estimator_predictor.py
@@ -34,7 +34,8 @@ class ContribEstimatorPredictor(predictor.Predictor):
prediction_input_fn,
input_alternative_key=None,
output_alternative_key=None,
- graph=None):
+ graph=None,
+ config=None):
"""Initialize a `ContribEstimatorPredictor`.
Args:
@@ -48,6 +49,7 @@ class ContribEstimatorPredictor(predictor.Predictor):
multi-headed models.
graph: Optional. The Tensorflow `graph` in which prediction should be
done.
+ config: `ConfigProto` proto used to configure the session.
"""
self._graph = graph or ops.Graph()
with self._graph.as_default():
@@ -58,6 +60,7 @@ class ContribEstimatorPredictor(predictor.Predictor):
checkpoint_path = saver.latest_checkpoint(estimator.model_dir)
self._session = monitored_session.MonitoredSession(
session_creator=monitored_session.ChiefSessionCreator(
+ config=config,
checkpoint_filename_with_path=checkpoint_path))
input_alternative_key = (
diff --git a/tensorflow/contrib/predictor/core_estimator_predictor.py b/tensorflow/contrib/predictor/core_estimator_predictor.py
index d78d94c269..a725072e72 100644
--- a/tensorflow/contrib/predictor/core_estimator_predictor.py
+++ b/tensorflow/contrib/predictor/core_estimator_predictor.py
@@ -51,7 +51,8 @@ class CoreEstimatorPredictor(predictor.Predictor):
estimator,
serving_input_receiver_fn,
output_key=None,
- graph=None):
+ graph=None,
+ config=None):
"""Initialize a `CoreEstimatorPredictor`.
Args:
@@ -62,6 +63,7 @@ class CoreEstimatorPredictor(predictor.Predictor):
`None`, then `DEFAULT_SERVING_SIGNATURE_DEF_KEY` is used.
graph: Optional. The Tensorflow `graph` in which prediction should be
done.
+ config: `ConfigProto` proto used to configure the session.
"""
self._graph = graph or ops.Graph()
with self._graph.as_default():
@@ -71,6 +73,7 @@ class CoreEstimatorPredictor(predictor.Predictor):
checkpoint_dir = estimator.model_dir
self._session = monitored_session.MonitoredSession(
session_creator=monitored_session.ChiefSessionCreator(
+ config=config,
checkpoint_dir=checkpoint_dir))
feed_tensor_info = signature_def.inputs
diff --git a/tensorflow/contrib/predictor/predictor_factories.py b/tensorflow/contrib/predictor/predictor_factories.py
index 6e77e934fe..f275bc15ad 100644
--- a/tensorflow/contrib/predictor/predictor_factories.py
+++ b/tensorflow/contrib/predictor/predictor_factories.py
@@ -30,7 +30,8 @@ def from_contrib_estimator(estimator,
prediction_input_fn,
input_alternative_key=None,
output_alternative_key=None,
- graph=None):
+ graph=None,
+ config=None):
"""Constructs a `Predictor` from a `tf.contrib.learn.Estimator`.
Args:
@@ -44,6 +45,7 @@ def from_contrib_estimator(estimator,
multi-headed models.
graph: Optional. The Tensorflow `graph` in which prediction should be
done.
+ config: `ConfigProto` proto used to configure the session.
Returns:
An initialized `Predictor`.
@@ -62,13 +64,15 @@ def from_contrib_estimator(estimator,
prediction_input_fn,
input_alternative_key=input_alternative_key,
output_alternative_key=output_alternative_key,
- graph=graph)
+ graph=graph,
+ config=config)
def from_estimator(estimator,
serving_input_receiver_fn,
output_key=None,
- graph=None):
+ graph=None,
+ config=None):
"""Constructs a `Predictor` from a `tf.python.estimator.Estimator`.
Args:
@@ -79,6 +83,7 @@ def from_estimator(estimator,
`None`, then `DEFAULT_SERVING_SIGNATURE_DEF_KEY` is used.
graph: Optional. The Tensorflow `graph` in which prediction should be
done.
+ config: `ConfigProto` proto used to configure the session.
Returns:
An initialized `Predictor`.
@@ -93,14 +98,19 @@ def from_estimator(estimator,
'tf.contrib.learn.Estimator. You likely want to call '
'from_contrib_estimator.')
return core_estimator_predictor.CoreEstimatorPredictor(
- estimator, serving_input_receiver_fn, output_key=output_key, graph=graph)
+ estimator,
+ serving_input_receiver_fn,
+ output_key=output_key,
+ graph=graph,
+ config=config)
def from_saved_model(export_dir,
signature_def_key=None,
signature_def=None,
tags=None,
- graph=None):
+ graph=None,
+ config=None):
"""Constructs a `Predictor` from a `SavedModel` on disk.
Args:
@@ -115,6 +125,7 @@ def from_saved_model(export_dir,
`SignatureDef`. Defaults to `DEFAULT_TAGS`.
graph: Optional. The Tensorflow `graph` in which prediction should be
done.
+ config: `ConfigProto` proto used to configure the session.
Returns:
An initialized `Predictor`.
@@ -128,4 +139,5 @@ def from_saved_model(export_dir,
signature_def_key=signature_def_key,
signature_def=signature_def,
tags=tags,
- graph=graph)
+ graph=graph,
+ config=config)
diff --git a/tensorflow/contrib/predictor/predictor_factories_test.py b/tensorflow/contrib/predictor/predictor_factories_test.py
index 578d9424b2..a2ef1dc3af 100644
--- a/tensorflow/contrib/predictor/predictor_factories_test.py
+++ b/tensorflow/contrib/predictor/predictor_factories_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
from tensorflow.contrib.predictor import predictor_factories
from tensorflow.contrib.predictor import testing_common
+from tensorflow.core.protobuf import config_pb2
from tensorflow.python.platform import test
MODEL_DIR_NAME = 'contrib/predictor/test_export_dir'
@@ -41,6 +42,11 @@ class PredictorFactoriesTest(test.TestCase):
"""Test loading from_saved_model with tags."""
predictor_factories.from_saved_model(self._export_dir, tags='serve')
+ def testFromSavedModelWithSessionConfig(self):
+ """Test loading from_saved_model with session config."""
+ predictor_factories.from_saved_model(
+ self._export_dir, config=config_pb2.ConfigProto())
+
def testFromSavedModelWithBadTags(self):
"""Test that loading fails for bad tags."""
bad_tags_regex = ('.*? could not be found in SavedModel')
@@ -53,6 +59,13 @@ class PredictorFactoriesTest(test.TestCase):
predictor_factories.from_contrib_estimator(
estimator, input_fn, output_alternative_key='sum')
+ def testFromContribEstimatorWithSessionConfig(self):
+ estimator = testing_common.get_arithmetic_estimator(core=False)
+ input_fn = testing_common.get_arithmetic_input_fn(core=False)
+ predictor_factories.from_contrib_estimator(
+ estimator, input_fn, output_alternative_key='sum',
+ config=config_pb2.ConfigProto())
+
def testFromContribEstimatorWithCoreEstimatorRaises(self):
estimator = testing_common.get_arithmetic_estimator(core=True)
input_fn = testing_common.get_arithmetic_input_fn(core=True)
@@ -64,6 +77,12 @@ class PredictorFactoriesTest(test.TestCase):
input_fn = testing_common.get_arithmetic_input_fn(core=True)
predictor_factories.from_estimator(estimator, input_fn)
+ def testFromCoreEstimatorWithSessionConfig(self):
+ estimator = testing_common.get_arithmetic_estimator(core=True)
+ input_fn = testing_common.get_arithmetic_input_fn(core=True)
+ predictor_factories.from_estimator(
+ estimator, input_fn, config=config_pb2.ConfigProto())
+
def testFromCoreEstimatorWithContribEstimatorRaises(self):
estimator = testing_common.get_arithmetic_estimator(core=False)
input_fn = testing_common.get_arithmetic_input_fn(core=False)
diff --git a/tensorflow/contrib/predictor/saved_model_predictor.py b/tensorflow/contrib/predictor/saved_model_predictor.py
index 0dbca0f813..95da6d04ed 100644
--- a/tensorflow/contrib/predictor/saved_model_predictor.py
+++ b/tensorflow/contrib/predictor/saved_model_predictor.py
@@ -121,7 +121,8 @@ class SavedModelPredictor(predictor.Predictor):
input_names=None,
output_names=None,
tags=None,
- graph=None):
+ graph=None,
+ config=None):
"""Initialize a `CoreEstimatorPredictor`.
Args:
@@ -142,6 +143,7 @@ class SavedModelPredictor(predictor.Predictor):
the correct `SignatureDef`. Defaults to `DEFAULT_TAGS`.
graph: Optional. The Tensorflow `graph` in which prediction should be
done.
+ config: `ConfigProto` proto used to configure the session.
Raises:
ValueError: If more than one of signature_def_key OR signature_def OR
(input_names AND output_names) is specified.
@@ -152,7 +154,7 @@ class SavedModelPredictor(predictor.Predictor):
self._graph = graph or ops.Graph()
with self._graph.as_default():
- self._session = session.Session()
+ self._session = session.Session(config=config)
loader.load(self._session, tags.split(','), export_dir)
if input_names is None:
diff --git a/tensorflow/contrib/quantize/README.md b/tensorflow/contrib/quantize/README.md
index c83623ec94..27a933c0f9 100644
--- a/tensorflow/contrib/quantize/README.md
+++ b/tensorflow/contrib/quantize/README.md
@@ -6,7 +6,7 @@ inference. The details of the transformation implemented in this package is
described here [1].
This is done using the
-[fake quantization op](https://www.tensorflow.org/versions/r0.12/api_docs/python/array_ops/fake_quantization).
+[fake quantization op](https://www.tensorflow.org/api_guides/python/array_ops#Fake_quantization).
Literature has shown that fixed point networks provide comparable performance to
floating point networks [2]. This is achieved by modeling the quantization
diff --git a/tensorflow/contrib/slim/python/slim/evaluation_test.py b/tensorflow/contrib/slim/python/slim/evaluation_test.py
index 94fc12ca81..3d0308aaf3 100644
--- a/tensorflow/contrib/slim/python/slim/evaluation_test.py
+++ b/tensorflow/contrib/slim/python/slim/evaluation_test.py
@@ -26,7 +26,6 @@ import time
import numpy as np
from tensorflow.contrib.framework.python.ops import variables as variables_lib
-from tensorflow.contrib.metrics.python.ops import metric_ops
from tensorflow.contrib.slim.python.slim import evaluation
from tensorflow.contrib.training.python.training import evaluation as evaluation_lib
from tensorflow.core.protobuf import saver_pb2
@@ -37,6 +36,7 @@ from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import metrics
from tensorflow.python.ops import variables
from tensorflow.python.platform import flags
from tensorflow.python.platform import gfile
@@ -89,8 +89,8 @@ class EvaluationTest(test.TestCase):
self._predictions, self._scale = TestModel(self._inputs)
def testFinalOpsOnEvaluationLoop(self):
- value_op, update_op = metric_ops.streaming_accuracy(self._predictions,
- self._labels)
+ value_op, update_op = metrics.accuracy(
+ labels=self._labels, predictions=self._predictions)
init_op = control_flow_ops.group(variables.global_variables_initializer(),
variables.local_variables_initializer())
# Create checkpoint and log directories:
@@ -136,9 +136,10 @@ class EvaluationTest(test.TestCase):
self.assertTrue(obj.hook_was_run)
def _create_names_to_metrics(self, predictions, labels):
- accuracy0, update_op0 = metric_ops.streaming_accuracy(predictions, labels)
- accuracy1, update_op1 = metric_ops.streaming_accuracy(predictions + 1,
- labels)
+ accuracy0, update_op0 = metrics.accuracy(
+ labels=labels, predictions=predictions)
+ accuracy1, update_op1 = metrics.accuracy(
+ labels=labels, predictions=predictions + 1)
names_to_values = {'Accuracy': accuracy0, 'Another_accuracy': accuracy1}
names_to_updates = {'Accuracy': update_op0, 'Another_accuracy': update_op1}
@@ -198,8 +199,8 @@ class EvaluationTest(test.TestCase):
predictions_limited = input.limit_epochs(self._predictions, num_epochs=1)
labels_limited = input.limit_epochs(self._labels, num_epochs=1)
- value_op, update_op = metric_ops.streaming_accuracy(
- predictions_limited, labels_limited)
+ value_op, update_op = metrics.accuracy(
+ labels=labels_limited, predictions=predictions_limited)
init_op = control_flow_ops.group(variables.global_variables_initializer(),
variables.local_variables_initializer())
@@ -260,8 +261,8 @@ class SingleEvaluationTest(test.TestCase):
self._prepareCheckpoint(checkpoint_path)
# Next, determine the metric to evaluate:
- value_op, update_op = metric_ops.streaming_accuracy(self._predictions,
- self._labels)
+ value_op, update_op = metrics.accuracy(
+ labels=self._labels, predictions=self._predictions)
# Run the evaluation and verify the results:
accuracy_value = evaluation.evaluate_once(
@@ -276,8 +277,8 @@ class SingleEvaluationTest(test.TestCase):
self._prepareCheckpoint(checkpoint_path)
# Next, determine the metric to evaluate:
- value_op, update_op = metric_ops.streaming_accuracy(self._predictions,
- self._labels)
+ value_op, update_op = metrics.accuracy(
+ labels=self._labels, predictions=self._predictions)
dumping_root = os.path.join(self.get_temp_dir(), 'tfdbg_dump_dir')
dumping_hook = hooks.DumpingDebugHook(dumping_root, log_usage=False)
diff --git a/tensorflow/contrib/summary/summary.py b/tensorflow/contrib/summary/summary.py
index 99ced53e11..d22b80ac88 100644
--- a/tensorflow/contrib/summary/summary.py
+++ b/tensorflow/contrib/summary/summary.py
@@ -21,6 +21,7 @@ from @{tf.summary.merge_all} to @{tf.summary.FileWriter}.
To use with eager execution enabled, write your code as follows:
+```python
global_step = tf.train.get_or_create_global_step()
summary_writer = tf.contrib.summary.create_file_writer(
train_dir, flush_millis=10000)
@@ -30,9 +31,11 @@ with summary_writer.as_default(), tf.contrib.summary.always_record_summaries():
tf.contrib.summary.scalar("loss", my_loss)
# In this case every call to tf.contrib.summary.scalar will generate a record
# ...
+```
To use it with graph execution, write your code as follows:
+```python
global_step = tf.train.get_or_create_global_step()
summary_writer = tf.contrib.summary.create_file_writer(
train_dir, flush_millis=10000)
@@ -53,7 +56,7 @@ with tf.Session(...) as sess:
while not_done_training:
sess.run([train_op, tf.contrib.summary.all_summary_ops()])
# ...
-
+```
"""
from __future__ import absolute_import
diff --git a/tensorflow/contrib/tensor_forest/client/eval_metrics.py b/tensorflow/contrib/tensor_forest/client/eval_metrics.py
index e893e1d1c8..d8236a0a6f 100644
--- a/tensorflow/contrib/tensor_forest/client/eval_metrics.py
+++ b/tensorflow/contrib/tensor_forest/client/eval_metrics.py
@@ -21,10 +21,10 @@ import numpy as np
from tensorflow.contrib import losses
from tensorflow.contrib.learn.python.learn.estimators import prediction_key
-from tensorflow.contrib.metrics.python.ops import metric_ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import metrics
from tensorflow.python.ops import nn
INFERENCE_PROB_NAME = prediction_key.PredictionKey.PROBABILITIES
@@ -38,12 +38,13 @@ def _top_k_generator(k):
targets = math_ops.to_int32(targets)
if targets.get_shape().ndims > 1:
targets = array_ops.squeeze(targets, axis=[1])
- return metric_ops.streaming_mean(nn.in_top_k(probabilities, targets, k))
+ return metrics.mean(nn.in_top_k(probabilities, targets, k))
return _top_k
def _accuracy(predictions, targets, weights=None):
- return metric_ops.streaming_accuracy(predictions, targets, weights=weights)
+ return metrics.accuracy(
+ labels=targets, predictions=predictions, weights=weights)
def _r2(probabilities, targets, weights=None):
@@ -53,7 +54,7 @@ def _r2(probabilities, targets, weights=None):
squares_residuals = math_ops.reduce_sum(
math_ops.square(targets - probabilities), 0)
score = 1 - math_ops.reduce_sum(squares_residuals / squares_total)
- return metric_ops.streaming_mean(score, weights=weights)
+ return metrics.mean(score, weights=weights)
def _squeeze_and_onehot(targets, depth):
@@ -62,7 +63,7 @@ def _squeeze_and_onehot(targets, depth):
def _sigmoid_entropy(probabilities, targets, weights=None):
- return metric_ops.streaming_mean(
+ return metrics.mean(
losses.sigmoid_cross_entropy(probabilities,
_squeeze_and_onehot(
targets,
@@ -71,7 +72,7 @@ def _sigmoid_entropy(probabilities, targets, weights=None):
def _softmax_entropy(probabilities, targets, weights=None):
- return metric_ops.streaming_mean(
+ return metrics.mean(
losses.sparse_softmax_cross_entropy(probabilities,
math_ops.to_int32(targets)),
weights=weights)
@@ -82,7 +83,7 @@ def _predictions(predictions, unused_targets, **unused_kwargs):
def _class_log_loss(probabilities, targets, weights=None):
- return metric_ops.streaming_mean(
+ return metrics.mean(
losses.log_loss(probabilities,
_squeeze_and_onehot(targets,
array_ops.shape(probabilities)[1])),
@@ -90,34 +91,36 @@ def _class_log_loss(probabilities, targets, weights=None):
def _precision(predictions, targets, weights=None):
- return metric_ops.streaming_precision(predictions, targets, weights=weights)
+ return metrics.precision(
+ labels=targets, predictions=predictions, weights=weights)
def _precision_at_thresholds(predictions, targets, weights=None):
- return metric_ops.streaming_precision_at_thresholds(
- array_ops.slice(predictions, [0, 1], [-1, 1]),
- targets,
- np.arange(
- 0, 1, 0.01, dtype=np.float32),
+ return metrics.precision_at_thresholds(
+ labels=targets,
+ predictions=array_ops.slice(predictions, [0, 1], [-1, 1]),
+ thresholds=np.arange(0, 1, 0.01, dtype=np.float32),
weights=weights)
def _recall(predictions, targets, weights=None):
- return metric_ops.streaming_recall(predictions, targets, weights=weights)
+ return metrics.recall(
+ labels=targets, predictions=predictions, weights=weights)
def _recall_at_thresholds(predictions, targets, weights=None):
- return metric_ops.streaming_recall_at_thresholds(
- array_ops.slice(predictions, [0, 1], [-1, 1]),
- targets,
- np.arange(
- 0, 1, 0.01, dtype=np.float32),
+ return metrics.recall_at_thresholds(
+ labels=targets,
+ predictions=array_ops.slice(predictions, [0, 1], [-1, 1]),
+ thresholds=np.arange(0, 1, 0.01, dtype=np.float32),
weights=weights)
def _auc(probs, targets, weights=None):
- return metric_ops.streaming_auc(array_ops.slice(probs, [0, 1], [-1, 1]),
- targets, weights=weights)
+ return metrics.auc(
+ labels=targets,
+ predictions=array_ops.slice(probs, [0, 1], [-1, 1]),
+ weights=weights)
_EVAL_METRICS = {
diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest.py b/tensorflow/contrib/tensor_forest/python/tensor_forest.py
index 7a35a70bbe..6f62cd11a9 100644
--- a/tensorflow/contrib/tensor_forest/python/tensor_forest.py
+++ b/tensorflow/contrib/tensor_forest/python/tensor_forest.py
@@ -295,7 +295,7 @@ def get_epoch_variable():
# A simple container to hold the training variables for a single tree.
-class TreeTrainingVariables(object):
+class TreeVariables(object):
"""Stores tf.Variables for training a single random tree.
Uses tf.get_variable to get tree-specific names so that this can be used
@@ -303,7 +303,7 @@ class TreeTrainingVariables(object):
then relies on restoring that model to evaluate).
"""
- def __init__(self, params, tree_num, training):
+ def __init__(self, params, tree_num, training, tree_config='', tree_stat=''):
if (not hasattr(params, 'params_proto') or
not isinstance(params.params_proto,
_params_proto.TensorForestParams)):
@@ -315,27 +315,28 @@ class TreeTrainingVariables(object):
# TODO(gilberth): Manually shard this to be able to fit it on
# multiple machines.
self.stats = stats_ops.fertile_stats_variable(
- params, '', self.get_tree_name('stats', tree_num))
+ params, tree_stat, self.get_tree_name('stats', tree_num))
self.tree = model_ops.tree_variable(
- params, '', self.stats, self.get_tree_name('tree', tree_num))
+ params, tree_config, self.stats, self.get_tree_name('tree', tree_num))
def get_tree_name(self, name, num):
return '{0}-{1}'.format(name, num)
-class ForestTrainingVariables(object):
+class ForestVariables(object):
"""A container for a forests training data, consisting of multiple trees.
- Instantiates a TreeTrainingVariables object for each tree. We override the
+ Instantiates a TreeVariables object for each tree. We override the
__getitem__ and __setitem__ function so that usage looks like this:
- forest_variables = ForestTrainingVariables(params)
+ forest_variables = ForestVariables(params)
... forest_variables.tree ...
"""
def __init__(self, params, device_assigner, training=True,
- tree_variables_class=TreeTrainingVariables):
+ tree_variables_class=TreeVariables,
+ tree_configs=None, tree_stats=None):
self.variables = []
# Set up some scalar variables to run through the device assigner, then
# we can use those to colocate everything related to a tree.
@@ -347,7 +348,13 @@ class ForestTrainingVariables(object):
for i in range(params.num_trees):
with ops.device(self.device_dummies[i].device):
- self.variables.append(tree_variables_class(params, i, training))
+ kwargs = {}
+ if tree_configs is not None:
+ kwargs.update(dict(tree_config=tree_configs[i]))
+ if tree_stats is not None:
+ kwargs.update(dict(tree_stat=tree_stats[i]))
+ self.variables.append(tree_variables_class(
+ params, i, training, **kwargs))
def __setitem__(self, t, val):
self.variables[t] = val
@@ -361,9 +368,11 @@ class RandomForestGraphs(object):
def __init__(self,
params,
+ tree_configs=None,
+ tree_stats=None,
device_assigner=None,
variables=None,
- tree_variables_class=TreeTrainingVariables,
+ tree_variables_class=TreeVariables,
tree_graphs=None,
training=True):
self.params = params
@@ -371,9 +380,10 @@ class RandomForestGraphs(object):
device_assigner or framework_variables.VariableDeviceChooser())
logging.info('Constructing forest with params = ')
logging.info(self.params.__dict__)
- self.variables = variables or ForestTrainingVariables(
+ self.variables = variables or ForestVariables(
self.params, device_assigner=self.device_assigner, training=training,
- tree_variables_class=tree_variables_class)
+ tree_variables_class=tree_variables_class,
+ tree_configs=tree_configs, tree_stats=tree_stats)
tree_graph_class = tree_graphs or RandomTreeGraphs
self.trees = [
tree_graph_class(self.variables[i], self.params, i)
diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py b/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py
index bbe627b157..1c9c81827e 100644
--- a/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py
+++ b/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py
@@ -18,10 +18,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+from google.protobuf.json_format import ParseDict
+from tensorflow.contrib.decision_trees.proto import generic_tree_model_pb2 as _tree_proto
from tensorflow.contrib.tensor_forest.python import tensor_forest
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import test_util
+from tensorflow.python.ops import resources
+from tensorflow.python.ops import variables
from tensorflow.python.platform import googletest
@@ -110,6 +114,47 @@ class TensorForestTest(test_util.TensorFlowTestCase):
self.assertTrue(isinstance(paths, ops.Tensor))
self.assertTrue(isinstance(var, ops.Tensor))
+ def testInfrenceFromRestoredModel(self):
+ input_data = [[-1., 0.], [-1., 2.], # node 1
+ [1., 0.], [1., -2.]] # node 2
+ expected_prediction = [[0.0, 1.0], [0.0, 1.0],
+ [0.0, 1.0], [0.0, 1.0]]
+ hparams = tensor_forest.ForestHParams(
+ num_classes=2,
+ num_features=2,
+ num_trees=1,
+ max_nodes=1000,
+ split_after_samples=25).fill()
+ tree_weight = {'decisionTree':
+ {'nodes':
+ [{'binaryNode':
+ {'rightChildId': 2,
+ 'leftChildId': 1,
+ 'inequalityLeftChildTest':
+ {'featureId': {'id': '0'},
+ 'threshold': {'floatValue': 0}}}},
+ {'leaf': {'vector':
+ {'value': [{'floatValue': 0.0},
+ {'floatValue': 1.0}]}},
+ 'nodeId': 1},
+ {'leaf': {'vector':
+ {'value': [{'floatValue': 0.0},
+ {'floatValue': 1.0}]}},
+ 'nodeId': 2}]}}
+ restored_tree_param = ParseDict(tree_weight,
+ _tree_proto.Model()).SerializeToString()
+ graph_builder = tensor_forest.RandomForestGraphs(hparams,
+ [restored_tree_param])
+ probs, paths, var = graph_builder.inference_graph(input_data)
+ self.assertTrue(isinstance(probs, ops.Tensor))
+ self.assertTrue(isinstance(paths, ops.Tensor))
+ self.assertTrue(isinstance(var, ops.Tensor))
+ with self.test_session():
+ variables.global_variables_initializer().run()
+ resources.initialize_resources(resources.shared_resources()).run()
+ self.assertEquals(probs.eval().shape, (4, 2))
+ self.assertEquals(probs.eval().tolist(), expected_prediction)
+
def testTrainingConstructionClassificationSparse(self):
input_data = sparse_tensor.SparseTensor(
indices=[[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]],
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index b7b26cfb1c..da4dd5a14c 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -91,8 +91,11 @@ void GetSubGraphIncomingEdges(const tensorflow::Graph& graph,
if (!subgraph_node_ids.count(edge->src()->id()) &&
!edge->src()->IsSource() && !edge->IsControlEdge()) {
incoming_edges->insert(edge);
+ VLOG(2) << "INCOMING " << edge->src()->name() << " -> " << node->name()
+ << " Y, ";
} else {
- VLOG(2) << node->name() << " -> " << edge->src()->name() << " N, ";
+ VLOG(2) << "INCOMING " << edge->src()->name() << " -> " << node->name()
+ << " N, ";
}
}
}
@@ -106,10 +109,12 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph,
for (const tensorflow::Edge* edge : node->out_edges()) {
if (!subgraph_node_ids.count(edge->dst()->id()) &&
!edge->dst()->IsSink() && !edge->IsControlEdge()) {
- VLOG(2) << node->name() << " -> " << edge->dst()->name() << " Y, ";
+ VLOG(2) << "OUTGOING " << node->name() << " -> " << edge->dst()->name()
+ << " Y, ";
outgoing_edges->insert(edge);
} else {
- VLOG(2) << node->name() << " -> " << edge->dst()->name() << " N, ";
+ VLOG(2) << "OUTGOING " << node->name() << " -> " << edge->dst()->name()
+ << " N, ";
}
}
}
@@ -181,29 +186,27 @@ struct ConvertGraphParams {
static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) {
GetSubGraphIncomingEdges(p->graph, p->subgraph_node_ids,
&p->subgraph_incoming_edges);
+
+ std::set<std::pair<int, int>> unique_tensors;
+ // Add only unique input source nodes. If output of an outside node is shared
+ // between multiple nodes inside the engine, only one edge should be created
for (const tensorflow::Edge* edge : p->subgraph_incoming_edges) {
- p->subgraph_inputs.push_back({edge->src()->id(), edge->src_output()});
- }
- auto output_name_to_index_map = BuildTensorNameMap(p->output_names);
- std::set<std::pair<int, int>> subgraph_outputs_set;
- // Collect outputs referenced from output_names
- for (int node_id : p->subgraph_node_ids) {
- tensorflow::Node* node = p->graph.FindNodeId(node_id);
- if (output_name_to_index_map.count(node->name())) {
- for (int index : output_name_to_index_map.at(node->name())) {
- subgraph_outputs_set.insert({node_id, index});
- }
- }
+ unique_tensors.insert({edge->src()->id(), edge->src_output()});
}
+ p->subgraph_inputs.insert(p->subgraph_inputs.begin(), unique_tensors.begin(),
+ unique_tensors.end());
GetSubGraphOutgoingEdges(p->graph, p->subgraph_node_ids,
&p->subgraph_outgoing_edges);
+ unique_tensors.clear();
+ // Similar to above, if multiple outside nodes are sharing the output of an
+ // internal node only one output port should be created and shared between
+ // outputs
for (const tensorflow::Edge* edge : p->subgraph_outgoing_edges) {
- subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()});
+ unique_tensors.insert({edge->src()->id(), edge->src_output()});
}
- p->subgraph_outputs.reserve(subgraph_outputs_set.size());
+ p->subgraph_outputs.reserve(unique_tensors.size());
p->subgraph_outputs.insert(p->subgraph_outputs.begin(),
- subgraph_outputs_set.begin(),
- subgraph_outputs_set.end());
+ unique_tensors.begin(), unique_tensors.end());
return tensorflow::Status::OK();
}
@@ -225,7 +228,6 @@ tensorflow::Status GetCalibNode(ConvertGraphParams* params) {
for (auto in_edge :
params->subgraph_incoming_edges) { // loop over incoming edges and
// attach them to calib node
- // tensorflow::Node* src_node = in_edge->src();
auto src_output = in_edge->src_output();
auto dst_node = in_edge->dst();
auto dst_input = in_edge->dst_input();
@@ -257,19 +259,24 @@ tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) {
for (size_t i = 0; i < params->subgraph_inputs.size(); ++i) {
subgraph_edge_to_input_map.insert({params->subgraph_inputs.at(i), i});
}
+ std::set<std::pair<int, int>> unique_tensors;
for (const tensorflow::Edge* edge : params->subgraph_incoming_edges) {
std::pair<int, int> old_src = {edge->src()->id(), edge->src_output()};
+ if (unique_tensors.count(old_src)) continue;
+ unique_tensors.insert(old_src);
int new_src_output = subgraph_edge_to_input_map.at(old_src);
params->graph.AddEdge(edge->src(), edge->src_output(), trt_node,
new_src_output);
+ VLOG(1) << "Wire " << edge->src()->name() << ":" << edge->src_output()
+ << " -> " << trt_node->name() << ":" << new_src_output;
params->graph.RemoveEdge(edge);
}
-
- VLOG(2) << "new wiring edges: " << trt_node->in_edges().size();
- for (const tensorflow::Edge* edge : trt_node->in_edges()) {
- VLOG(2) << edge->src()->name() << " port: " << edge->src_output();
+ if (VLOG_IS_ON(2)) {
+ VLOG(2) << "new edge count: " << trt_node->in_edges().size();
+ for (const tensorflow::Edge* edge : trt_node->in_edges()) {
+ VLOG(2) << edge->src()->name() << " port: " << edge->src_output();
+ }
}
-
TF_RETURN_IF_ERROR(status);
// Re-map outgoing edges to use the new TRT node instead of the orig subgraph
@@ -283,6 +290,8 @@ tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) {
int new_src_output = subgraph_edge_to_output_map.at(old_src);
TF_RETURN_IF_ERROR(params->graph.UpdateEdge(
trt_node, new_src_output, edge->dst(), edge->dst_input()));
+ VLOG(1) << "Wire " << trt_node->name() << ":" << new_src_output << " -> "
+ << edge->dst()->name() << ":" << edge->dst_input();
}
// Remove the original subgraph
for (int node_id : params->subgraph_node_ids) {
@@ -317,9 +326,12 @@ tensorflow::Status ConvertCalibGraphToInferGraph(
tensorflow::GraphConstructorOptions(), graph_def, &graph));
// get calib nodes
std::vector<tensorflow::Node*> calib_nodes;
- for (auto node : graph.op_nodes()) {
+ std::vector<tensorflow::Node*> topo_order;
+ tensorflow::GetPostOrder(graph, &topo_order);
+ for (auto rit = topo_order.rbegin(); rit != topo_order.rend(); ++rit) {
+ auto node = *rit;
if (node->type_string() == "TRTCalibOp") {
- VLOG(1) << "Found Calib Node";
+ VLOG(1) << "Found Calib Node " << node->name();
calib_nodes.push_back(node);
}
}
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 96e0700862..4e4d295538 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -362,10 +362,11 @@ void ReorderCKtoKC(const TRT_ShapedWeights& iweights,
break;
}
case tensorflow::DataType::DT_HALF: {
- Reorder2({k, c}, static_cast<Eigen::half const*>(iweights.GetValues()),
- istrides, static_cast<Eigen::half*>(
- const_cast<void*>(oweights->GetValues())),
- ostrides);
+ Reorder2(
+ {k, c}, static_cast<Eigen::half const*>(iweights.GetValues()),
+ istrides,
+ static_cast<Eigen::half*>(const_cast<void*>(oweights->GetValues())),
+ ostrides);
break;
}
default:
@@ -1179,9 +1180,9 @@ tensorflow::Status BinaryTensorOpTensor(
CHECK_EQ_TYPE(tensor_r->getType(), dtype);
auto op_pair = ops.find(node_def.op());
if (op_pair == ops.end())
- return tensorflow::errors::Unimplemented("binary op: " + node_def.op() +
- " not supported at: " +
- node_def.name());
+ return tensorflow::errors::Unimplemented(
+ "binary op: " + node_def.op() +
+ " not supported at: " + node_def.name());
nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise(
*const_cast<nvinfer1::ITensor*>(tensor_l),
@@ -2138,9 +2139,7 @@ void Converter::register_op_converters() {
}
} // namespace
-tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams& s) {
- return tensorflow::errors::Unimplemented("Not implemented yet");
-}
+
tensorflow::Status ConvertCalibrationNodeToEngineNode(
tensorflow::Graph& graph, tensorflow::Node* c_node) {
const auto ndef = c_node->def();
@@ -2164,9 +2163,23 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode(
for (auto n : graph.op_nodes()) {
node_maps.insert({n->name(), n});
}
+ std::set<int> subgraph_ids;
+ for (const auto internal_node : segment_nodes) {
+ subgraph_ids.insert(node_maps.at(internal_node)->id());
+ }
+ if (VLOG_IS_ON(2)) {
+ string node_names = StrCat(c_node->name(), " segment nodes= ");
+
+ for (const auto& node_name : segment_nodes) {
+ StrAppend(&node_names, node_name, ", ");
+ }
+ VLOG(2) << node_names;
+ }
+
VLOG(1) << "Output Nodes:";
std::vector<tensorflow::DataType> out_types;
std::vector<const tensorflow::Edge*> out_edges;
+
for (auto& i : output_nodes) {
auto node_port = tensorflow::str_util::Split(i, ":");
VLOG(1) << " " << i << " in graph " << node_maps.count(i);
@@ -2186,18 +2199,24 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode(
out_types.push_back(out_node->output_type(0));
}
for (auto out_edge : out_node->out_edges()) {
+ if (subgraph_ids.count(out_edge->dst()->id()))
+ continue; // skip internal edges;
if (out_edge->src_output() == port) {
out_edges.push_back(out_edge);
- break;
+ VLOG(1) << "OUTPUT EDGE " << out_edge->src()->name() << ":"
+ << out_edge->src_output() << " -> " << out_edge->dst()->name()
+ << ":" << out_edge->dst_input();
}
}
} else {
LOG(WARNING) << " couldn't find output node " << out_node_name;
}
}
- VLOG(1) << "Input Nodes:";
- for (auto& i : input_names) {
- VLOG(1) << " " << i << " in graph " << node_maps.count(i);
+ if (VLOG_IS_ON(1)) {
+ VLOG(1) << c_node->name() << " Input Nodes:";
+ for (auto& i : input_names) {
+ VLOG(1) << " Input " << i << " in graph " << node_maps.count(i);
+ }
}
auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance();
auto resmgr = trt_rm->getManager("TRTCalibOps");
@@ -2231,14 +2250,24 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode(
calib_res->builder_ = nullptr;
tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp");
std::vector<tensorflow::NodeDefBuilder::NodeOut> income_edges;
+ income_edges.resize(c_node->num_inputs());
for (const auto in_edge : c_node->in_edges()) {
auto src = in_edge->src();
int dest_port = in_edge->dst_input();
- income_edges.emplace_back(src->name(), in_edge->src_output(),
- c_node->input_type(dest_port));
+ VLOG(1) << "Incoming connection " << src->name() << ":"
+ << in_edge->src_output() << " -> " << c_node->name() << ":"
+ << dest_port;
+ income_edges.at(dest_port) = {src->name(), in_edge->src_output(),
+ c_node->input_type(dest_port)};
}
tensorflow::gtl::ArraySlice<tensorflow::NodeDefBuilder::NodeOut> input_list(
income_edges);
+ if (VLOG_IS_ON(2)) {
+ for (const auto& inp : input_list) {
+ VLOG(2) << " Input from inputlist " << inp.node << ":" << inp.index << " "
+ << tensorflow::DataTypeString(inp.data_type);
+ }
+ }
op_builder.Input(input_list);
tensorflow::NodeDef engine_node;
const char* engine_plan_data = static_cast<const char*>(engine_plan->data());
@@ -2255,13 +2284,26 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode(
}
auto trt_engine_node = graph.AddNode(engine_node, &status);
TF_RETURN_IF_ERROR(status);
- for (size_t i = 0; i < out_edges.size(); i++) {
- VLOG(1) << "Connecting trt_engine_node output " << i << " with "
- << out_edges.at(i)->dst()->name() << " port "
- << out_edges.at(i)->dst_input();
- TF_RETURN_IF_ERROR(graph.UpdateEdge(trt_engine_node, i,
- out_edges.at(i)->dst(),
- out_edges.at(i)->dst_input()));
+ std::map<string, int> port_map;
+ for (size_t t = 0; t < output_nodes.size(); t++) {
+ port_map.insert({output_nodes.at(t), t});
+ }
+ for (auto& i : out_edges) {
+ string s(i->src()->name());
+ if (i->src_output()) StrAppend(&s, ":", i->src_output());
+ int out_port = port_map.at(s);
+ VLOG(1) << "Connecting " << trt_engine_node->name() << ":" << out_port
+ << " -> " << i->dst()->name() << ":" << i->dst_input();
+ TF_RETURN_IF_ERROR(
+ graph.UpdateEdge(trt_engine_node, out_port, i->dst(), i->dst_input()));
+ }
+ for (const auto ed : trt_engine_node->in_edges()) {
+ VLOG(1) << "In Edge " << ed->src()->name() << ":" << ed->src_output()
+ << " -> " << ed->dst()->name() << ":" << ed->dst_input();
+ }
+ for (const auto ed : trt_engine_node->out_edges()) {
+ VLOG(1) << "Out Edge " << ed->src()->name() << ":" << ed->src_output()
+ << " -> " << ed->dst()->name() << ":" << ed->dst_input();
}
VLOG(1) << "Segment nodes:";
for (auto& i : segment_nodes) {
@@ -2332,6 +2374,7 @@ tensorflow::Status ConvertSubgraph(
std::vector<string>* output_names,
std::vector<tensorflow::DataType>* output_dtypes,
const string& engine_name) {
+ std::set<string> added_tensors;
for (const std::pair<int, int>& input : s.input_inds) {
VLOG(2) << "parsing input. Node id= " << input.first;
int node_id = input.first;
@@ -2374,7 +2417,6 @@ tensorflow::Status ConvertSubgraph(
auto op_info = op_info_vec.at(shape_inference_output_idx);
tensorflow::DataType tf_dtype = op_info.dtype();
- input_dtypes->push_back(tf_dtype);
nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT);
auto type_status = ConvertDType(tf_dtype, &dtype);
@@ -2410,8 +2452,10 @@ tensorflow::Status ConvertSubgraph(
if (output_idx != 0) {
input_tensor_name = StrCat(node_name, ":", output_idx);
}
-
+ if (added_tensors.count(input_tensor_name)) continue;
+ added_tensors.insert(input_tensor_name);
input_names->push_back(input_tensor_name);
+ input_dtypes->push_back(tf_dtype);
nvinfer1::ITensor* input_tensor = converter.network()->addInput(
input_tensor_name.c_str(), dtype, input_dim_pseudo_chw);
@@ -2435,6 +2479,7 @@ tensorflow::Status ConvertSubgraph(
// Gather output metadata
int trt_engine_op_output_idx = 0;
+ added_tensors.clear();
for (const std::pair<int, int>& output : s.output_inds) {
int node_id = output.first;
int output_idx = output.second;
@@ -2451,6 +2496,8 @@ tensorflow::Status ConvertSubgraph(
if (output_idx != 0)
tensorflow::strings::StrAppend(&tensor_name, ":", output_idx);
VLOG(2) << "Output tensor name: " << tensor_name;
+ if (added_tensors.count(tensor_name)) continue;
+ added_tensors.insert(tensor_name);
output_names->push_back(tensor_name);
auto tensor_or_weights = converter.get_tensor(tensor_name);
if (!tensor_or_weights.is_tensor()) {
diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py
index 2e472a2805..d879170b68 100644
--- a/tensorflow/contrib/tpu/python/tpu/datasets.py
+++ b/tensorflow/contrib/tpu/python/tpu/datasets.py
@@ -166,11 +166,21 @@ def StreamingFilesDataset(files,
return remote_iterator.get_next()
def MapFn(unused_input):
- return functional_ops.remote_call(
+ if isinstance(source_dataset.output_types, dtypes.DType):
+ output_types = [source_dataset.output_types]
+ elif isinstance(source_dataset.output_types, (list, tuple)):
+ output_types = source_dataset.output_types
+ else:
+ raise ValueError('source dataset has invalid output types')
+ remote_calls = functional_ops.remote_call(
args=[source_handle],
- Tout=[dtypes.string],
+ Tout=output_types,
f=LoadingFunc,
- target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job)[0]
+ target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job)
+ if len(remote_calls) == 1:
+ return remote_calls[0]
+ else:
+ return remote_calls
with ops.device('/job:%s' % worker_job):
output_dataset = dataset_ops.Dataset.range(2).repeat().map(
diff --git a/tensorflow/contrib/tpu/python/tpu/datasets_test.py b/tensorflow/contrib/tpu/python/tpu/datasets_test.py
index 918cf0ed8e..b58d05eac5 100644
--- a/tensorflow/contrib/tpu/python/tpu/datasets_test.py
+++ b/tensorflow/contrib/tpu/python/tpu/datasets_test.py
@@ -26,6 +26,8 @@ from tensorflow.core.protobuf import config_pb2
from tensorflow.python.client import session
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.data.ops import readers
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import tensor_shape
from tensorflow.python.lib.io import python_io
from tensorflow.python.platform import test
from tensorflow.python.training import server_lib
@@ -162,6 +164,30 @@ class DatasetsTest(test.TestCase):
self.assertEqual(set(all_contents), set(retrieved_values))
+ def testArbitraryReaderFuncFromDatasetGenerator(self):
+
+ def my_generator():
+ yield (1, [1] * 10)
+
+ def gen_dataset(dummy):
+ return dataset_ops.Dataset.from_generator(
+ my_generator, (dtypes.int64, dtypes.int64),
+ (tensor_shape.TensorShape([]), tensor_shape.TensorShape([10])))
+
+ dataset = datasets.StreamingFilesDataset(
+ dataset_ops.Dataset.range(10), filetype=gen_dataset)
+
+ iterator = dataset.make_initializable_iterator()
+ self._sess.run(iterator.initializer)
+ get_next = iterator.get_next()
+
+ retrieved_values = self._sess.run(get_next)
+
+ self.assertIsInstance(retrieved_values, (list, tuple))
+ self.assertEqual(len(retrieved_values), 2)
+ self.assertEqual(retrieved_values[0], 1)
+ self.assertItemsEqual(retrieved_values[1], [1] * 10)
+
def testUnexpectedFiletypeString(self):
with self.assertRaises(ValueError):
datasets.StreamingFilesDataset(
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index d89633199d..b1c224a345 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -699,7 +699,9 @@ cc_library(
srcs = ["platform/stacktrace_handler.cc"],
hdrs = ["platform/stacktrace_handler.h"],
deps = [
+ ":abi",
":lib_platform",
+ ":stacktrace",
],
)
@@ -3089,6 +3091,8 @@ cc_library(
# we now need at least "str_util".
":lib",
":lib_platform",
+ ":stacktrace_handler",
+ ":test_lite",
"//tensorflow/core/platform/default/build_config:test_lite_main",
],
alwayslink = 1,
@@ -3569,7 +3573,10 @@ tf_cc_tests_gpu(
tf_cc_test_mkl(
name = "mkl_runtime_tests",
size = "small",
- srcs = ["common_runtime/mkl_cpu_allocator_test.cc"],
+ srcs = [
+ "common_runtime/mkl_cpu_allocator_test.cc",
+ "common_runtime/mkl_threadpool_device_test.cc",
+ ],
linkstatic = 1,
deps = [
":core",
diff --git a/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt b/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt
index cbe76de415..985f09312f 100644
--- a/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt
@@ -4,6 +4,10 @@ op {
description: <<END
if < 0, `scale * features` otherwise.
+To be used together with
+`initializer = tf.variance_scaling_initializer(factor=1.0, mode='FAN_IN')`.
+For correct dropout, use `tf.contrib.nn.alpha_dropout`.
+
See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
END
}
diff --git a/tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt
new file mode 100644
index 0000000000..6e13d0d049
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt
@@ -0,0 +1,48 @@
+op {
+ graph_op_name: "StringSplitV2"
+ in_arg {
+ name: "input"
+ description: <<END
+`1-D` string `Tensor`, the strings to split.
+END
+ }
+ in_arg {
+ name: "sep"
+ description: <<END
+`0-D` string `Tensor`, the delimiter character.
+END
+ }
+ attr {
+ name: "maxsplit"
+ description: <<END
+An `int`. If `maxsplit > 0`, limit of the split of the result.
+END
+ }
+ summary: "Split elements of `source` based on `sep` into a `SparseTensor`."
+ description: <<END
+Let N be the size of source (typically N will be the batch size). Split each
+element of `source` based on `sep` and return a `SparseTensor`
+containing the split tokens. Empty tokens are ignored.
+
+For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c',
+then the output will be
+```
+st.indices = [0, 0;
+ 0, 1;
+ 1, 0;
+ 1, 1;
+ 1, 2]
+st.shape = [2, 3]
+st.values = ['hello', 'world', 'a', 'b', 'c']
+```
+
+If `sep` is given, consecutive delimiters are not grouped together and are
+deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and
+sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty
+string, consecutive whitespace are regarded as a single separator, and the
+result will contain no empty strings at the start or end if the string has
+leading or trailing whitespace.
+
+Note that the above mentioned behavior matches python's str.split.
+END
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt
new file mode 100644
index 0000000000..0e8576fb01
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt
@@ -0,0 +1,4 @@
+op {
+ graph_op_name: "StringSplitV2"
+ visibility: HIDDEN
+}
diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc
index 8f2a419756..9cda17867b 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/bfc_allocator.cc
@@ -86,7 +86,7 @@ BFCAllocator::Chunk* BFCAllocator::ChunkFromHandle(ChunkHandle h) {
return &(chunks_[h]);
}
-bool BFCAllocator::Extend(size_t rounded_bytes) {
+bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
size_t available_bytes = memory_limit_ - total_region_allocated_bytes_;
// Rounds available_bytes down to the nearest multiple of kMinAllocationSize.
available_bytes = (available_bytes / kMinAllocationSize) * kMinAllocationSize;
@@ -108,7 +108,7 @@ bool BFCAllocator::Extend(size_t rounded_bytes) {
// Try allocating.
size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes);
- void* mem_addr = suballocator_->Alloc(32, bytes);
+ void* mem_addr = suballocator_->Alloc(alignment, bytes);
if (mem_addr == nullptr && !started_backpedal_) {
// Only backpedal once.
started_backpedal_ = true;
@@ -119,7 +119,7 @@ bool BFCAllocator::Extend(size_t rounded_bytes) {
while (mem_addr == nullptr) {
bytes = RoundedBytes(bytes * kBackpedalFactor);
if (bytes < rounded_bytes) break;
- mem_addr = suballocator_->Alloc(32, bytes);
+ mem_addr = suballocator_->Alloc(alignment, bytes);
}
}
@@ -261,7 +261,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
}
// Try to extend
- if (Extend(rounded_bytes)) {
+ if (Extend(unused_alignment, rounded_bytes)) {
ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes);
if (ptr != nullptr) {
return ptr;
diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h
index ba5a3eea3a..52aedb1e9c 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.h
+++ b/tensorflow/core/common_runtime/bfc_allocator.h
@@ -305,7 +305,8 @@ class BFCAllocator : public VisitableAllocator {
// Try to add a new memory region that can satisfy an allocation of
// 'rounded_bytes' bytes. Returns true on success and false on
// failure.
- bool Extend(size_t rounded_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+ bool Extend(size_t alignment, size_t rounded_bytes)
+ EXCLUSIVE_LOCKS_REQUIRED(lock_);
// Returns a pointer to an underlying allocated chunk of size
// 'rounded_bytes'.
diff --git a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
index c21a1ea9f2..9028e6298c 100644
--- a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
@@ -102,9 +102,25 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelTest) {
EXPECT_EQ(2, shape.dim(0).size());
EXPECT_EQ(1, shape.dim(1).size());
if (node->name() == y->name()) {
+#ifdef INTEL_MKL
+ // If MKL is used, the graph goes through various additional
+ // graph rewrite passes. In TF, every time a graph pass
+ // happens, "constant" nodes are allocated
+ // and deallocated. Each allocation calls
+ // FindChunkPtr of BFCAllocator,
+ // which increments the value of AllocationId.
+ // Thus the AllocationId values differ when MKL is used:
+ // 19 and 20 here, versus 21 and 22 without MKL.
+ EXPECT_EQ(19, cm->AllocationId(node, 0));
+#else
EXPECT_EQ(21, cm->AllocationId(node, 0));
+#endif
} else {
+#ifdef INTEL_MKL
+ EXPECT_EQ(20, cm->AllocationId(node, 0));
+#else
EXPECT_EQ(22, cm->AllocationId(node, 0));
+#endif
}
}
EXPECT_LE(0, cm->MaxExecutionTime(node));
diff --git a/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc b/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc
new file mode 100644
index 0000000000..5d583a8360
--- /dev/null
+++ b/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc
@@ -0,0 +1,53 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef INTEL_MKL
+
+#include "tensorflow/core/common_runtime/threadpool_device.h"
+
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/public/session_options.h"
+
+namespace tensorflow {
+
+#ifdef _OPENMP
+TEST(MKLThreadPoolDeviceTest, TestOmpDefaults) {
+ SessionOptions options;
+ unsetenv("OMP_NUM_THREADS");
+
+ ThreadPoolDevice* tp = new ThreadPoolDevice(
+ options, "/device:CPU:0", Bytes(256), DeviceLocality(), cpu_allocator());
+
+ const int ht = port::NumHyperthreadsPerCore();
+ EXPECT_EQ(omp_get_max_threads(), (port::NumSchedulableCPUs() + ht - 1) / ht);
+}
+
+TEST(MKLThreadPoolDeviceTest, TestOmpPreSets) {
+ SessionOptions options;
+ setenv("OMP_NUM_THREADS", "314", 1);
+
+ ThreadPoolDevice* tp = new ThreadPoolDevice(
+ options, "/device:CPU:0", Bytes(256), DeviceLocality(), cpu_allocator());
+
+ EXPECT_EQ(omp_get_max_threads(), 314);
+}
+#endif // _OPENMP
+
+} // namespace tensorflow
+
+#endif // INTEL_MKL
diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index 21912236d0..a5d31b75c7 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -16,8 +16,10 @@ limitations under the License.
#include "tensorflow/core/common_runtime/process_util.h"
#ifdef INTEL_MKL
+#ifdef _OPENMP
#include <omp.h>
-#endif
+#endif // _OPENMP
+#endif // INTEL_MKL
#include <string.h>
#include "tensorflow/core/lib/core/threadpool.h"
@@ -57,7 +59,10 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
// MKL library executes ops in parallel using OMP threads
// Set inter_op conservatively to avoid thread oversubscription that could
// lead to severe perf degradations and OMP resource exhaustion
- const int mkl_intra_op = omp_get_max_threads();
+ int mkl_intra_op = 1;
+#ifdef _OPENMP
+ mkl_intra_op = omp_get_max_threads();
+#endif // _OPENMP
CHECK_GE(mkl_intra_op, 1);
const int32 mkl_inter_op = std::max(
(port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
@@ -68,7 +73,7 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
#else
// Default to using the number of cores available in the process.
return port::NumSchedulableCPUs();
-#endif
+#endif // INTEL_MKL
}
thread::ThreadPool* NewThreadPoolFromSessionOptions(
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index f7a07fe503..74a87215e1 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -31,7 +31,11 @@ limitations under the License.
#include "tensorflow/core/public/session_options.h"
#ifdef INTEL_MKL
+#ifdef _OPENMP
+#include <omp.h>
+#endif
#include "tensorflow/core/common_runtime/mkl_cpu_allocator.h"
+#include "tensorflow/core/platform/cpu_info.h"
#endif
namespace tensorflow {
@@ -43,7 +47,26 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
: LocalDevice(options, Device::BuildDeviceAttributes(
name, DEVICE_CPU, memory_limit, locality)),
allocator_(allocator),
- scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {}
+ scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
+#ifdef INTEL_MKL
+#ifdef _OPENMP
+ const char* user_omp_threads = getenv("OMP_NUM_THREADS");
+ if (user_omp_threads == nullptr) {
+ // OMP_NUM_THREADS controls MKL's intra-op parallelization
+ // Default to available physical cores
+ const int mkl_intra_op = port::NumSchedulableCPUs();
+ const int ht = port::NumHyperthreadsPerCore();
+ omp_set_num_threads((mkl_intra_op + ht - 1) / ht);
+ } else {
+ uint64 user_val = 0;
+ if (strings::safe_strtou64(user_omp_threads, &user_val)) {
+ // Superfluous but triggers OpenMP loading
+ omp_set_num_threads(user_val);
+ }
+ }
+#endif // _OPENMP
+#endif // INTEL_MKL
+}
ThreadPoolDevice::~ThreadPoolDevice() {}
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc
index 1cea1b1462..770a0fcf14 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc
@@ -147,7 +147,9 @@ MasterService::Stub::Stub(
}
MasterService::AsyncService::AsyncService() {
- for (int i = 0; i < 10; ++i) {
+ int method_len = sizeof(grpcMasterService_method_names) /
+ sizeof(grpcMasterService_method_names[0]);
+ for (int i = 0; i < method_len; ++i) {
AddMethod(new ::grpc::internal::RpcServiceMethod(
grpcMasterService_method_names[i],
::grpc::internal::RpcMethod::NORMAL_RPC, nullptr));
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc
index 89f83f9f24..a8508d2d4f 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc
@@ -17,6 +17,7 @@ limitations under the License.
#include "tensorflow/core/distributed_runtime/rpc/grpc_session.h"
#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/util/device_name_utils.h"
namespace tensorflow {
@@ -50,9 +51,14 @@ Status TestCluster::MakeTestCluster(const SessionOptions& options, int n,
}
for (int i = 0; i < n; ++i) {
+ string server_file =
+ strings::StrCat(testing::TensorFlowSrcRoot(),
+ "/core/distributed_runtime/rpc/grpc_testlib_server");
+ if (!options.env->FileExists(server_file).ok()) {
+ return errors::Internal("Could not find grpc_testlib_server");
+ }
const std::vector<string> argv(
- {strings::StrCat(testing::TensorFlowSrcRoot(),
- "/core/distributed_runtime/rpc/grpc_testlib_server"),
+ {server_file,
/* see grpc_testlib_server.cc for flags */
tf_jobs, "--tf_job=localhost", strings::StrCat("--tf_task=", i),
strings::StrCat("--num_cpus=", num_cpus),
diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h
index 2c87156dca..2bb4d32d57 100644
--- a/tensorflow/core/framework/allocator.h
+++ b/tensorflow/core/framework/allocator.h
@@ -67,13 +67,8 @@ struct AllocatorStats {
// device memory.
class Allocator {
public:
-#ifdef EIGEN_VECTORIZE_AVX512
// Align to 64 byte boundary.
static constexpr size_t kAllocatorAlignment = 64;
-#else
- // Align to 32 byte boundary.
- static constexpr size_t kAllocatorAlignment = 32;
-#endif
virtual ~Allocator();
diff --git a/tensorflow/core/framework/op_gen_lib.cc b/tensorflow/core/framework/op_gen_lib.cc
index 3d7920a6e2..4b56d807df 100644
--- a/tensorflow/core/framework/op_gen_lib.cc
+++ b/tensorflow/core/framework/op_gen_lib.cc
@@ -15,6 +15,7 @@ limitations under the License.
#include "tensorflow/core/framework/op_gen_lib.h"
+#include <algorithm>
#include <vector>
#include "tensorflow/core/framework/attr_value.pb.h"
#include "tensorflow/core/lib/core/errors.h"
diff --git a/tensorflow/core/framework/remote_fused_graph_execute_info.proto b/tensorflow/core/framework/remote_fused_graph_execute_info.proto
index eb689ec1e6..10072724d2 100644
--- a/tensorflow/core/framework/remote_fused_graph_execute_info.proto
+++ b/tensorflow/core/framework/remote_fused_graph_execute_info.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
option java_outer_classname = "RemoteFusedGraphExecuteInfoProto";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
-//add go_package externally
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
import "tensorflow/core/framework/graph.proto";
import "tensorflow/core/framework/tensor_shape.proto";
import "tensorflow/core/framework/types.proto";
diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc
index b613effd18..80e168df97 100644
--- a/tensorflow/core/framework/tensor_test.cc
+++ b/tensorflow/core/framework/tensor_test.cc
@@ -1147,29 +1147,29 @@ TEST(Tensor, FailureToAllocate) {
// On the alignment.
//
-// As of 2015/8, tensorflow::Tensor allocates its buffer with 32-byte
+// As of 2018/5, tensorflow::Tensor allocates its buffer with 64-byte
// alignment. Tensor::tensor/flat/vec/matrix methods requires the
// buffer satisfies Eigen::Aligned (e.g., 16-bytes aligned usually,
-// and 32-bytes for AVX). Tensor::Slice requires the caller to ensure
-// its result is aligned if the caller intends to use those methods.
-// In this test case, we simply make sure each slice is 32-byte
-// aligned: sizeof(float) * 4 * 2 = 32.
+// 32-bytes for AVX, and 64-bytes for AVX512). Tensor::Slice requires
+// the caller to ensure its result is aligned if the caller intends
+// to use those methods. In this test case, we simply make sure each
+// slice is 64-byte aligned: sizeof(float) * 4 * 36 = 576. 576 % 64 = 0.
TEST(Tensor, Slice_Basic) {
Tensor saved;
{ // General
- Tensor x(DT_FLOAT, TensorShape({10, 4, 34}));
+ Tensor x(DT_FLOAT, TensorShape({10, 4, 36}));
// Fills in known values.
for (int i = 0; i < 10; ++i) {
x.Slice(i, i + 1).flat<float>().setConstant(i * 1.f);
}
// A simple slice along dim0.
Tensor y = x.Slice(4, 8);
- EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 4, 34})));
+ EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 4, 36})));
auto tx = x.tensor<float, 3>();
auto ty = y.tensor<float, 3>();
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 4; ++j) {
- for (int k = 0; k < 34; ++k) {
+ for (int k = 0; k < 36; ++k) {
EXPECT_EQ(ty(i, j, k), 4.0 + i);
EXPECT_EQ(&tx(4 + i, j, k), &ty(i, j, k));
}
@@ -1186,7 +1186,7 @@ TEST(Tensor, Slice_Basic) {
auto tz = z.tensor<float, 3>();
EXPECT_EQ(1, z.dim_size(0));
for (int j = 0; j < 4; ++j) {
- for (int k = 0; k < 34; ++k) {
+ for (int k = 0; k < 36; ++k) {
EXPECT_EQ(tz(0, j, k), 6.0);
}
}
@@ -1198,16 +1198,16 @@ TEST(Tensor, Slice_Basic) {
EXPECT_EQ(1, saved.dim_size(0));
auto tsaved = saved.tensor<float, 3>();
for (int j = 0; j < 4; ++j) {
- for (int k = 0; k < 34; ++k) {
+ for (int k = 0; k < 36; ++k) {
EXPECT_EQ(tsaved(0, j, k), 6.0);
}
}
}
{ // Empty
- Tensor x(DT_FLOAT, TensorShape({10, 0, 34}));
+ Tensor x(DT_FLOAT, TensorShape({10, 0, 36}));
x.flat<float>().setRandom();
Tensor y = x.Slice(4, 8);
- EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 0, 34})));
+ EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 0, 36})));
}
{
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 72a13d4da7..b9667998d6 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -2691,14 +2691,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
// If Op has been specifically assigned to a non-CPU device, then No.
if (!n->assigned_device_name().empty() &&
- !str_util::StrContains(n->assigned_device_name(),kCPUDeviceSubStr)) {
+ !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) {
result = false;
reason = "Op has been assigned a runtime device that is not CPU.";
}
// If user has specifically assigned this op to a non-CPU device, then No.
if (!n->def().device().empty() &&
- !str_util::StrContains(n->def().device(),kCPUDeviceSubStr)) {
+ !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) {
result = false;
reason = "User has assigned a device that is not CPU.";
}
@@ -2865,9 +2865,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
return false;
}
- // If the depth_radius of LRN is not 2, then MKL DNN takes unoptimized
- // path. The unoptimized path is slow. Thus we dont rewrite the node
- // and use default Eigen. But for depth_radius=2, MKL DNN optimized
+ // If the depth_radius of LRN is not 2, then MKL DNN takes unoptimized
+ // path. The unoptimized path is slow. Thus we dont rewrite the node
+ // and use default Eigen. But for depth_radius=2, MKL DNN optimized
// path is taken, i.e., eigen node is rewritten by MKl DNN node.
static bool LrnRewrite(const Node* n) {
CHECK_NOTNULL(n);
@@ -2876,13 +2876,13 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
CHECK_EQ(GetNodeAttr(n->def(), "depth_radius", &depth_radius).ok(), true);
// if the depth_radius of LRN is not 2, don't rewrite the node by MKL DNN
- // and use eigen node instead
+ // and use eigen node instead
if (depth_radius == 2) {
return true;
}
VLOG(1) << "LrnRewrite: The model sets depth_radius as not 2 which"
<< "case is not optimized by Intel MKL, thus using Eigen op"
- << "for LRN " ;
+ << "for LRN ";
return false;
}
@@ -3015,6 +3015,35 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
std::vector<NodeBuilder::NodeOut>* ws_tensors,
bool* are_ws_tensors_added);
+ // Helper function used by FixMklMetaDataEdges. Fixes the metadata edge
+ // pointed by 'e_metadata' corresponding to the data edge 'e_data' in graph
+ // 'g'. Returns true if fixup was done; otherwise, it returns false.
+ bool FixMklMetaDataEdgeIfNeeded(std::unique_ptr<Graph>* g,
+ const Edge* e_data, const Edge* e_metadata);
+
+ // Are the input Mkl metadata edges for node 'n' in graph 'g' correctly
+ // connected? If not, then fix them. This is needed because a graph may have
+ // some input Mkl metadata edges incorrectly setup after node merge and
+ // rewrite passes. This could happen because GetReversePostOrder function may
+ // not provide topologically sorted order if a graph contains cycles. The
+ // function returns true if at least one Mkl metadata edge for node 'n' was
+ // fixed. Otherwise, it returns false.
+ //
+ // Example:
+ //
+ // X = MklConv2D(_, _, _)
+ // Y = MklConv2DWithBias(_, _, _, _, _, _)
+ // Z = MklAdd(X, Y, DummyMklTensor, Y:1)
+ //
+ // For a graph such as shown above, note that 3rd argument of MklAdd contains
+ // DummyMklTensor. Actually, it should be getting the Mkl metadata from
+ // MklConv2D op (specifically, X:2). This incorrect plumbing could be possible
+ // (although rare) if the Mkl NodeMerge + NodeRewrite passes visit Z before X
+ // (possible if X, Y, Z are part of a loop.) This function fixes the Mkl
+ // metadata edges only - it does not rewrite nodes nor does it modify the Mkl
+ // data edges (1st and 2nd arguments of MklAdd).
+ bool FixMklMetaDataEdges(std::unique_ptr<Graph>* g, Node* n);
+
// Functions specific to operators to copy attributes
// We need operator-specific function to copy attributes because the framework
// does not provide any generic function for it.
@@ -4242,6 +4271,92 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
}
///////////////////////////////////////////////////////////////////////////////
+// Post-rewrite Mkl metadata fixup pass
+///////////////////////////////////////////////////////////////////////////////
+bool MklLayoutRewritePass::FixMklMetaDataEdgeIfNeeded(std::unique_ptr<Graph>* g,
+ const Edge* e_data, const Edge* e_metadata) {
+ if (g == nullptr || e_data == nullptr || e_metadata == nullptr) {
+ return false;
+ }
+
+ Node* n_data = e_data->src();
+ int n_data_op_slot = e_data->src_output();
+ int n_metadata_op_slot = GetTensorMetaDataIndex(n_data_op_slot,
+ n_data->num_outputs());
+
+ // If the source of meta edge is a constant node (producing dummy Mkl metadata
+ // tensor), then we will need to fix.
+ if (IsConstant(e_metadata->src())) {
+ Node* e_metadata_dst = e_metadata->dst();
+ int e_metadata_in_slot = e_metadata->dst_input();
+ CHECK_NOTNULL((*g)->AddEdge(n_data, n_metadata_op_slot,
+ e_metadata_dst, e_metadata_in_slot));
+
+ (*g)->RemoveEdge(e_metadata);
+ return true;
+ }
+
+ return false;
+}
+
+bool MklLayoutRewritePass::FixMklMetaDataEdges(std::unique_ptr<Graph>* g,
+ Node* n) {
+ bool result = false;
+
+ // If graph node is not Mkl node, then return.
+ DataType T = DT_INVALID;
+ if (!GetNodeAttr(n->def(), "T", &T).ok() ||
+ !mkl_op_registry::IsMklOp(n->type_string(), T)) {
+ return result;
+ }
+
+ // If it is Mkl node, then check if the input edges to this node that carry
+ // Mkl metadata are linked up correctly with the source node.
+
+ // For Mkl nodes, we generate twice the number of input tensors (n for Mkl
+ // data tensors + n for Mkl metadata tensors). We need to check for correct
+ // connection of n metadata tensors only.
+ int num_data_inputs = n->num_inputs() / 2;
+ for (int idx = 0; idx < num_data_inputs; idx++) {
+ // Get the edge connecting input slot with index (idx).
+ const Edge* e = nullptr;
+ TF_CHECK_OK(n->input_edge(idx, &e));
+
+ // If e is control edge, then skip.
+ if (e->IsControlEdge()) {
+ continue;
+ }
+
+ // Check that the source node for edge 'e' is Mkl node. If it is not an Mkl
+ // node, then we don't need to do anything.
+ Node* e_src = e->src();
+ if (GetNodeAttr(e_src->def(), "T", &T).ok() &&
+ mkl_op_registry::IsMklOp(e_src->type_string(), T)) {
+ // Source node for edge 'e' is Mkl node.
+ // Destination node and destination input slot of e is node 'n' and 'idx'
+ // resp.
+ CHECK_EQ(e->dst(), n);
+ CHECK_EQ(e->dst_input(), idx);
+
+ // Let's get edge that carries Mkl metadata corresponding to Mkl data edge
+ // 'e'. For that, let's first get the input slot of 'n' where the meta
+ // edge will feed the value.
+ int e_meta_in_slot = GetTensorMetaDataIndex(e->dst_input(),
+ n->num_inputs());
+ const Edge* e_meta = nullptr;
+ TF_CHECK_OK(n->input_edge(e_meta_in_slot, &e_meta));
+
+ // Let's check if we need to fix this meta edge.
+ if (FixMklMetaDataEdgeIfNeeded(g, e, e_meta)) {
+ result = true;
+ }
+ }
+ }
+
+ return result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
// Run function for the pass
///////////////////////////////////////////////////////////////////////////////
@@ -4307,6 +4422,25 @@ bool MklLayoutRewritePass::RunPass(std::unique_ptr<Graph>* g) {
DumpGraph("After running MklLayoutRewritePass(NodeMerge+Rewrite)", &**g);
+ order.clear();
+ GetReversePostOrder(**g, &order); // This will give us topological sort.
+ for (Node* n : order) {
+ // If node is not an op or it cannot run on CPU device, then skip.
+ if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) {
+ continue;
+ }
+ if (FixMklMetaDataEdges(g, n)) {
+ string node_name = n->name();
+ string op_name = n->type_string();
+
+ VLOG(1) << "MklLayoutRewritePass: fixed metadata edges for node "
+ << node_name << " with op " << op_name;
+ result = true;
+ }
+ }
+ DumpGraph("After running MklLayoutRewritePass(NodeMerge+Rewrite+Fixup)",
+ &**g);
+
return result;
}
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index 029cdcf94a..7645b4a7f0 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -3519,6 +3519,37 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) {
}
/////////////////////////////////////////////////////////////////////
+// Post-rewrite fixup pass test
+
+TEST_F(MklLayoutPassTest, PostRewriteFixUpPass) {
+ InitGraph(
+ "node { name: 'A' op: 'Input'}"
+ "node { name: 'B' op: 'Input'}"
+ "node { name: 'M' op: '_MklInput'}"
+ "node { name: 'N' op: '_MklInput'}"
+ "node { name: 'C' op: '_MklConv2D'"
+ " attr { key: 'T' value { type: DT_FLOAT } }"
+ " attr { key: 'data_format' value { s: 'NCHW' } }"
+ " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+ " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }"
+ " attr { key: 'padding' value { s: 'SAME' } }"
+ " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }"
+ " input: ['A', 'B', 'M', 'N']}"
+ "node { name: 'D' op: 'Const' "
+ " attr { key: 'dtype' value { type: DT_UINT8 } }"
+ " attr { key: 'value' value { "
+ " tensor { dtype: DT_UINT8 tensor_shape { dim { size: 1 } } "
+ " int_val: 0 } } } }"
+ "node { name: 'E' op: '_MklAdd'"
+ " attr {key: 'T' value { type: DT_FLOAT } }"
+ " input: ['C', 'A', 'D', 'D']}");
+ EXPECT_EQ(DoMklLayoutOptimizationPass(),
+ "A(Input);B(Input);C(_MklConv2D);D(Const);E(_MklAdd);"
+ "M(_MklInput);N(_MklInput)|A->C;A->E:1;B->C:1;C->E;C:2->E:2;"
+ "D->E:3;M->C:2;N->C:3");
+}
+
+/////////////////////////////////////////////////////////////////////
static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
testing::StopTiming();
diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index 6749a7c571..0c02876ac5 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -610,7 +610,6 @@ class SymbolicShapeRefiner {
}
};
- // Compute the shape of the tensors outputed by node 'node' at output port
// 'port_index' as the union of shape1 and shape2.
ShapeHandle OutputAsUnion(const NodeDef* node, int port_index,
ShapeHandle shape1, ShapeHandle shape2) {
diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index 1b18087cdf..8ca726df0b 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -679,6 +679,7 @@ cc_library(
deps = [
":constant_folding",
":graph_optimizer",
+ "//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core/grappler:graph_view",
"//tensorflow/core/grappler:grappler_item",
@@ -780,7 +781,6 @@ cc_library(
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:protos_all_cc",
- "//tensorflow/core:scoped_allocator_ops_op_lib",
"//tensorflow/core/grappler:grappler_item",
"//tensorflow/core/grappler:op_types",
"//tensorflow/core/grappler:utils",
diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc
index 4dde7ed1b4..03e36a7b9c 100644
--- a/tensorflow/core/grappler/optimizers/remapper.cc
+++ b/tensorflow/core/grappler/optimizers/remapper.cc
@@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/core/grappler/op_types.h"
#include "tensorflow/core/grappler/optimizers/constant_folding.h"
#include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/platform/logging.h"
namespace tensorflow {
namespace grappler {
@@ -200,8 +201,7 @@ Status Remapper::Optimize(Cluster* /*cluster*/, const GrapplerItem& item,
}
}
if (optimizable) {
- VLOG(2) << "Optimizing fused batch norm node " << node.DebugString()
- << std::endl;
+ VLOG(1) << "Optimizing fused batch norm node " << node.DebugString();
AddBatchNormNodes(optimized_graph, node);
continue;
}
diff --git a/tensorflow/core/kernels/as_string_op.cc b/tensorflow/core/kernels/as_string_op.cc
index 66c4aff3e3..a7757d1361 100644
--- a/tensorflow/core/kernels/as_string_op.cc
+++ b/tensorflow/core/kernels/as_string_op.cc
@@ -73,6 +73,7 @@ class AsStringOp : public OpKernel {
}
switch (dtype) {
case DT_INT8:
+ case DT_INT16:
case DT_INT32:
strings::Appendf(&format_, "d");
break;
@@ -129,6 +130,7 @@ class AsStringOp : public OpKernel {
ENCODE_TYPE(DT_FLOAT, float, format_);
ENCODE_TYPE(DT_DOUBLE, double, format_);
ENCODE_TYPE(DT_INT8, int8, format_);
+ ENCODE_TYPE(DT_INT16, int16, format_);
case (DT_BOOL): {
const auto& input_flat = input_tensor->flat<bool>();
for (int i = 0; i < input_flat.size(); ++i) {
diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc
index 14d889e8e3..49b90e855b 100644
--- a/tensorflow/core/kernels/cwise_op_clip.cc
+++ b/tensorflow/core/kernels/cwise_op_clip.cc
@@ -33,52 +33,41 @@ class ClipOp : public OpKernel {
const Tensor& in0 = ctx->input(0);
const Tensor& in1 = ctx->input(1);
const Tensor& in2 = ctx->input(2);
+ OP_REQUIRES(ctx, (in0.shape() == in1.shape() ||
+ TensorShapeUtils::IsScalar(in1.shape())) &&
+ (in0.shape() == in2.shape() ||
+ TensorShapeUtils::IsScalar(in2.shape())),
+ errors::InvalidArgument(
+ "clip_value_min and clip_value_max must be either of "
+ "the same shape as input, or a scalar. ",
+ "input shape: ", in0.shape().DebugString(),
+ "clip_value_min shape: ", in1.shape().DebugString(),
+ "clip_value_max shape: ", in2.shape().DebugString()));
+
+ Tensor* out = nullptr;
+ OP_REQUIRES_OK(
+ ctx, ctx->forward_input_or_allocate_output({0}, 0, in0.shape(), &out));
+ if (out->NumElements() == 0) return; // Nothing to do for empty output
auto in0_flat = in0.flat<T>();
auto in1_flat = in1.flat<T>();
auto in2_flat = in2.flat<T>();
+ auto out_flat = out->flat<T>();
const Device& d = ctx->eigen_device<Device>();
- Tensor* out = nullptr;
- OP_REQUIRES_OK(
- ctx, ctx->forward_input_or_allocate_output({0}, 0, in0.shape(), &out));
- auto out_flat = out->flat<T>();
if (in1.shape() == in2.shape()) {
if (in0.shape() == in1.shape()) {
functor::TernaryClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat,
out_flat);
} else {
- OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in1.shape()),
- errors::InvalidArgument(
- "clip_value_min and clip_value_max must be either of "
- "the same shape as input, or a scalar. ",
- "input shape: ", in0.shape().DebugString(),
- "clip_value_min shape: ", in1.shape().DebugString(),
- "clip_value_max shape: ", in2.shape().DebugString()));
functor::UnaryClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat,
out_flat);
}
} else {
if (in0.shape() == in1.shape()) {
- OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in2.shape()),
- errors::InvalidArgument(
- "clip_value_min and clip_value_max must be either of "
- "the same shape as input, or a scalar. ",
- "input shape: ", in0.shape().DebugString(),
- "clip_value_min shape: ", in1.shape().DebugString(),
- "clip_value_max shape: ", in2.shape().DebugString()));
functor::BinaryLeftClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat,
out_flat);
} else {
- OP_REQUIRES(ctx,
- (in0.shape() == in2.shape() &&
- TensorShapeUtils::IsScalar(in1.shape())),
- errors::InvalidArgument(
- "clip_value_min and clip_value_max must be either of "
- "the same shape as input, or a scalar. ",
- "input shape: ", in0.shape().DebugString(),
- "clip_value_min shape: ", in1.shape().DebugString(),
- "clip_value_max shape: ", in2.shape().DebugString()));
functor::BinaryRightClipOp<Device, T>()(d, in0_flat, in1_flat, in2_flat,
out_flat);
}
diff --git a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc
index 9a3b2303a3..17a85d9773 100644
--- a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc
+++ b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc
@@ -57,6 +57,7 @@ struct DenseUpdate<GPUDevice, T, SUB> {
template struct functor::DenseUpdate<GPUDevice, T, ADD>; \
template struct functor::DenseUpdate<GPUDevice, T, SUB>;
TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
+TF_CALL_int32(DEFINE_GPU_KERNELS);
TF_CALL_int64(DEFINE_GPU_KERNELS);
#undef DEFINE_GPU_KERNELS
diff --git a/tensorflow/core/kernels/gather_functor.cc b/tensorflow/core/kernels/gather_functor.cc
index e6fefe643b..5cd8e04927 100644
--- a/tensorflow/core/kernels/gather_functor.cc
+++ b/tensorflow/core/kernels/gather_functor.cc
@@ -37,6 +37,7 @@ namespace functor {
DECLARE_GPU_SPECS_INDEX(T, int32); \
DECLARE_GPU_SPECS_INDEX(T, int64)
+TF_CALL_int64(DECLARE_GPU_SPECS);
TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS);
TF_CALL_complex64(DECLARE_GPU_SPECS);
TF_CALL_complex128(DECLARE_GPU_SPECS);
diff --git a/tensorflow/core/kernels/gather_functor_gpu.cu.cc b/tensorflow/core/kernels/gather_functor_gpu.cu.cc
index 39b6924d74..4563fc6353 100644
--- a/tensorflow/core/kernels/gather_functor_gpu.cu.cc
+++ b/tensorflow/core/kernels/gather_functor_gpu.cu.cc
@@ -31,6 +31,7 @@ typedef Eigen::GpuDevice GPUDevice;
DEFINE_GPU_SPECS_INDEX(T, int32); \
DEFINE_GPU_SPECS_INDEX(T, int64);
+TF_CALL_int64(DEFINE_GPU_SPECS);
TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS);
TF_CALL_complex64(DEFINE_GPU_SPECS);
TF_CALL_complex128(DEFINE_GPU_SPECS);
diff --git a/tensorflow/core/kernels/gather_nd_op.cc b/tensorflow/core/kernels/gather_nd_op.cc
index 7e5a9e1ec5..4e53291b7f 100644
--- a/tensorflow/core/kernels/gather_nd_op.cc
+++ b/tensorflow/core/kernels/gather_nd_op.cc
@@ -228,6 +228,8 @@ namespace functor {
DECLARE_GPU_SPECS_INDEX(T, int32); \
DECLARE_GPU_SPECS_INDEX(T, int64)
+TF_CALL_int32(DECLARE_GPU_SPECS);
+TF_CALL_int64(DECLARE_GPU_SPECS);
TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS);
TF_CALL_complex64(DECLARE_GPU_SPECS);
TF_CALL_complex128(DECLARE_GPU_SPECS);
@@ -239,6 +241,8 @@ TF_CALL_complex128(DECLARE_GPU_SPECS);
// Registration of the GPU implementations.
#define REGISTER_GATHER_ND_GPU(type) REGISTER_GATHER_ND_ALL_INDICES(GPU, type)
+TF_CALL_int32(REGISTER_GATHER_ND_GPU);
+TF_CALL_int64(REGISTER_GATHER_ND_GPU);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_ND_GPU);
TF_CALL_complex64(REGISTER_GATHER_ND_GPU);
TF_CALL_complex128(REGISTER_GATHER_ND_GPU);
diff --git a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc
index b03efc684f..da8d2e9e3c 100644
--- a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc
@@ -119,6 +119,8 @@ struct GatherNdSlice<GPUDevice, T, Index, IXDIM> {
DEFINE_GPU_SPECS_INDEX(T, int32); \
DEFINE_GPU_SPECS_INDEX(T, int64);
+TF_CALL_int32(DEFINE_GPU_SPECS);
+TF_CALL_int64(DEFINE_GPU_SPECS);
TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS);
TF_CALL_complex64(DEFINE_GPU_SPECS);
TF_CALL_complex128(DEFINE_GPU_SPECS);
diff --git a/tensorflow/core/kernels/gather_op.cc b/tensorflow/core/kernels/gather_op.cc
index ef332ebee3..094504d6b9 100644
--- a/tensorflow/core/kernels/gather_op.cc
+++ b/tensorflow/core/kernels/gather_op.cc
@@ -153,6 +153,7 @@ TF_CALL_uint64(REGISTER_GATHER_CPU);
// Registration of the GPU implementations.
#define REGISTER_GATHER_GPU(type) REGISTER_GATHER_ALL_INDICES(GPU, type)
+TF_CALL_int64(REGISTER_GATHER_GPU);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_GPU);
TF_CALL_complex64(REGISTER_GATHER_GPU);
TF_CALL_complex128(REGISTER_GATHER_GPU);
diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc
index 5eeb23d810..31d1b949ef 100644
--- a/tensorflow/core/kernels/mkl_concat_op.cc
+++ b/tensorflow/core/kernels/mkl_concat_op.cc
@@ -14,6 +14,7 @@ limitations under the License.
#include <limits>
#include <vector>
+#include <unordered_map>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
@@ -590,8 +591,8 @@ class MklConcatOp : public OpKernel {
const int N = input_tensors.size();
// Get Tensor shapes.
- std::vector<MklDnnShape> input_shapes(N);
- GetMklShapeList(context, "values", &input_shapes);
+ std::vector<MklDnnShape> mkl_input_shapes(N);
+ GetMklShapeList(context, "values", &mkl_input_shapes);
const Tensor& concat_dim_tensor = (AxisArgName == NAME_IS_CONCAT_DIM)
? MklGetInput(context, 0)
@@ -610,19 +611,14 @@ class MklConcatOp : public OpKernel {
int i = 0;
bool invoke_eigen = false;
bool are_all_mkl_inputs = true, are_all_tf_inputs = true;
- const TensorShape expected_shape = input_shapes[0].IsMklTensor()
- ? input_shapes[0].GetTfShape()
- : input_tensors[0].shape();
+ const TensorShape expected_shape = mkl_input_shapes[0].IsMklTensor()
+ ? mkl_input_shapes[0].GetTfShape()
+ : input_tensors[0].shape();
size_t expected_dims = expected_shape.dims();
if (concat_dim < 0) concat_dim = expected_dims + concat_dim;
- for (auto& s : input_shapes) {
- if (s == expected_shape) {
- ++i;
- continue;
- }
-
+ for (auto& s : mkl_input_shapes) {
TensorShape s_shape =
s.IsMklTensor() ? s.GetTfShape() : input_tensors[i].shape();
size_t s_dims = s_shape.dims();
@@ -665,21 +661,14 @@ class MklConcatOp : public OpKernel {
// Call Eigen library
if (invoke_eigen) {
- TensorShapeList tf_input_shapes;
- i = 0;
- for (auto& s : input_shapes) {
- TensorShape s_shape =
- s.IsMklTensor() ? s.GetTfShape() : input_tensors[i].shape();
- tf_input_shapes.push_back(s_shape);
- ++i;
- }
- CallEigenVersion(context, input_tensors, tf_input_shapes);
+ CallEigenVersion(context, input_tensors, mkl_input_shapes);
return;
}
memory::dims dst_dims;
+
if (are_all_mkl_inputs)
- dst_dims = TFShapeToMklDnnDims(input_shapes[0].GetTfShape());
+ dst_dims = TFShapeToMklDnnDims(mkl_input_shapes[0].GetTfShape());
else
// When all the inputs are in Tensorflow format, we don't know
// what is the input data format. In that case, we just use
@@ -689,26 +678,61 @@ class MklConcatOp : public OpKernel {
std::vector<memory::primitive_desc> srcs_pd;
std::vector<MklDnnData<T>> srcs(N, MklDnnData<T>(&cpu_engine));
int64 dst_concat_dim_size = 0;
- for (int k = 0; k < N; k++) {
- bool is_mkl_tensor = input_shapes[k].IsMklTensor();
- memory::dims src_dims;
-
- // Same comment as dst_dims for src_dims.
- src_dims = (is_mkl_tensor)
- ? TFShapeToMklDnnDims(input_shapes[k].GetTfShape())
- : TFShapeToMklDnnDims(input_tensors[k].shape());
-
- dst_concat_dim_size += src_dims[concat_dim];
- auto src_md =
- is_mkl_tensor ? input_shapes[k].GetMklLayout() :
- // It does not matter what data format we use here
- // (NHWC or NCHW). We just need to ensure that output
- // of Concat uses same data format as input.
- memory::desc(src_dims, MklDnnType<T>(), memory::format::nchw);
-
- srcs[k].SetUsrMem(src_md, &input_tensors[k]);
- auto src_mpd = srcs[k].GetUsrMemPrimDesc();
- srcs_pd.push_back(src_mpd);
+
+ bool isMklReorderNeeded = false;
+ memory::format mkl_common_format = memory::format::any;
+ if (are_all_mkl_inputs) {
+ mkl_common_format =
+ FindMklCommonFormat(mkl_input_shapes, concat_dim,
+ &isMklReorderNeeded, &dst_concat_dim_size);
+
+ if (!isMklReorderNeeded) {
+ // All MKL tensors have the same format. Reorder is not needed.
+ for (int k = 0; k < N; k++) {
+ if (input_tensors[k].NumElements() == 0)
+ continue;
+
+ auto src_md = mkl_input_shapes[k].GetMklLayout();
+ srcs[k].SetUsrMem(src_md, &input_tensors[k]);
+ auto src_mpd = srcs[k].GetUsrMemPrimDesc();
+ srcs_pd.push_back(src_mpd);
+ }
+ } else {
+ // MKL tensors have different formats.
+ // Reorder them to most common format.
+ for (int k = 0; k < N; k++) {
+ if (input_tensors[k].NumElements() == 0)
+ continue;
+
+ auto src_dims = TFShapeToMklDnnDims(
+ mkl_input_shapes[k].GetTfShape());
+ auto src_md = mkl_input_shapes[k].GetMklLayout();
+ srcs[k].SetUsrMem(src_md, &input_tensors[k]);
+
+ if (src_md.data.format != mkl_common_format)
+ src_md = memory::desc(src_dims, MklDnnType<T>(),
+ mkl_common_format);
+
+ srcs_pd.push_back(memory::primitive_desc(src_md, cpu_engine));
+ }
+ }
+ } else { // All TF inputs
+ for (int k = 0; k < N; k++) {
+ if (input_tensors[k].NumElements() == 0)
+ continue;
+
+ memory::dims src_dims = TFShapeToMklDnnDims(input_tensors[k].shape());
+ dst_concat_dim_size += src_dims[concat_dim];
+
+ // It does not matter which data format is used (NHWC versus NCHW).
+ // We just need to ensure that output uses same data format as inputs.
+ auto src_md =
+ memory::desc(src_dims, MklDnnType<T>(), memory::format::nchw);
+
+ srcs[k].SetUsrMem(src_md, &input_tensors[k]);
+ auto src_mpd = srcs[k].GetUsrMemPrimDesc();
+ srcs_pd.push_back(src_mpd);
+ }
}
dst_dims[concat_dim] = dst_concat_dim_size;
@@ -718,25 +742,33 @@ class MklConcatOp : public OpKernel {
if (are_all_mkl_inputs) {
// Since we are passing a specific format for destination,
// we need to have dst_dims in MklDnn order (NCHW).
- auto orig_tf_format = input_shapes[0].GetTfDataFormat();
+ auto orig_tf_format = mkl_input_shapes[0].GetTfDataFormat();
dst_dims_in_nchw = MklDnnDimsInNCHW(
dst_dims, MklDnnDataFormatToTFDataFormat(orig_tf_format));
- // We will set the output in the same format as input to avoid layout
- // conversions.
- // Currently we are setting dst format same as input format.
- // See if we can make this choice in a better way.
+ // Set the output format same as the most common format of inputs
+ // to avoid layout conversions.
dst_md = memory::desc(
- dst_dims_in_nchw, MklDnnType<T>(),
- (memory::format)input_shapes[0].GetMklLayout().data.format);
+ dst_dims_in_nchw, MklDnnType<T>(), mkl_common_format);
} else {
- // Again, format does not matter here. We just need to make it same as
- // input format.
+ // All inputs are TF tensors.
+ // Set the output format same as input format (nchw).
dst_md = memory::desc(dst_dims, MklDnnType<T>(), memory::format::nchw);
}
std::vector<primitive::at> inputs;
- for (int k = 0; k < input_tensors.size(); k++)
- inputs.push_back(srcs[k].GetOpMem());
+ std::vector<primitive> net;
+ if (isMklReorderNeeded) {
+ for (int k = 0; k < input_tensors.size(); k++) {
+ if (input_tensors[k].NumElements() > 0) {
+ srcs[k].CheckReorderToOpMem(srcs_pd[k], &net);
+ }
+ }
+ }
+ for (int k = 0; k < input_tensors.size(); k++) {
+ if (input_tensors[k].NumElements() > 0) {
+ inputs.push_back(srcs[k].GetOpMem());
+ }
+ }
// If all inputs are in MKL format, then meaning of concat_dim needs to
// change. Value of concat_dim is tied to input Tensorflow data format
@@ -745,7 +777,8 @@ class MklConcatOp : public OpKernel {
// But if input tensors are in NHWC order, then semantics need to change.
// E.g., if we are concatenating over Channel (dimension 3 for NHWC),
// then since MklDnn order is NCHW, concat_dim needs to be 1.
- if (are_all_mkl_inputs) concat_dim = input_shapes[0].TfDimIdx(concat_dim);
+ if (are_all_mkl_inputs)
+ concat_dim = mkl_input_shapes[0].TfDimIdx(concat_dim);
auto concat_pd = concat::primitive_desc(dst_md, concat_dim, srcs_pd);
@@ -758,7 +791,7 @@ class MklConcatOp : public OpKernel {
dnn_shape_dst.SetMklLayout(&dst_pd);
dnn_shape_dst.SetElemType(MklDnnType<T>());
dnn_shape_dst.SetTfLayout(dst_dims.size(), dst_dims_in_nchw,
- input_shapes[0].GetTfDataFormat());
+ mkl_input_shapes[0].GetTfDataFormat());
tf_shape_dst.AddDim((dst_pd.get_size() / sizeof(T)));
} else {
dnn_shape_dst.SetMklTensor(false);
@@ -773,7 +806,6 @@ class MklConcatOp : public OpKernel {
dst.SetUsrMem(dst_md, dst_tensor);
auto concat_op = concat(concat_pd, inputs, dst.GetOpMem());
- std::vector<primitive> net;
net.push_back(concat_op);
stream(stream::kind::eager).submit(net).wait();
} catch (mkldnn::error& e) {
@@ -787,15 +819,27 @@ class MklConcatOp : public OpKernel {
}
void CallEigenVersion(OpKernelContext* context, const OpInputList& values,
- const TensorShapeList& input_shapes) {
- CHECK_EQ(values.size(), input_shapes.size());
+ const MklDnnShapeList& mkl_input_shapes) {
+ CHECK_EQ(values.size(), mkl_input_shapes.size());
std::vector<Tensor> converted_values;
- for (int i = 0; i < input_shapes.size(); i++)
- converted_values.push_back(values[i]);
+ TensorShapeList tf_input_shapes;
+ for (int i = 0; i < mkl_input_shapes.size(); i++) {
+ if (mkl_input_shapes[i].IsMklTensor()) {
+ // do conversion from MKL to TF
+ Tensor tmp_tensor =
+ ConvertMklToTF<T>(context, values[i], mkl_input_shapes[i]);
+ converted_values.push_back(tmp_tensor);
+ tf_input_shapes.push_back(mkl_input_shapes[i].GetTfShape());
+ } else {
+ // no conversion since it is TF tensor already
+ converted_values.push_back(values[i]);
+ tf_input_shapes.push_back(values[i].shape());
+ }
+ }
// Call Eigen concat.
- eigen_concat_op_.Compute(context, converted_values, input_shapes);
+ eigen_concat_op_.Compute(context, converted_values, tf_input_shapes);
// Set output Mkl tensor for this op.
MklDnnShape dnn_shape_output;
@@ -812,6 +856,55 @@ class MklConcatOp : public OpKernel {
output_tensor->flat<uint8>().data(),
output_tensor->flat<uint8>().size() * sizeof(uint8));
}
+
+ // Finds the most common MKL format across all MKL inputs.
+ // Inputs:
+ // 1. input_shapes: shapes of input (MKL) tensors.
+ // 2. concat_dim: concat dimension.
+ // Outputs:
+ // 1. is_reorder_needed is set to true if the inputs have different formats.
+ // It is set to false otherwise.
+ // 2. concat_dim_size is the sum, over all inputs, of the size of concat_dim.
+ // Return:
+ // The format shared by all inputs, or the most frequent one when the
+ // inputs differ. memory::format::any is returned if input_shapes is empty.
+ memory::format FindMklCommonFormat(const MklDnnShapeList& input_shapes,
+ int concat_dim, bool* is_reorder_needed, int64* concat_dim_size) {
+ *is_reorder_needed = false;
+ *concat_dim_size = 0;
+ std::unordered_map<int, int> occurrence_map;
+ if (input_shapes.size() == 0)
+ return memory::format::any;
+
+ // Count the occurrences of each format over all inputs, and accumulate
+ // the concat-dimension size along the way.
+ for (int k=0; k <input_shapes.size(); k++) {
+ auto src_dims = TFShapeToMklDnnDims(input_shapes[k].GetTfShape());
+ *concat_dim_size += src_dims[concat_dim];
+ int fmt = static_cast<int>(
+ input_shapes[k].GetMklLayout().data.format);
+ occurrence_map[fmt] += 1;
+ }
+
+ if (occurrence_map.size() == 1) {
+ // This means that all inputs have the same format;
+ // return it with is_reorder_needed left false.
+ return static_cast<memory::format>(
+ input_shapes[0].GetMklLayout().data.format);
+ }
+
+ // Input tensors have different formats. Thus, reorder is needed.
+ // We pick the most common format to minimize the total
+ // number of input reorders.
+ memory::format commonest_format = memory::format::any;
+ int max_occurrence = 0;
+ *is_reorder_needed = true;
+ // The strict '>' keeps the first maximum encountered; since the map is
+ // unordered, ties between equally frequent formats are broken arbitrarily.
+ for (auto item : occurrence_map) {
+ if (item.second > max_occurrence) {
+ commonest_format = static_cast<memory::format>(item.first);
+ max_occurrence = item.second;
+ }
+ }
+ return commonest_format;
+ }
};
#endif
diff --git a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc
index c1da0ded1d..f857be6c32 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc
@@ -18,6 +18,7 @@ limitations under the License.
// bias.
#ifdef INTEL_MKL
+#ifdef INTEL_MKL_ML
#define USE_EIGEN_TENSOR
#define EIGEN_USE_THREADS
@@ -264,4 +265,5 @@ class MklConv2DCustomBackpropBiasOp : public OpKernel {
TF_CALL_float(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS
} /* namespace tensorflow */
+#endif /* INTEL_MKL_ML */
#endif /* INTEL_MKL */
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h
index 279167aba2..c0dfed7d7d 100644
--- a/tensorflow/core/kernels/mkl_pooling_ops_common.h
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h
@@ -199,13 +199,15 @@ class MklPoolingForwardOpBase : public MklPoolingOpBase<T> {
CHECK_NOTNULL(pool_params);
CHECK_NOTNULL(dnn_data_input);
TensorShape input_tensor_shape = input_tensor.shape();
- memory::desc input_md =
+ if (input_tensor.NumElements() != 0) {
+ memory::desc input_md =
input_mkl_shape.IsMklTensor()
? input_mkl_shape.GetMklLayout()
: memory::desc(TFShapeToMklDnnDimsInNCHW(input_tensor_shape,
this->data_format_tf_),
MklDnnType<T>(), this->data_format_mkldnn_);
- dnn_data_input->SetUsrMem(input_md, &input_tensor);
+ dnn_data_input->SetUsrMem(input_md, &input_tensor);
+ }
this->InitMklPoolParameters(context, pool_params, input_mkl_shape,
input_tensor_shape);
}
diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc
index 43c5b29509..e1fc2ea128 100644
--- a/tensorflow/core/kernels/scatter_nd_op.cc
+++ b/tensorflow/core/kernels/scatter_nd_op.cc
@@ -292,6 +292,7 @@ TF_CALL_string(REGISTER_SCATTER_ND_CPU);
REGISTER_SCATTER_ND_UPDATE_GPU(type); \
REGISTER_SCATTER_ND_GPU(type);
+TF_CALL_int32(REGISTER_SCATTER_ND_ALL_GPU);
// TODO(b/66916790): Support half types in ScatterNd.
TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_ALL_GPU);
TF_CALL_complex64(REGISTER_SCATTER_ND_ALL_GPU);
@@ -306,6 +307,8 @@ TF_CALL_complex128(REGISTER_SCATTER_ND_ALL_GPU);
#define REGISTER_SCATTER_ND_UPDATE_SYCL(type) \
REGISTER_SCATTER_ND_UPDATE(type, SYCL);
+TF_CALL_int32(REGISTER_SCATTER_ND_ADD_SUB_SYCL);
+TF_CALL_int32(REGISTER_SCATTER_ND_UPDATE_SYCL);
TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_ADD_SUB_SYCL);
TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_UPDATE_SYCL);
#undef REGISTER_SCATTER_ND_ADD_SUB_SYCL
@@ -576,6 +579,7 @@ namespace functor {
DECLARE_GPU_SPECS_INDEX(T, int32); \
DECLARE_GPU_SPECS_INDEX(T, int64)
+TF_CALL_int32(DECLARE_GPU_SPECS);
// TODO(b/66916790): Support half types in ScatterNd.
TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS);
TF_CALL_complex64(DECLARE_GPU_SPECS);
diff --git a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc
index a3c21edc15..08b657f4c3 100644
--- a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc
@@ -170,6 +170,7 @@ struct ScatterNdFunctor<GPUDevice, T, Index, op, IXDIM> {
DECLARE_GPU_SPECS_INDEX(T, int32); \
DECLARE_GPU_SPECS_INDEX(T, int64)
+TF_CALL_int32(DECLARE_GPU_SPECS);
TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS);
TF_CALL_complex64(DECLARE_GPU_SPECS);
TF_CALL_complex128(DECLARE_GPU_SPECS);
diff --git a/tensorflow/core/kernels/scoped_allocator_ops_test.cc b/tensorflow/core/kernels/scoped_allocator_ops_test.cc
index bb0129fa6f..634f9ba887 100644
--- a/tensorflow/core/kernels/scoped_allocator_ops_test.cc
+++ b/tensorflow/core/kernels/scoped_allocator_ops_test.cc
@@ -216,8 +216,13 @@ TEST_F(ScopedAllocatorConcatOpTest, Success3) {
}
TEST_F(ScopedAllocatorConcatOpTest, Reshape) {
- MakeOp({2, 2, 2}, DT_DOUBLE, true, "test", 120, 2);
- ExecOp(DT_DOUBLE, 120, {{2, 2}, {2, 2}});
+ MakeOp({2, 2, 4}, DT_DOUBLE, true, "test", 120, 2);
+
+ // The elements of the third parameter to ExecOp must be multiples of
+ // Allocator::kAllocatorAlignment in size. If they are not, the backing
+ // tensor allocated by PrepOp will have too many elements and reshaping
+ // will fail.
+ ExecOp(DT_DOUBLE, 120, {{2, 4}, {2, 4}});
}
TEST_F(ScopedAllocatorConcatOpTest, NoReshapeAttr) {
diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h
index 7796bf3587..d65692a552 100644
--- a/tensorflow/core/kernels/segment_reduction_ops.h
+++ b/tensorflow/core/kernels/segment_reduction_ops.h
@@ -16,6 +16,14 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
#define TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
+
+// This file requires the following include because it uses CudaAtomicMax:
+// #include "tensorflow/core/util/cuda_kernel_helper.h"
+
+// Unfortunately we can't add the #include, since it breaks compilation for
+// non-GPU targets. This only breaks in clang, because it's more strict for
+// template code and CudaAtomicMax is used in template context.
+
// This file requires the following include because it uses CudaAtomicMax:
// #include "tensorflow/core/util/cuda_kernel_helper.h"
@@ -130,4 +138,4 @@ struct Highest {
} // namespace functor
} // namespace tensorflow
-#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
+#endif // TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
diff --git a/tensorflow/core/kernels/sparse_matmul_op.cc b/tensorflow/core/kernels/sparse_matmul_op.cc
index a1f9667b78..866c5dcd52 100644
--- a/tensorflow/core/kernels/sparse_matmul_op.cc
+++ b/tensorflow/core/kernels/sparse_matmul_op.cc
@@ -1490,7 +1490,7 @@ inline void LibxsmmSparseMatMul<TL, TR>::Compute(
#endif // TENSORFLOW_USE_LIBXSMM
-// Here is a an overview of the SparseMatMul code. Note that we assume that the
+// Here is an overview of the SparseMatMul code. Note that we assume that the
// left matrix is sparse.
//
// The matrix "left" is divided into a grid with blocksize of (M, KL). Each
diff --git a/tensorflow/core/kernels/string_split_op.cc b/tensorflow/core/kernels/string_split_op.cc
index 4c2b312c34..26ab72f12e 100644
--- a/tensorflow/core/kernels/string_split_op.cc
+++ b/tensorflow/core/kernels/string_split_op.cc
@@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/strings/str_util.h"
namespace tensorflow {
@@ -43,6 +44,63 @@ std::vector<string> Split(const string& str, const string& delimiter,
return char_vector;
}
+// Splits `str` following the rules of Python's str.split and returns the
+// pieces as a vector of strings.
+//
+// str: the text to split.
+// sep: the separator; an empty `sep` selects whitespace splitting.
+// maxsplit: 0 returns `str` as a single unsplit element; a positive value
+// caps the number of splits; a negative value places no cap.
+std::vector<string> SplitV2(const string& str, StringPiece sep, int maxsplit) {
+ // This SplitV2 method matches the behavior of python's str.split:
+ // If sep is given, consecutive delimiters are not grouped together
+ // and are deemed to delimit empty strings (for example, '1,,2'.split(',')
+ // returns ['1', '', '2']). The sep argument may consist of multiple
+ // characters (for example, '1<>2<>3'.split('<>') returns ['1', '2', '3']).
+ // Splitting an empty string with a specified separator returns [''].
+ //
+ // If sep is not specified or is None, a different splitting algorithm is
+ // applied: runs of consecutive whitespace are regarded as a single
+ // separator, and the result will contain no empty strings at the start or
+ // end if the string has leading or trailing whitespace. Consequently,
+ // splitting an empty string or a string consisting of just whitespace
+ // with a None separator returns [].
+
+ std::vector<string> result;
+
+ StringPiece text(str);
+ if (maxsplit == 0) {
+ // NOTE(review): with an empty sep, Python's str.split(None, 0) still
+ // strips leading whitespace and returns [] for blank input, whereas this
+ // branch returns `str` untouched -- confirm whether that is intended.
+ result.emplace_back(std::string(text));
+ return result;
+ }
+
+ if (sep.empty()) {
+ StringPiece token;
+ // Remove leading whitespaces.
+ str_util::RemoveLeadingWhitespace(&text);
+ int split = 0;
+ while (str_util::ConsumeNonWhitespace(&text, &token)) {
+ result.emplace_back(std::string(token));
+ str_util::RemoveLeadingWhitespace(&text);
+ ++split;
+ if (maxsplit > 0 && split == maxsplit) {
+ // The split cap is reached: the rest of the text is kept as one piece.
+ // It is appended only when non-empty so that trailing whitespace never
+ // produces an empty last element ('a '.split(None, 1) is ['a']).
+ if (!text.empty()) {
+ result.emplace_back(std::string(text));
+ }
+ return result;
+ }
+ }
+ return result;
+ }
+ // Explicit separator: every occurrence of sep delimits a (possibly empty)
+ // token, and whatever follows the last occurrence is always emitted.
+ auto p = std::search(text.begin(), text.end(), sep.begin(), sep.end());
+ int split = 0;
+ while (p != text.end()) {
+ StringPiece token = text.substr(0, p - text.begin());
+ result.emplace_back(std::string(token));
+ text.remove_prefix(token.size());
+ text.remove_prefix(sep.size());
+ ++split;
+ if (maxsplit > 0 && split == maxsplit) {
+ result.emplace_back(std::string(text));
+ return result;
+ }
+ p = std::search(text.begin(), text.end(), sep.begin(), sep.end());
+ }
+ result.emplace_back(std::string(text));
+ return result;
+}
+
} // namespace
class StringSplitOp : public OpKernel {
@@ -122,6 +180,78 @@ class StringSplitOp : public OpKernel {
bool skip_empty_;
};
+// Kernel for the "StringSplitV2" op. Splits every element of the 1-D string
+// input with SplitV2 and writes three outputs: an int64 matrix of
+// (row, position) indices, a string vector of the tokens, and an int64
+// vector holding [batch_size, max tokens per row].
+class StringSplitV2Op : public OpKernel {
+ public:
+ // Reads the "maxsplit" attribute; maxsplit_ starts at -1 and is overwritten
+ // by the attribute value.
+ explicit StringSplitV2Op(OpKernelConstruction* context)
+ : OpKernel(context), maxsplit_(-1) {
+ OP_REQUIRES_OK(context, context->GetAttr("maxsplit", &maxsplit_));
+ }
+
+ // Validates that "input" is a vector and "sep" is a scalar, splits each
+ // input string, then allocates and fills the three outputs.
+ void Compute(OpKernelContext* ctx) override {
+ const Tensor* input_tensor;
+ OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor));
+ OP_REQUIRES(ctx, TensorShapeUtils::IsVector(input_tensor->shape()),
+ errors::InvalidArgument("input must be a vector, got shape: ",
+ input_tensor->shape().DebugString()));
+
+ const auto input_vec = input_tensor->vec<string>();
+ const int64 batch_size = input_vec.dimension(0);
+
+ const Tensor* sep_tensor;
+ OP_REQUIRES_OK(ctx, ctx->input("sep", &sep_tensor));
+ OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(sep_tensor->shape()),
+ errors::InvalidArgument("sep must be a scalar, got shape: ",
+ sep_tensor->shape().DebugString()));
+ const auto sep_vec = sep_tensor->flat<string>();
+ StringPiece sep(sep_vec(0));
+ std::vector<string> tokens;
+ // Guess that we'll be unpacking a handful of tokens per example.
+ static constexpr int kReserveSize = 4;
+ tokens.reserve(batch_size * kReserveSize);
+
+ // First pass: split every row, remembering how many tokens each row
+ // produced and appending the tokens to one flat list in row order.
+ int64 output_size = 0;
+ int64 max_num_entries = 0;
+ std::vector<int64> num_indices(batch_size);
+ for (int64 i = 0; i < batch_size; ++i) {
+ std::vector<string> parts = SplitV2(input_vec(i), sep, maxsplit_);
+ int64 n_entries = parts.size();
+ num_indices[i] = n_entries;
+ output_size += n_entries;
+ max_num_entries = std::max(max_num_entries, n_entries);
+ tokens.insert(tokens.end(), parts.begin(), parts.end());
+ }
+
+ // Outputs can only be sized once the total token count is known.
+ Tensor* sp_indices_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({output_size, 2}),
+ &sp_indices_t));
+ Tensor* sp_tokens_t;
+ OP_REQUIRES_OK(
+ ctx, ctx->allocate_output(1, TensorShape({output_size}), &sp_tokens_t));
+ Tensor* sp_shape_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(2, TensorShape({2}), &sp_shape_t));
+
+ auto sp_indices = sp_indices_t->matrix<int64>();
+ auto sp_tokens = sp_tokens_t->vec<string>();
+ auto sp_shape = sp_shape_t->vec<int64>();
+ sp_shape(0) = batch_size;
+ sp_shape(1) = max_num_entries;
+ // Second pass: c walks the flat token list while (i, j) records the row
+ // and the position of the token within that row.
+ size_t c = 0;
+ for (size_t i = 0; i < batch_size; ++i) {
+ for (size_t j = 0; j < num_indices[i]; ++j) {
+ sp_indices(c, 0) = i;
+ sp_indices(c, 1) = j;
+ sp_tokens(c) = tokens[c];
+ ++c;
+ }
+ }
+ }
+
+ private:
+ // Value of the "maxsplit" attribute, passed unchanged to SplitV2.
+ int maxsplit_;
+};
+
REGISTER_KERNEL_BUILDER(Name("StringSplit").Device(DEVICE_CPU), StringSplitOp);
+REGISTER_KERNEL_BUILDER(Name("StringSplitV2").Device(DEVICE_CPU),
+ StringSplitV2Op);
} // namespace tensorflow
diff --git a/tensorflow/core/ops/candidate_sampling_ops.cc b/tensorflow/core/ops/candidate_sampling_ops.cc
index 6e4d100b04..6e589c8d1c 100644
--- a/tensorflow/core/ops/candidate_sampling_ops.cc
+++ b/tensorflow/core/ops/candidate_sampling_ops.cc
@@ -145,12 +145,15 @@ REGISTER_OP("ComputeAccidentalHits")
int64 num_true;
TF_RETURN_IF_ERROR(c->GetAttr("num_true", &num_true));
- // Validate true_classes.
+ // Validate true_classes, must be a matrix.
ShapeHandle true_classes;
TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &true_classes));
DimensionHandle unused;
TF_RETURN_IF_ERROR(
c->WithValue(c->Dim(true_classes, 1), num_true, &unused));
+ // Validate sampled_candidates, must be a vector.
+ ShapeHandle sampled_candidates;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &sampled_candidates));
// All three outputs are the same shape.
ShapeHandle v = c->Vector(InferenceContext::kUnknownDim);
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 15e0ca8af9..9dca5f53ce 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -218,7 +218,17 @@ REGISTER_OP("MapAndBatchDataset")
.Attr("Targuments: list(type) >= 0")
.Attr("output_types: list(type) >= 1")
.Attr("output_shapes: list(shape) >= 1")
- .SetShapeFn(shape_inference::ScalarShape);
+ .SetShapeFn([](shape_inference::InferenceContext* c) {
+ // Index from the end to retrieve the input shapes, so that we avoid
+ // guessing the length of "other_arguments".
+ // batch_size, num_parallel_batches, and drop_remainder are 0-D scalars.
+ shape_inference::ShapeHandle unused;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 3), 0, &unused));
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 2), 0, &unused));
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 1), 0, &unused));
+
+ return shape_inference::ScalarShape(c);
+ });
REGISTER_OP("MapAndBatchDatasetV2")
.Input("input_dataset: variant")
@@ -231,7 +241,17 @@ REGISTER_OP("MapAndBatchDatasetV2")
.Attr("Targuments: list(type) >= 0")
.Attr("output_types: list(type) >= 1")
.Attr("output_shapes: list(shape) >= 1")
- .SetShapeFn(shape_inference::ScalarShape);
+ .SetShapeFn([](shape_inference::InferenceContext* c) {
+ // Index from the end to retrieve the input shapes, so that we avoid
+ // guessing the length of "other_arguments".
+ // batch_size, num_parallel_calls, and drop_remainder are 0-D scalars.
+ shape_inference::ShapeHandle unused;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 3), 0, &unused));
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 2), 0, &unused));
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 1), 0, &unused));
+
+ return shape_inference::ScalarShape(c);
+ });
REGISTER_OP("PrefetchDataset")
.Input("input_dataset: variant")
diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc
index d949e70c66..87f4991134 100644
--- a/tensorflow/core/ops/image_ops.cc
+++ b/tensorflow/core/ops/image_ops.cc
@@ -454,7 +454,9 @@ REGISTER_OP("DrawBoundingBoxes")
DimensionHandle unused;
TF_RETURN_IF_ERROR(c->WithValue(c->Dim(boxes, 2), 4, &unused));
- return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
+ // The rank of the input image (rank = 4) has already been restricted
+ // above, and the output is of the same shape as the input.
+ return shape_inference::UnchangedShape(c);
});
// --------------------------------------------------------------------------
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 1740fa152c..b3487122e2 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -1084,7 +1084,7 @@ REGISTER_OP("UnsortedSegmentProd")
.Input("segment_ids: Tindices")
.Input("num_segments: Tnumsegments")
.Output("output: T")
- .Attr("T: realnumbertype")
+ .Attr("T: numbertype")
.Attr("Tindices: {int32,int64}")
.Attr("Tnumsegments: {int32,int64} = DT_INT32")
.SetShapeFn(UnsortedSegmentReductionShapeFn);
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index fc60e807b9..41efa49ce3 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -1453,6 +1453,7 @@ REGISTER_OP("QuantizedReluX")
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc
index 1d5c743a56..4423062362 100644
--- a/tensorflow/core/ops/string_ops.cc
+++ b/tensorflow/core/ops/string_ops.cc
@@ -78,7 +78,7 @@ REGISTER_OP("ReduceJoin")
REGISTER_OP("AsString")
.Input("input: T")
.Output("output: string")
- .Attr("T: {int32, int64, complex64, float, double, bool, int8}")
+ .Attr("T: {int8, int16, int32, int64, complex64, float, double, bool}")
.Attr("precision: int = -1")
.Attr("scientific: bool = false")
.Attr("shortest: bool = false")
@@ -134,6 +134,24 @@ REGISTER_OP("StringSplit")
return Status::OK();
});
+// Op definition for StringSplitV2: takes a string `input` and a string `sep`
+// plus an integer `maxsplit` attribute (default -1), and produces the three
+// outputs `indices`, `values` and `shape`.
+REGISTER_OP("StringSplitV2")
+ .Input("input: string")
+ .Input("sep: string")
+ .Output("indices: int64")
+ .Output("values: string")
+ .Output("shape: int64")
+ .Attr("maxsplit: int = -1")
+ .SetShapeFn([](InferenceContext* c) {
+ ShapeHandle unused;
+ // `input` must be a vector and `sep` must be a scalar.
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused));
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+
+ // The number of produced entries is not known statically, so the leading
+ // dimension of `indices` and `values` is left unknown.
+ c->set_output(0, c->Matrix(InferenceContext::kUnknownDim, 2));
+ c->set_output(1, c->Vector(InferenceContext::kUnknownDim));
+ c->set_output(2, c->Vector(2));
+ return Status::OK();
+ });
+
REGISTER_OP("StringStrip")
.Input("input: string")
.Output("output: string")
diff --git a/tensorflow/core/platform/cpu_info.cc b/tensorflow/core/platform/cpu_info.cc
index 99de364042..e9da3d8e32 100644
--- a/tensorflow/core/platform/cpu_info.cc
+++ b/tensorflow/core/platform/cpu_info.cc
@@ -344,5 +344,28 @@ int CPUModelNum() {
#endif
}
+// Returns 2 ^ SMT_Mask_Width as reported by CPUID leaf 11, sub-leaf 0.
+// Returns 0 when PLATFORM_IS_X86 is not defined, when leaf 11 is not
+// available, or when sub-leaf 0 does not describe the SMT level.
+int CPUIDNumSMT() {
+#ifdef PLATFORM_IS_X86
+ // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
+ // https://software.intel.com/en-us/articles/intel-sdm (Vol 3A)
+ // Section: Detecting Hardware Multi-threads Support and Topology
+ // Uses CPUID Leaf 11 to enumerate system topology on Intel x86 architectures
+ // Other cases not supported
+ uint32 eax, ebx, ecx, edx;
+ // Check if system supports Leaf 11
+ // NOTE(review): the last two GETCPUID arguments appear to be the leaf (EAX)
+ // and sub-leaf (ECX) inputs -- confirm against the macro definition.
+ GETCPUID(eax, ebx, ecx, edx, 0, 0);
+ if (eax >= 11) {
+ // 1) Leaf 11 available? CPUID.(EAX=11, ECX=0):EBX != 0
+ // 2) SMT_Mask_Width = CPUID.(EAX=11, ECX=0):EAX[4:0] if CPUID.(EAX=11,
+ // ECX=0):ECX[15:8] is 1
+ GETCPUID(eax, ebx, ecx, edx, 11, 0);
+ if (ebx != 0 && ((ecx & 0xff00) >> 8) == 1) {
+ return 1 << (eax & 0x1f); // 2 ^ SMT_Mask_Width
+ }
+ }
+#endif // PLATFORM_IS_X86
+ // Reached on non-x86 builds and whenever the checks above fail.
+ return 0;
+}
+
} // namespace port
} // namespace tensorflow
diff --git a/tensorflow/core/platform/cpu_info.h b/tensorflow/core/platform/cpu_info.h
index b5be7e8b54..175c9ae8b1 100644
--- a/tensorflow/core/platform/cpu_info.h
+++ b/tensorflow/core/platform/cpu_info.h
@@ -35,6 +35,10 @@ namespace port {
// software can change it dynamically.
int NumSchedulableCPUs();
+// Returns an estimate of the number of hyperthreads per physical core
+// on the CPU
+int NumHyperthreadsPerCore();
+
// Mostly ISA related features that we care about
enum CPUFeature {
// Do not change numeric assignments.
@@ -107,6 +111,9 @@ int CPUModelNum();
// Returns nominal core processor cycles per second of each processor.
double NominalCPUFrequency();
+// Returns the number of hyperthreads per physical core as reported by CPUID,
+// or 0 if it cannot be determined.
+int CPUIDNumSMT();
+
} // namespace port
} // namespace tensorflow
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index ae81f9b5b3..a319ccbdbe 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -71,6 +71,8 @@ def pyx_library(
name = filename + "_cython_translation",
srcs = [filename],
outs = [filename.split(".")[0] + ".cpp"],
+ # Optionally use PYTHON_BIN_PATH on Linux platforms so that python 3
+ # works. Windows has issues with cython_binary so skip PYTHON_BIN_PATH.
cmd = "PYTHONHASHSEED=0 $(location @cython//:cython_binary) --cplus $(SRCS) --output-file $(OUTS)",
tools = ["@cython//:cython_binary"] + pxd_srcs,
)
diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc
index 72c12318ca..ff4b4436bb 100644
--- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc
+++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc
@@ -115,18 +115,17 @@ class LibHDFS {
const char* kLibHdfsDso = "libhdfs.so";
#endif
char* hdfs_home = getenv("HADOOP_HDFS_HOME");
- if (hdfs_home == nullptr) {
- status_ = errors::FailedPrecondition(
- "Environment variable HADOOP_HDFS_HOME not set");
- return;
- }
- string path = io::JoinPath(hdfs_home, "lib", "native", kLibHdfsDso);
- status_ = TryLoadAndBind(path.c_str(), &handle_);
- if (!status_.ok()) {
- // try load libhdfs.so using dynamic loader's search path in case
- // libhdfs.so is installed in non-standard location
- status_ = TryLoadAndBind(kLibHdfsDso, &handle_);
+ if (hdfs_home != nullptr) {
+ string path = io::JoinPath(hdfs_home, "lib", "native", kLibHdfsDso);
+ status_ = TryLoadAndBind(path.c_str(), &handle_);
+ if (status_.ok()) {
+ return;
+ }
}
+
+ // Try to load the library dynamically in case it has been installed
+ // to a non-standard location.
+ status_ = TryLoadAndBind(kLibHdfsDso, &handle_);
}
Status status_;
diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc
index 8e316472fe..708f32ba80 100644
--- a/tensorflow/core/platform/posix/port.cc
+++ b/tensorflow/core/platform/posix/port.cc
@@ -74,6 +74,11 @@ int NumSchedulableCPUs() {
return kDefaultCores;
}
+// Returns the value of CPUIDNumSMT() when it is positive, and 1 otherwise.
+int NumHyperthreadsPerCore() {
+ // Function-local static: CPUIDNumSMT() is evaluated once, on the first call.
+ static const int ht_per_core = tensorflow::port::CPUIDNumSMT();
+ return (ht_per_core > 0) ? ht_per_core : 1;
+}
+
void* AlignedMalloc(size_t size, int minimum_alignment) {
#if defined(__ANDROID__)
return memalign(minimum_alignment, size);
diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 522a9d84fd..cb1fd09dbb 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -19,12 +19,12 @@ limitations under the License.
// TensorFlow uses semantic versioning, see http://semver.org/.
#define TF_MAJOR_VERSION 1
-#define TF_MINOR_VERSION 8
+#define TF_MINOR_VERSION 9
#define TF_PATCH_VERSION 0
// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
// "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX ""
+#define TF_VERSION_SUFFIX "-rc0"
#define TF_STR_HELPER(x) #x
#define TF_STR(x) TF_STR_HELPER(x)
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index dffc965b14..90b6533690 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -42,6 +42,7 @@ limitations under the License.
#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
+#include "tensorflow/core/lib/core/stringpiece.h"
using mkldnn::engine;
using mkldnn::memory;
@@ -712,15 +713,48 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
return output_tensor;
}
#else
+using mkldnn::stream;
+template <typename T> class MklDnnData;
+
+// Converts `mkl_tensor` into a tensor in TensorFlow layout.
+//
+// context: kernel context used to allocate the output tensor.
+// mkl_tensor: the tensor to convert.
+// mkl_shape: MKL-DNN shape metadata describing `mkl_tensor`.
+//
+// Returns `mkl_tensor` itself when `mkl_shape` is not an MKL tensor;
+// otherwise a temporary tensor of shape mkl_shape.GetTfShape() that is filled
+// by a reorder, or by Tensor::CopyFrom when no reorder is needed.
+// A failed allocation, a failed reorder/copy, or an mkldnn::error is fatal.
template <typename T>
inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
const MklDnnShape& mkl_shape) {
Tensor output_tensor;
- TensorShape output_shape;
-
- TF_CHECK_OK(
- Status(error::Code::UNIMPLEMENTED, "Unimplemented conversion function"));
-
+ try {
+ if (!mkl_shape.IsMklTensor())
+ return mkl_tensor; // return input since it is already TF tensor
+
+ TensorShape output_shape = mkl_shape.GetTfShape();
+
+ // Allocate output tensor. The returned Status must be checked: continuing
+ // with an unallocated tensor would fail later in a less obvious place.
+ TF_CHECK_OK(context->allocate_temp(DataTypeToEnum<T>::v(),
+ output_shape, &output_tensor));
+
+ auto cpu_engine = engine(engine::cpu, 0);
+ MklDnnData<T> input(&cpu_engine);
+
+ // Get Mkl layout of input tensor.
+ auto input_mkl_md = mkl_shape.GetMklLayout();
+ auto output_tf_md = mkl_shape.GetTfLayout();
+ auto output_tf_pd = memory::primitive_desc(output_tf_md, cpu_engine);
+ input.SetUsrMem(input_mkl_md, &mkl_tensor);
+
+ // reorder
+ if (input.IsReorderNeeded(output_tf_pd)) {
+ std::vector<primitive> net;
+ CHECK_EQ(input.CheckReorderToOpMem(output_tf_pd, &output_tensor, &net),
+ true);
+ stream(stream::kind::eager).submit(net).wait();
+ } else {
+ // If not, just forward input tensor to output tensor.
+ CHECK(output_tensor.CopyFrom(mkl_tensor, output_shape));
+ }
+ } catch (mkldnn::error& e) {
+ string error_msg = "Status: " + std::to_string(e.status) +
+ ", message: " + string(e.message) + ", in file " +
+ string(__FILE__) + ":" + std::to_string(__LINE__);
+ LOG(FATAL) << "Operation received an exception: " << error_msg;
+ }
return output_tensor;
}
#endif
@@ -1843,7 +1877,7 @@ class FactoryKeyCreator {
template <typename T>
void AddAsKey(const T data) {
auto buffer = reinterpret_cast<const char *>(&data);
- Append(absl::string_view(buffer, sizeof(T)));
+ Append(StringPiece(buffer, sizeof(T)));
}
std::string GetKey() {
@@ -1854,8 +1888,8 @@ class FactoryKeyCreator {
string key_;
const char delimiter = 'x';
const int kMaxKeyLength = 256;
- void Append(absl::string_view s) {
- key_.append(string(s));
+ void Append(StringPiece s) {
+ key_.append(s.ToString());
key_.append(1, delimiter);
}
};
diff --git a/tensorflow/docs_src/community/groups.md b/tensorflow/docs_src/community/groups.md
index d92f5775fa..0b07d413da 100644
--- a/tensorflow/docs_src/community/groups.md
+++ b/tensorflow/docs_src/community/groups.md
@@ -1,17 +1,38 @@
# User Groups
-TensorFlow has communities around the world.
+TensorFlow has communities around the world. [Submit your community!](https://docs.google.com/forms/d/e/1FAIpQLSc_RQIUYtVgLLihzATaO_WUXkEyBDE_OoRoOXYDPmBEvHuEBA/viewform)
## Asia
-* [TensorFlow Korea (TF-KR) User Group](https://www.facebook.com/groups/TensorFlowKR/) _(Korean language)_
-* [TensorFlow User Group Tokyo](https://tfug-tokyo.connpass.com/) _(Japanese Language)_
-* [Soleil Data Dojo](https://soleildatadojo.connpass.com/) _(Japanese language)_
+* [TensorFlow China community](https://www.tensorflowers.cn)
+* [TensorFlow Korea (TF-KR) User Group](https://www.facebook.com/groups/TensorFlowKR/)
+* [TensorFlow User Group Tokyo](https://tfug-tokyo.connpass.com/)
+* [Soleil Data Dojo](https://soleildatadojo.connpass.com/)
* [TensorFlow User Group Utsunomiya](https://tfug-utsunomiya.connpass.com/)
+* [TensorFlow Philippines Community](https://www.facebook.com/groups/TensorFlowPH/)
+* [TensorFlow and Deep Learning Singapore](https://www.meetup.com/TensorFlow-and-Deep-Learning-Singapore/)
+* [TensorFlow India](https://www.facebook.com/tensorflowindia)
## Europe
* [TensorFlow Barcelona](https://www.meetup.com/Barcelona-Machine-Learning-Meetup/)
* [TensorFlow Madrid](https://www.meetup.com/TensorFlow-Madrid/)
+* [TensorFlow Belgium](https://www.meetup.com/TensorFlow-Belgium)
+* [TensorFlow x Rome Meetup](https://www.meetup.com/it-IT/TensorFlow-x-Rome-Meetup)
+* [TensorFlow London](https://www.meetup.com/TensorFlow-London/)
+* [TensorFlow Edinburgh](https://www.meetup.com/tensorflow-edinburgh/)
+
+## America
+
+* [TensorFlow Buenos Aires](https://www.meetup.com/TensorFlow-Buenos-Aires/)
+
+
+## Oceania
+* [Melbourne TensorFlow Meetup](https://www.meetup.com/Melbourne-TensorFlow-Meetup)
+
+
+## Africa
+
+* [TensorFlow Tunis Meetup](https://www.meetup.com/fr-FR/TensorFlow-Tunis-Meetup/)
diff --git a/tensorflow/docs_src/get_started/eager.md b/tensorflow/docs_src/get_started/eager.md
index f08ac74425..bbb25e20c6 100644
--- a/tensorflow/docs_src/get_started/eager.md
+++ b/tensorflow/docs_src/get_started/eager.md
@@ -1,3 +1,3 @@
# Get Started with Eager Execution
-[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/r1.8.0/samples/core/get_started/eager.ipynb)
+[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/r1.9.0/samples/core/get_started/eager.ipynb)
diff --git a/tensorflow/docs_src/get_started/index.md b/tensorflow/docs_src/get_started/index.md
index 55579d52fb..232d2f1547 100644
--- a/tensorflow/docs_src/get_started/index.md
+++ b/tensorflow/docs_src/get_started/index.md
@@ -10,9 +10,9 @@ course prior to diving into TensorFlow documentation:
TensorFlow is a tool for machine learning. While it contains a wide range of
functionality, TensorFlow is mainly designed for deep neural network models.
-The easiest way to get started with TensorFlow is using Eager Execution.
+The easiest way to get started with TensorFlow is by using Eager Execution.
- * @{$get_started/eager}, is for anyone new to machine learning or TensorFlow.
+ * @{$get_started/eager}, is for anyone new to machine learning or TensorFlow.
TensorFlow provides many APIs. The remainder of this section focuses on the
Estimator API which provide scalable, high-performance models. See the
diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md
index 1abd840ab3..2901848745 100644
--- a/tensorflow/docs_src/install/install_c.md
+++ b/tensorflow/docs_src/install/install_c.md
@@ -38,7 +38,7 @@ enable TensorFlow for C:
OS="linux" # Change to "darwin" for macOS
TARGET_DIRECTORY="/usr/local"
curl -L \
- "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" |
+ "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.9.0-rc0.tar.gz" |
sudo tar -C $TARGET_DIRECTORY -xz
The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md
index 52a2a3f8a6..55bc0f64e7 100644
--- a/tensorflow/docs_src/install/install_go.md
+++ b/tensorflow/docs_src/install/install_go.md
@@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go:
TF_TYPE="cpu" # Change to "gpu" for GPU support
TARGET_DIRECTORY='/usr/local'
curl -L \
- "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0.tar.gz" |
+ "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.9.0-rc0.tar.gz" |
sudo tar -C $TARGET_DIRECTORY -xz
The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md
index 1256fb99c4..637231da12 100644
--- a/tensorflow/docs_src/install/install_java.md
+++ b/tensorflow/docs_src/install/install_java.md
@@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs:
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>tensorflow</artifactId>
- <version>1.8.0</version>
+ <version>1.9.0-rc0</version>
</dependency>
```
@@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow:
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>tensorflow</artifactId>
- <version>1.8.0</version>
+ <version>1.9.0-rc0</version>
</dependency>
</dependencies>
</project>
@@ -124,12 +124,12 @@ instead:
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>libtensorflow</artifactId>
- <version>1.8.0</version>
+ <version>1.9.0-rc0</version>
</dependency>
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>libtensorflow_jni_gpu</artifactId>
- <version>1.8.0</version>
+ <version>1.9.0-rc0</version>
</dependency>
```
@@ -148,7 +148,7 @@ refer to the simpler instructions above instead.
Take the following steps to install TensorFlow for Java on Linux or macOS:
1. Download
- [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar),
+ [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc0.jar),
which is the TensorFlow Java Archive (JAR).
2. Decide whether you will run TensorFlow for Java on CPU(s) only or with
@@ -167,7 +167,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
mkdir -p ./jni
curl -L \
- "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" |
+ "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.9.0-rc0.tar.gz" |
tar -xz -C ./jni
### Install on Windows
@@ -175,13 +175,13 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
Take the following steps to install TensorFlow for Java on Windows:
1. Download
- [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar),
+ [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc0.jar),
which is the TensorFlow Java Archive (JAR).
2. Download the following Java Native Interface (JNI) file appropriate for
- [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0.zip).
+ [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.9.0-rc0.zip).
3. Extract this .zip file.
-
+__Note__: The native library (`tensorflow_jni.dll`) requires `msvcp140.dll` at runtime, which is included in the [Visual C++ 2015 Redistributable](https://www.microsoft.com/en-us/download/details.aspx?id=48145) package.
### Validate the installation
@@ -227,7 +227,7 @@ must be part of your `classpath`. For example, you can include the
downloaded `.jar` in your `classpath` by using the `-cp` compilation flag
as follows:
-<pre><b>javac -cp libtensorflow-1.8.0.jar HelloTF.java</b></pre>
+<pre><b>javac -cp libtensorflow-1.9.0-rc0.jar HelloTF.java</b></pre>
### Running
@@ -241,11 +241,11 @@ two files are available to the JVM:
For example, the following command line executes the `HelloTF` program on Linux
and macOS X:
-<pre><b>java -cp libtensorflow-1.8.0.jar:. -Djava.library.path=./jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.9.0-rc0.jar:. -Djava.library.path=./jni HelloTF</b></pre>
And the following command line executes the `HelloTF` program on Windows:
-<pre><b>java -cp libtensorflow-1.8.0.jar;. -Djava.library.path=jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.9.0-rc0.jar;. -Djava.library.path=jni HelloTF</b></pre>
If the program prints <tt>Hello from <i>version</i></tt>, you've successfully
installed TensorFlow for Java and are ready to use the API. If the program
diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md
index 0ed8160027..c8d706cf3c 100644
--- a/tensorflow/docs_src/install/install_linux.md
+++ b/tensorflow/docs_src/install/install_linux.md
@@ -339,9 +339,7 @@ Docker will download the TensorFlow binary image the first time you launch it.
#### GPU support
-Prior to installing TensorFlow with GPU support, ensure that your system meets all
-[NVIDIA software requirements](#NVIDIARequirements). To launch a Docker container
-with NVidia GPU support, enter a command of the following format:
+To launch a Docker container with NVIDIA GPU support, enter a command of the following format (this [does not require any local CUDA installation](https://github.com/nvidia/nvidia-docker/wiki/CUDA#requirements)):
<pre>
$ <b>nvidia-docker run -it</b> <i>-p hostPort:containerPort TensorFlowGPUImage</i>
@@ -438,7 +436,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
<pre>
(tensorflow)$ <b>pip install --ignore-installed --upgrade \
- https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl</b></pre>
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp34-cp34m-linux_x86_64.whl</b></pre>
<a name="ValidateYourInstallation"></a>
## Validate your installation
@@ -517,7 +515,7 @@ on your system:
from source. To use the TensorFlow binaries, version 3.5 or higher is required.
See the [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a
list of supported GPU cards.
-* [GPU drivers](http://nvidia.com/driver) that support your version of the CUDA
+* [GPU drivers](http://nvidia.com/drivers) that support your version of the CUDA
Toolkit.
* The `libcupti-dev` library is the NVIDIA CUDA Profile Tools Interface. This
library provides advanced profiling support. To install this library,
@@ -684,14 +682,14 @@ This section documents the relevant values for Linux installations.
CPU only:
<pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp27-none-linux_x86_64.whl
</pre>
GPU support:
<pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp27-none-linux_x86_64.whl
</pre>
Note that GPU support requires the NVIDIA hardware and software described in
@@ -703,14 +701,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
CPU only:
<pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
</pre>
GPU support:
<pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
</pre>
Note that GPU support requires the NVIDIA hardware and software described in
@@ -722,14 +720,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
CPU only:
<pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp35-cp35m-linux_x86_64.whl
</pre>
GPU support:
<pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp35-cp35m-linux_x86_64.whl
</pre>
@@ -741,14 +739,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
CPU only:
<pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp36-cp36m-linux_x86_64.whl
</pre>
GPU support:
<pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp36-cp36m-linux_x86_64.whl
</pre>
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md
index 29a867a9e3..9d01271c5a 100644
--- a/tensorflow/docs_src/install/install_mac.md
+++ b/tensorflow/docs_src/install/install_mac.md
@@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv:
TensorFlow in the active Virtualenv is as follows:
<pre> $ <b>pip3 install --upgrade \
- https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl</b></pre>
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl</b></pre>
If you encounter installation problems, see
[Common Installation Problems](#common-installation-problems).
@@ -242,7 +242,7 @@ take the following steps:
issue the following command:
<pre> $ <b>sudo pip3 install --upgrade \
- https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl</b> </pre>
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl</b> </pre>
If the preceding command fails, see
[installation problems](#common-installation-problems).
@@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
TensorFlow for Python 2.7:
<pre> (<i>targetDirectory</i>)$ <b>pip install --ignore-installed --upgrade \
- https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl</b></pre>
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-any.whl</b></pre>
<a name="ValidateYourInstallation"></a>
@@ -522,7 +522,7 @@ The value you specify depends on your Python version.
<pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-any.whl
</pre>
@@ -530,5 +530,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.
<pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl
</pre>
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md
index 5ba522b436..dc6c1e36fc 100644
--- a/tensorflow/docs_src/install/install_sources.md
+++ b/tensorflow/docs_src/install/install_sources.md
@@ -81,7 +81,7 @@ or
[macOS](#PrepareMac)
-<a name="#PrepareLinux"></a>
+<a name="PrepareLinux"></a>
## Prepare environment for Linux
Before building TensorFlow on Linux, install the following build
@@ -328,10 +328,10 @@ Invoke `pip install` to install that pip package.
The filename of the `.whl` file depends on your platform.
For example, the following command will install the pip package
-for TensorFlow 1.8.0 on Linux:
+for TensorFlow 1.9.0rc0 on Linux:
<pre>
-$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0-py2-none-any.whl</b>
+$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.9.0rc0-py2-none-any.whl</b>
</pre>
## Validate your installation
@@ -373,9 +373,9 @@ The build and installation problems you encounter typically depend on the
operating system. See the "Common installation problems" section
of one of the following guides:
- * @{$install_linux#CommonInstallationProblems$Installing TensorFlow on Linux}
- * @{$install_mac#CommonInstallationProblems$Installing TensorFlow on Mac OS}
- * @{$install_windows#CommonInstallationProblems$Installing TensorFlow on Windows}
+ * @{$install_linux#common_installation_problems$Installing TensorFlow on Linux}
+ * @{$install_mac#common_installation_problems$Installing TensorFlow on Mac OS}
+ * @{$install_windows#common_installation_problems$Installing TensorFlow on Windows}
Beyond the errors documented in those two guides, the following table
notes additional errors specific to building TensorFlow. Note that we
@@ -433,6 +433,8 @@ Stack Overflow and specify the `tensorflow` tag.
**Linux**
<table>
<tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
+<tr><td>tensorflow-1.9.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.11.0</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.9.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.11.0</td><td>7</td><td>9</td></tr>
<tr><td>tensorflow-1.8.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.10.0</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow_gpu-1.8.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.9.0</td><td>7</td><td>9</td></tr>
<tr><td>tensorflow-1.7.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.10.0</td><td>N/A</td><td>N/A</td></tr>
@@ -456,6 +458,7 @@ Stack Overflow and specify the `tensorflow` tag.
**Mac**
<table>
<tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
+<tr><td>tensorflow-1.9.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.11.0</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow-1.8.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.10.1</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow-1.7.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.10.1</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow-1.6.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.8.1</td><td>N/A</td><td>N/A</td></tr>
@@ -472,6 +475,8 @@ Stack Overflow and specify the `tensorflow` tag.
**Windows**
<table>
<tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
+<tr><td>tensorflow-1.9.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.9.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>7</td><td>9</td></tr>
<tr><td>tensorflow-1.8.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow_gpu-1.8.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>7</td><td>9</td></tr>
<tr><td>tensorflow-1.7.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
diff --git a/tensorflow/docs_src/mobile/linking_libs.md b/tensorflow/docs_src/mobile/linking_libs.md
index cf0db59021..efef5dd0da 100644
--- a/tensorflow/docs_src/mobile/linking_libs.md
+++ b/tensorflow/docs_src/mobile/linking_libs.md
@@ -27,7 +27,7 @@ called `libandroid_tensorflow_inference_java.jar`. There are three ways to
include this functionality in your program:
1. Include the jcenter AAR which contains it, as in this
- [example app](https://github.com/googlecodelabs/tensorflow-for-poets-2/blob/master/android/build.gradle#L59-L65)
+ [example app](https://github.com/googlecodelabs/tensorflow-for-poets-2/blob/master/android/tfmobile/build.gradle#L59-L65)
2. Download the nightly precompiled version from
[ci.tensorflow.org](http://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/).
diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md
index 8b22c04d87..2b84dbb973 100644
--- a/tensorflow/docs_src/mobile/prepare_models.md
+++ b/tensorflow/docs_src/mobile/prepare_models.md
@@ -105,8 +105,8 @@ inline constants so everything’s in one file. To handle the conversion, you’ll
need the `freeze_graph.py` script, that’s held in
[`tensorflow/python/tools/freeze_graph.py`](https://www.tensorflow.org/code/tensorflow/python/tools/freeze_graph.py). You’ll run it like this:
- bazel build tensorflow/tools:freeze_graph
- bazel-bin/tensorflow/tools/freeze_graph \
+ bazel build tensorflow/python/tools:freeze_graph
+ bazel-bin/tensorflow/python/tools/freeze_graph \
--input_graph=/tmp/model/my_graph.pb \
--input_checkpoint=/tmp/model/model.ckpt-1000 \
--output_graph=/tmp/frozen_graph.pb \
diff --git a/tensorflow/docs_src/performance/quantization.md b/tensorflow/docs_src/performance/quantization.md
index 2fea02d861..c97f74139c 100644
--- a/tensorflow/docs_src/performance/quantization.md
+++ b/tensorflow/docs_src/performance/quantization.md
@@ -227,8 +227,8 @@ of 30.0f, and an 8-bit array, the quantized values represent the following:
<table>
<tr><th>Quantized</th><th>Float</th></tr>
<tr><td>0</td><td>-10.0</td></tr>
- <tr><td>255</td><td>30.0</td></tr>
<tr><td>128</td><td>10.0</td></tr>
+ <tr><td>255</td><td>30.0</td></tr>
</table>
<figcaption>
<b>Table 2</b>: Example quantized value range
diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md
index c4aae1d9d6..b13b47184d 100644
--- a/tensorflow/docs_src/programmers_guide/estimators.md
+++ b/tensorflow/docs_src/programmers_guide/estimators.md
@@ -21,18 +21,17 @@ Note: TensorFlow also includes a deprecated `Estimator` class at
Estimators provide the following benefits:
-* You can run Estimators-based models on a local host or on a
+* You can run Estimator-based models on a local host or on a
distributed multi-server environment without changing your model.
- Furthermore, you can run Estimators-based models on CPUs, GPUs,
+ Furthermore, you can run Estimator-based models on CPUs, GPUs,
or TPUs without recoding your model.
* Estimators simplify sharing implementations between model developers.
-* You can develop a state of the art model with high-level intuitive code,
+* You can develop a state of the art model with high-level intuitive code.
In short, it is generally much easier to create models with Estimators
than with the low-level TensorFlow APIs.
-* Estimators are themselves built on tf.layers, which
+* Estimators are themselves built on @{tf.layers}, which
simplifies customization.
-* Estimators build the graph for you. In other words, you don't have to
- build the graph.
+* Estimators build the graph for you.
* Estimators provide a safe distributed training loop that controls how and
when to:
* build the graph
@@ -57,7 +56,7 @@ the "plumbing" for you. That is, pre-made Estimators create and manage
pre-made Estimators let you experiment with different model architectures by
making only minimal code changes. @{tf.estimator.DNNClassifier$`DNNClassifier`},
for example, is a pre-made Estimator class that trains classification models
-through dense, feed-forward neural networks.
+based on dense, feed-forward neural networks.
### Structure of a pre-made Estimators program
@@ -79,7 +78,7 @@ of the following four steps:
an input function:
def input_fn(dataset):
- ... # manipulate dataset, extracting feature names and the label
+ ... # manipulate dataset, extracting the feature dict and the label
return feature_dict, label
(See @{$programmers_guide/datasets} for full details.)
@@ -96,13 +95,13 @@ of the following four steps:
population = tf.feature_column.numeric_column('population')
crime_rate = tf.feature_column.numeric_column('crime_rate')
median_education = tf.feature_column.numeric_column('median_education',
- normalizer_fn='lambda x: x - global_education_mean')
+ normalizer_fn=lambda x: x - global_education_mean)
3. **Instantiate the relevant pre-made Estimator.** For example, here's
a sample instantiation of a pre-made Estimator named `LinearClassifier`:
# Instantiate an estimator, passing the feature columns.
- estimator = tf.estimator.Estimator.LinearClassifier(
+ estimator = tf.estimator.LinearClassifier(
feature_columns=[population, crime_rate, median_education],
)
diff --git a/tensorflow/docs_src/programmers_guide/feature_columns.md b/tensorflow/docs_src/programmers_guide/feature_columns.md
index 845194fe0e..90f5c53a17 100644
--- a/tensorflow/docs_src/programmers_guide/feature_columns.md
+++ b/tensorflow/docs_src/programmers_guide/feature_columns.md
@@ -528,10 +528,10 @@ suggested by the following snippet:
categorical_column = ... # Create any categorical column
# Represent the categorical column as an embedding column.
-# This means creating a one-hot vector with one element for each category.
+# This means creating an embedding vector lookup table with one element for each category.
embedding_column = tf.feature_column.embedding_column(
categorical_column=categorical_column,
- dimension=dimension_of_embedding_vector)
+ dimension=embedding_dimensions)
```
@{$programmers_guide/embedding$Embeddings} is a significant topic within machine
diff --git a/tensorflow/examples/learn/iris.py b/tensorflow/examples/learn/iris.py
index 03e60972aa..86f5204ec3 100644
--- a/tensorflow/examples/learn/iris.py
+++ b/tensorflow/examples/learn/iris.py
@@ -21,7 +21,8 @@ from __future__ import division
from __future__ import print_function
import os
-import urllib
+
+from six.moves.urllib.request import urlretrieve
import tensorflow as tf
@@ -38,9 +39,7 @@ FEATURE_KEYS = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
def maybe_download_iris_data(file_name, download_url):
"""Downloads the file and returns the number of data."""
if not os.path.exists(file_name):
- raw = urllib.urlopen(download_url).read()
- with open(file_name, 'w') as f:
- f.write(raw)
+ urlretrieve(download_url, file_name)
# The first line is a comma-separated string. The first one is the number of
# total data in the file.
diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 5602775b62..a5224fbda0 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -10955,7 +10955,7 @@ func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistorted
// SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value.
//
// value: The cropped area of the image must contain a fraction of the
-// supplied image within in this range.
+// supplied image within this range.
// If not specified, defaults to <f:0.05 f:1 >
func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr {
return func(m optionalAttr) {
@@ -18098,9 +18098,10 @@ func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_val
}
// Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)`
-//
// if < 0, `scale * features` otherwise.
//
+// Assumes weights to have zero mean and variance 1.0 / fan_in.
+//
// See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
func Selu(scope *Scope, features tf.Output) (activations tf.Output) {
if scope.Err() != nil {
@@ -21625,7 +21626,7 @@ func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr {
// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
//
// The `bad_color` argument is the color to use in the generated images for
-// non-finite input values. It is a `unit8` 1-D tensor of length `channels`.
+// non-finite input values. It is a `uint8` 1-D tensor of length `channels`.
// Each element must be in the range `[0, 255]` (It represents the value of a
// pixel in the output image). Non-finite values in the input tensor are
// replaced by this tensor in the output image. The default value is the color
@@ -24018,7 +24019,7 @@ func SampleDistortedBoundingBoxV2AspectRatioRange(value []float32) SampleDistort
// SampleDistortedBoundingBoxV2AreaRange sets the optional area_range attribute to value.
//
// value: The cropped area of the image must contain a fraction of the
-// supplied image within in this range.
+// supplied image within this range.
// If not specified, defaults to <f:0.05 f:1 >
func SampleDistortedBoundingBoxV2AreaRange(value []float32) SampleDistortedBoundingBoxV2Attr {
return func(m optionalAttr) {
@@ -24714,8 +24715,7 @@ type DecodeProtoV2Attr func(optionalAttr)
// If not specified, defaults to "local://"
func DecodeProtoV2DescriptorSource(value string) DecodeProtoV2Attr {
return func(m optionalAttr) {
- m["descriptor_source"] = value
- }
+ m["descriptor_source"] = value }
}
// DecodeProtoV2MessageFormat sets the optional message_format attribute to value.
diff --git a/tensorflow/java/src/gen/cc/op_generator.cc b/tensorflow/java/src/gen/cc/op_generator.cc
index debd95fc62..9b171f66ec 100644
--- a/tensorflow/java/src/gen/cc/op_generator.cc
+++ b/tensorflow/java/src/gen/cc/op_generator.cc
@@ -376,9 +376,6 @@ void GenerateOp(const OpSpec& op, const EndpointSpec& endpoint,
}
}
// op annotations
- op_class.add_annotation(
- Annotation::Create("Generated", "javax.annotation")
- .attributes("value = \"TensorFlow Java Op Generator\""));
if (endpoint.deprecated()) {
op_class.add_annotation(Annotation::Create("Deprecated"));
string explanation;
@@ -415,8 +412,12 @@ void GenerateOp(const OpSpec& op, const EndpointSpec& endpoint,
SourceFileWriter writer(op_file.get());
std::list<Type> dependencies;
CollectOpDependencies(op, mode, &dependencies);
- writer.Write(kLicense).EndLine().BeginType(op_class, PUBLIC | FINAL,
- &dependencies, &op_javadoc);
+ writer.Write(kLicense)
+ .EndLine()
+ .Write("// This class has been generated, DO NOT EDIT!")
+ .EndLine()
+ .EndLine()
+ .BeginType(op_class, PUBLIC | FINAL, &dependencies, &op_javadoc);
if (!op.optional_attributes().empty()) {
RenderOptionsClass(op, op_class, &writer);
}
diff --git a/tensorflow/java/src/gen/cc/op_specs.cc b/tensorflow/java/src/gen/cc/op_specs.cc
index 181fd4c5e3..941ab2699c 100644
--- a/tensorflow/java/src/gen/cc/op_specs.cc
+++ b/tensorflow/java/src/gen/cc/op_specs.cc
@@ -96,6 +96,7 @@ Type TypeResolver::TypeOf(const OpDef_ArgDef& arg_def, bool* iterable_out) {
*iterable_out = true;
visited_attrs_.insert(std::make_pair(arg_def.number_attr(), Type::Int()));
}
+
Type type = Type::Wildcard();
if (arg_def.type() != DataType::DT_INVALID) {
// resolve type from DataType
diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index b2e6c60021..bd97b181ff 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -196,11 +196,11 @@ def implicit_val_and_grad(f):
# TODO(cais): Remove calls to tf.constant() once the gradients functions
# accept lists and np.ndarrays.
- def grad_fn(*args):
+ def grad_fn(*args, **kwds):
"""Computes the gradient of the wrapped function."""
this_tape = tape.push_new_tape()
try:
- end_node = f(*args)
+ end_node = f(*args, **kwds)
if end_node is None:
raise ValueError("Cannot differentiate a function that returns None; "
"did you forget to return a value from {}?".format(
diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD
index 9cd17e0407..20522098b0 100644
--- a/tensorflow/python/estimator/BUILD
+++ b/tensorflow/python/estimator/BUILD
@@ -978,7 +978,10 @@ py_test(
size = "large",
srcs = ["keras_test.py"],
srcs_version = "PY2AND3",
- tags = ["notsan"],
+ tags = [
+ "no_windows",
+ "notsan",
+ ],
deps = [
":keras",
"//tensorflow/core:protos_all_py",
diff --git a/tensorflow/python/estimator/exporter.py b/tensorflow/python/estimator/exporter.py
index 7cdf840c97..b18212cfcd 100644
--- a/tensorflow/python/estimator/exporter.py
+++ b/tensorflow/python/estimator/exporter.py
@@ -156,7 +156,7 @@ def _loss_smaller(best_eval_result, current_eval_result):
return best_eval_result[default_key] > current_eval_result[default_key]
-def _verify_compre_fn_args(compare_fn):
+def _verify_compare_fn_args(compare_fn):
"""Verifies compare_fn arguments."""
args = set(util.fn_args(compare_fn))
if 'best_eval_result' not in args:
@@ -265,7 +265,7 @@ class BestExporter(Exporter):
self._compare_fn = compare_fn
if self._compare_fn is None:
raise ValueError('`compare_fn` must not be None.')
- _verify_compre_fn_args(self._compare_fn)
+ _verify_compare_fn_args(self._compare_fn)
self._saved_model_exporter = _SavedModelExporter(
name, serving_input_receiver_fn, assets_extra, as_text)
diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py
index 035c7c148c..a6cefdece2 100644
--- a/tensorflow/python/estimator/inputs/numpy_io.py
+++ b/tensorflow/python/estimator/inputs/numpy_io.py
@@ -136,11 +136,13 @@ def numpy_input_fn(x,
values in `x` have same shape).
ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict.
ValueError: if x or y is an empty dict.
- TypeError: `x` is not a dict or array, or if `shuffle` is not bool.
+ TypeError: `x` is not a dict or array.
+ ValueError: if `shuffle` is not provided or is not a bool.
"""
if not isinstance(shuffle, bool):
- raise TypeError('shuffle must be explicitly set as boolean; '
- 'got {}'.format(shuffle))
+ raise ValueError('shuffle must be provided and explicitly set as boolean '
+ '(it is recommended to set it as True for training); '
+ 'got {}'.format(shuffle))
def input_fn():
"""Numpy input function."""
diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py
index 92d057e25d..81b201cc5c 100644
--- a/tensorflow/python/estimator/inputs/numpy_io_test.py
+++ b/tensorflow/python/estimator/inputs/numpy_io_test.py
@@ -286,8 +286,9 @@ class NumpyIoTest(test.TestCase):
x = np.arange(32, 36)
y = np.arange(4)
with self.test_session():
- with self.assertRaisesRegexp(TypeError,
- 'shuffle must be explicitly set as boolean'):
+ with self.assertRaisesRegexp(ValueError,
+ 'shuffle must be provided and explicitly '
+ 'set as boolean'):
# Default shuffle is None.
numpy_io.numpy_input_fn(x, y)
diff --git a/tensorflow/python/estimator/inputs/pandas_io.py b/tensorflow/python/estimator/inputs/pandas_io.py
index 938e244fb3..57f8e5fd6a 100644
--- a/tensorflow/python/estimator/inputs/pandas_io.py
+++ b/tensorflow/python/estimator/inputs/pandas_io.py
@@ -68,15 +68,16 @@ def pandas_input_fn(x,
Raises:
ValueError: if `x` already contains a column with the same name as `y`, or
if the indexes of `x` and `y` don't match.
- TypeError: `shuffle` is not bool.
+ ValueError: if `shuffle` is not provided or is not a bool.
"""
if not HAS_PANDAS:
raise TypeError(
'pandas_input_fn should not be called without pandas installed')
if not isinstance(shuffle, bool):
- raise TypeError('shuffle must be explicitly set as boolean; '
- 'got {}'.format(shuffle))
+ raise ValueError('shuffle must be provided and explicitly set as boolean '
+ '(it is recommended to set it as True for training); '
+ 'got {}'.format(shuffle))
x = x.copy()
if y is not None:
diff --git a/tensorflow/python/estimator/inputs/pandas_io_test.py b/tensorflow/python/estimator/inputs/pandas_io_test.py
index e5912a3b28..dcecf6dd61 100644
--- a/tensorflow/python/estimator/inputs/pandas_io_test.py
+++ b/tensorflow/python/estimator/inputs/pandas_io_test.py
@@ -70,8 +70,9 @@ class PandasIoTest(test.TestCase):
return
x, _ = self.makeTestDataFrame()
y_noindex = pd.Series(np.arange(-32, -28))
- with self.assertRaisesRegexp(TypeError,
- 'shuffle must be explicitly set as boolean'):
+ with self.assertRaisesRegexp(ValueError,
+ 'shuffle must be provided and explicitly '
+ 'set as boolean'):
# Default shuffle is None
pandas_io.pandas_input_fn(x, y_noindex)
diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions.py b/tensorflow/python/estimator/inputs/queues/feeding_functions.py
index 8e2ec83020..51a61adb21 100644
--- a/tensorflow/python/estimator/inputs/queues/feeding_functions.py
+++ b/tensorflow/python/estimator/inputs/queues/feeding_functions.py
@@ -250,7 +250,7 @@ class _PandasFeedFn(object):
num_epochs=None):
if len(placeholders) != len(dataframe.columns) + 1:
raise ValueError("Expected {} placeholders; got {}.".format(
- len(dataframe.columns), len(placeholders)))
+ len(dataframe.columns) + 1, len(placeholders)))
self._index_placeholder = placeholders[0]
self._col_placeholders = placeholders[1:]
self._dataframe = dataframe
diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py
index c80af08fba..2f439f765e 100644
--- a/tensorflow/python/estimator/keras.py
+++ b/tensorflow/python/estimator/keras.py
@@ -70,7 +70,7 @@ def _convert_tensor(x):
return x
-def _any_variable_initalized():
+def _any_variable_initialized():
"""Check if any variable has been initialized in the Keras model.
Returns:
@@ -511,7 +511,7 @@ def model_to_estimator(keras_model=None,
keras_model_fn, model_dir=model_dir, config=config)
# Check if we need to call get_weights:
- if _any_variable_initalized():
+ if _any_variable_initialized():
keras_weights = keras_model.get_weights()
# Warn if config passed to estimator tries to update GPUOptions. If a
# session has already been created, the GPUOptions passed to the first
diff --git a/tensorflow/python/estimator/keras_test.py b/tensorflow/python/estimator/keras_test.py
index 6688a84130..5e094ae92b 100644
--- a/tensorflow/python/estimator/keras_test.py
+++ b/tensorflow/python/estimator/keras_test.py
@@ -31,10 +31,10 @@ from tensorflow.python.estimator import run_config as run_config_lib
from tensorflow.python.estimator.inputs import numpy_io
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
-from tensorflow.python.keras import backend as K
from tensorflow.python.keras import testing_utils
from tensorflow.python.keras.applications import mobilenet
from tensorflow.python.keras.optimizers import SGD
+from tensorflow.python.ops.parsing_ops import gen_parsing_ops
from tensorflow.python.platform import gfile
from tensorflow.python.platform import test
from tensorflow.python.summary.writer import writer_cache
@@ -146,13 +146,13 @@ def randomize_io_type(array, name):
def multi_inputs_multi_outputs_model():
a = keras.layers.Input(shape=(16,), name='input_a')
b = keras.layers.Input(shape=(16,), name='input_b')
- m = keras.layers.Input(shape=(8,), dtype='bool', name='input_m')
+ m = keras.layers.Input(shape=(8,), dtype='string', name='input_m')
dense = keras.layers.Dense(8, name='dense_1')
a_2 = dense(a)
- # Apply a mask
- s_2 = keras.layers.Lambda(lambda k:
- K.switch(k[0], k[1], K.zeros_like(k[1])))([m, a_2])
+ # Read m
+ m_2 = keras.layers.Lambda(gen_parsing_ops.string_to_number)(m)
+ s_2 = keras.layers.Lambda(lambda k: k[0] * k[1])([m_2, a_2])
b_2 = dense(b)
merged = keras.layers.concatenate([s_2, b_2], name='merge')
c = keras.layers.Dense(3, activation='softmax', name='dense_2')(merged)
@@ -372,13 +372,13 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
def train_input_fn():
input_dict = {'input_a': a_train, 'input_b': b_train,
- 'input_m': input_m_train > 0}
+ 'input_m': input_m_train.astype(str)}
output_dict = {'dense_2': c_train, 'dense_3': d_train}
return input_dict, output_dict
def eval_input_fn():
input_dict = {'input_a': a_test, 'input_b': b_test,
- 'input_m': input_m_test > 0}
+ 'input_m': input_m_test.astype(str)}
output_dict = {'dense_2': c_test, 'dense_3': d_test}
return input_dict, output_dict
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index 2d6925d1a8..af5d709f7e 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -1389,7 +1389,7 @@ class LayoutOptimizerTest(test.TestCase):
expected_num_transposes = 3
self.assertEqual(expected_num_transposes, num_transposes)
self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
- self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes)
+ self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes)
self.assertAllClose(output_val_ref, output_val, atol=1e-3)
def testLoopWithVecAnd4D(self):
@@ -1413,7 +1413,7 @@ class LayoutOptimizerTest(test.TestCase):
expected_num_transposes = 2
self.assertEqual(expected_num_transposes, num_transposes)
self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
- self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes)
+ self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes)
self.assertAllClose(output_val_ref, output_val, atol=1e-3)
def testBinaryOpSecondPort(self):
diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py
index e487f583be..f608dea430 100644
--- a/tensorflow/python/keras/activations.py
+++ b/tensorflow/python/keras/activations.py
@@ -93,6 +93,8 @@ def selu(x):
- To be used together with the initialization "lecun_normal".
- To be used together with the dropout variant "AlphaDropout".
+ References:
+ - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
"""
alpha = 1.6732632423543772848170429916717
scale = 1.0507009873554804934193349852946
diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py
index 70b6a8431a..9f91368e5b 100644
--- a/tensorflow/python/keras/callbacks.py
+++ b/tensorflow/python/keras/callbacks.py
@@ -724,15 +724,6 @@ class TensorBoard(Callback):
for weight in layer.weights:
mapped_weight_name = weight.name.replace(':', '_')
tf_summary.histogram(mapped_weight_name, weight)
- if self.write_grads:
- grads = model.optimizer.get_gradients(model.total_loss, weight)
-
- def is_indexed_slices(grad):
- return type(grad).__name__ == 'IndexedSlices'
-
- grads = [grad.values if is_indexed_slices(grad) else grad
- for grad in grads]
- tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads)
if self.write_images:
w_img = array_ops.squeeze(weight)
shape = K.int_shape(w_img)
@@ -759,6 +750,18 @@ class TensorBoard(Callback):
assert len(shape) == 4 and shape[-1] in [1, 3, 4]
tf_summary.image(mapped_weight_name, w_img)
+ if self.write_grads:
+ for weight in layer.trainable_weights:
+ mapped_weight_name = weight.name.replace(':', '_')
+ grads = model.optimizer.get_gradients(model.total_loss, weight)
+
+ def is_indexed_slices(grad):
+ return type(grad).__name__ == 'IndexedSlices'
+
+ grads = [grad.values if is_indexed_slices(grad) else grad
+ for grad in grads]
+ tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads)
+
if hasattr(layer, 'output'):
tf_summary.histogram('{}_out'.format(layer.name), layer.output)
self.merged = tf_summary.merge_all()
diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py
index b355f4a269..5062a26580 100644
--- a/tensorflow/python/keras/callbacks_test.py
+++ b/tensorflow/python/keras/callbacks_test.py
@@ -653,6 +653,8 @@ class KerasCallbacksTest(test.TestCase):
model.add(
keras.layers.Dense(
NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu'))
+ # non_trainable_weights: moving_variance, moving_mean
+ model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(NUM_CLASSES, activation='softmax'))
model.compile(
loss='categorical_crossentropy',
diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index a4cd017d60..1c9135982e 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -123,7 +123,7 @@ class Network(base_layer.Layer):
# Entries are unique. Includes input and output layers.
self._layers = []
- # Used in symbolic mode only, only in conjonction with graph-networks
+ # Used in symbolic mode only, only in conjunction with graph-networks
self._outbound_nodes = []
self._inbound_nodes = []
diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py
index 6a94986b9c..7e82db028b 100644
--- a/tensorflow/python/keras/engine/saving_test.py
+++ b/tensorflow/python/keras/engine/saving_test.py
@@ -482,7 +482,7 @@ class TestWholeModelSaving(test.TestCase):
with h5py.File(fname, 'r') as h5file:
num_names_arrays = len([attr for attr in h5file['model_weights'].attrs
if attr.startswith('layer_names')])
- # The chunking of layer names array should have happend.
+ # The chunking of layer names array should have happened.
self.assertGreater(num_names_arrays, 0)
out2 = model.predict(x)
self.assertAllClose(out, out2, atol=1e-05)
@@ -527,7 +527,7 @@ class TestWholeModelSaving(test.TestCase):
num_weight_arrays = len(
[attr for attr in h5file['model_weights']['nested_model'].attrs
if attr.startswith('weight_names')])
- # The chunking of layer names array should have happend.
+ # The chunking of layer names array should have happened.
self.assertGreater(num_weight_arrays, 0)
out2 = model.predict(x)
self.assertAllClose(out, out2, atol=1e-05)
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 89c1f1a40f..fce6cbdb7a 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -24,6 +24,7 @@ import numpy as np
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.data.ops import iterator_ops
from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
from tensorflow.python.framework import errors
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_util
@@ -409,11 +410,13 @@ class Model(Network):
else:
if sample_weight_mode == 'temporal':
sample_weights.append(array_ops.placeholder_with_default(
- [[1.]], shape=[None, None], name=name + '_sample_weights'))
+ constant_op.constant([[1.]], dtype=K.floatx()),
+ shape=[None, None], name=name + '_sample_weights'))
sample_weight_modes.append('temporal')
else:
sample_weights.append(array_ops.placeholder_with_default(
- [1.], shape=[None], name=name + '_sample_weights'))
+ constant_op.constant([1.], dtype=K.floatx()),
+ shape=[None], name=name + '_sample_weights'))
sample_weight_modes.append(None)
self.sample_weight_modes = sample_weight_modes
self._feed_sample_weight_modes = []
diff --git a/tensorflow/python/keras/engine/training_eager.py b/tensorflow/python/keras/engine/training_eager.py
index 2ecbff3a1c..e8838cd3bc 100644
--- a/tensorflow/python/keras/engine/training_eager.py
+++ b/tensorflow/python/keras/engine/training_eager.py
@@ -732,7 +732,7 @@ def slice_arrays(arrays, indices, contiguous=True):
"""Slices batches out of provided arrays (workaround for eager tensors).
Unfortunately eager tensors don't have the same slicing behavior as
- Numpy arrays (they folow the same slicing behavior as symbolic TF tensors),
+ Numpy arrays (they follow the same slicing behavior as symbolic TF tensors),
hence we cannot use `generic_utils.slice_arrays` directly
and we have to implement this workaround based on `concat`. This has a
performance cost.
diff --git a/tensorflow/python/keras/initializers_test.py b/tensorflow/python/keras/initializers_test.py
index a54d6da839..c519e194bd 100644
--- a/tensorflow/python/keras/initializers_test.py
+++ b/tensorflow/python/keras/initializers_test.py
@@ -71,7 +71,7 @@ class KerasInitializersTest(test.TestCase):
stddev=1,
seed=126),
tensor_shape,
- target_mean=0., target_std=None, target_max=2)
+ target_mean=0., target_max=2, target_min=-2)
def test_constant(self):
tensor_shape = (5, 6, 4)
@@ -83,49 +83,49 @@ class KerasInitializersTest(test.TestCase):
tensor_shape = (5, 6, 4, 2)
with self.test_session():
fan_in, _ = init_ops._compute_fans(tensor_shape)
- scale = np.sqrt(3. / fan_in)
+ std = np.sqrt(1. / fan_in)
self._runner(keras.initializers.lecun_uniform(seed=123), tensor_shape,
- target_mean=0., target_max=scale, target_min=-scale)
+ target_mean=0., target_std=std)
def test_glorot_uniform(self):
tensor_shape = (5, 6, 4, 2)
with self.test_session():
fan_in, fan_out = init_ops._compute_fans(tensor_shape)
- scale = np.sqrt(6. / (fan_in + fan_out))
+ std = np.sqrt(2. / (fan_in + fan_out))
self._runner(keras.initializers.glorot_uniform(seed=123), tensor_shape,
- target_mean=0., target_max=scale, target_min=-scale)
+ target_mean=0., target_std=std)
def test_he_uniform(self):
tensor_shape = (5, 6, 4, 2)
with self.test_session():
fan_in, _ = init_ops._compute_fans(tensor_shape)
- scale = np.sqrt(6. / fan_in)
+ std = np.sqrt(2. / fan_in)
self._runner(keras.initializers.he_uniform(seed=123), tensor_shape,
- target_mean=0., target_max=scale, target_min=-scale)
+ target_mean=0., target_std=std)
def test_lecun_normal(self):
tensor_shape = (5, 6, 4, 2)
with self.test_session():
fan_in, _ = init_ops._compute_fans(tensor_shape)
- scale = np.sqrt(1. / fan_in)
+ std = np.sqrt(1. / fan_in)
self._runner(keras.initializers.lecun_normal(seed=123), tensor_shape,
- target_mean=0., target_std=None, target_max=2 * scale)
+ target_mean=0., target_std=std)
def test_glorot_normal(self):
tensor_shape = (5, 6, 4, 2)
with self.test_session():
fan_in, fan_out = init_ops._compute_fans(tensor_shape)
- scale = np.sqrt(2. / (fan_in + fan_out))
+ std = np.sqrt(2. / (fan_in + fan_out))
self._runner(keras.initializers.glorot_normal(seed=123), tensor_shape,
- target_mean=0., target_std=None, target_max=2 * scale)
+ target_mean=0., target_std=std)
def test_he_normal(self):
tensor_shape = (5, 6, 4, 2)
with self.test_session():
fan_in, _ = init_ops._compute_fans(tensor_shape)
- scale = np.sqrt(2. / fan_in)
+ std = np.sqrt(2. / fan_in)
self._runner(keras.initializers.he_normal(seed=123), tensor_shape,
- target_mean=0., target_std=None, target_max=2 * scale)
+ target_mean=0., target_std=std)
def test_orthogonal(self):
tensor_shape = (20, 20)
diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py
index 5061825d38..f60064ed63 100644
--- a/tensorflow/python/keras/layers/core.py
+++ b/tensorflow/python/keras/layers/core.py
@@ -19,7 +19,9 @@ from __future__ import division
from __future__ import print_function
import copy
+import sys
import types as python_types
+import warnings
import numpy as np
@@ -714,6 +716,7 @@ class Lambda(Layer):
return self.mask
def get_config(self):
+ module = self.function.__module__
if isinstance(self.function, python_types.LambdaType):
function = generic_utils.func_dump(self.function)
function_type = 'lambda'
@@ -721,21 +724,26 @@ class Lambda(Layer):
function = self.function.__name__
function_type = 'function'
+ output_shape_module = None
if isinstance(self._output_shape, python_types.LambdaType):
output_shape = generic_utils.func_dump(self._output_shape)
output_shape_type = 'lambda'
+ output_shape_module = self._output_shape.__module__
elif callable(self._output_shape):
output_shape = self._output_shape.__name__
output_shape_type = 'function'
+ output_shape_module = self._output_shape.__module__
else:
output_shape = self._output_shape
output_shape_type = 'raw'
config = {
'function': function,
+ 'module': module,
'function_type': function_type,
'output_shape': output_shape,
'output_shape_type': output_shape_type,
+ 'output_shape_module': output_shape_module,
'arguments': self.arguments
}
base_config = super(Lambda, self).get_config()
@@ -745,8 +753,16 @@ class Lambda(Layer):
def from_config(cls, config, custom_objects=None):
config = config.copy()
- globs = globals()
+ globs = globals().copy()
+ module = config.pop('module', None)
+ if module in sys.modules:
+ globs.update(sys.modules[module].__dict__)
+ elif module is not None:
+ # Note: we don't know the name of the function if it's a lambda.
+ warnings.warn('{} is not loaded, but a Lambda layer uses it. '
+ 'It may cause errors.'.format(module)
+ , UserWarning)
if custom_objects:
- globs = dict(list(globs.items()) + list(custom_objects.items()))
+ globs.update(custom_objects)
function_type = config.pop('function_type')
if function_type == 'function':
# Simple lookup in custom objects
@@ -760,6 +776,14 @@ class Lambda(Layer):
else:
raise TypeError('Unknown function type:', function_type)
+ output_shape_module = config.pop('output_shape_module', None)
+ if output_shape_module in sys.modules:
+ globs.update(sys.modules[output_shape_module].__dict__)
+ elif output_shape_module is not None:
+ # Note: we don't know the name of the function if it's a lambda.
+ warnings.warn('{} is not loaded, but a Lambda layer uses it. '
+ 'It may cause errors.'.format(output_shape_module)
+ , UserWarning)
output_shape_type = config.pop('output_shape_type')
if output_shape_type == 'function':
# Simple lookup in custom objects
diff --git a/tensorflow/python/keras/models_test.py b/tensorflow/python/keras/models_test.py
index c616d8f24f..e6e45902a8 100644
--- a/tensorflow/python/keras/models_test.py
+++ b/tensorflow/python/keras/models_test.py
@@ -144,5 +144,21 @@ class CheckpointingTests(test.TestCase):
model.load_weights(save_prefix)
self.assertEqual(12., self.evaluate(beta1_power))
+class TestModelBackend(test.TestCase):
+
+ def test_model_backend_float64_use_cases(self):
+ # Test case for GitHub issue 19318
+ floatx = keras.backend.floatx()
+ keras.backend.set_floatx('float64')
+
+ try:
+ x = keras.Input((5,))
+ y = keras.layers.Dense(1)(x)
+ model = keras.models.Model(x, y)
+ model.compile('rmsprop', 'mse')
+ finally:
+ # Always restore the global floatx so a failure cannot leak into other tests.
+ keras.backend.set_floatx(floatx)
+
if __name__ == '__main__':
test.main()
diff --git a/tensorflow/python/kernel_tests/as_string_op_test.py b/tensorflow/python/kernel_tests/as_string_op_test.py
index 9d54add264..94ed8ebd31 100644
--- a/tensorflow/python/kernel_tests/as_string_op_test.py
+++ b/tensorflow/python/kernel_tests/as_string_op_test.py
@@ -130,6 +130,16 @@ class AsStringOpTest(test.TestCase):
result = output.eval(feed_dict={input_: int_inputs_})
self.assertAllEqual(s(result), ["%d" % x for x in int_inputs_])
+ def testHalfInt(self):
+ s = lambda strs: [x.decode("ascii") for x in strs]
+
+ with self.test_session():
+ input_ = array_ops.placeholder(dtypes.int16)
+ int_inputs_ = [np.iinfo(np.int16).min, np.iinfo(np.int16).max]
+ output = string_ops.as_string(input_)
+ result = output.eval(feed_dict={input_: int_inputs_})
+ self.assertAllEqual(s(result), ["%d" % x for x in int_inputs_])
+
def testBool(self):
bool_inputs_ = [False, True]
s = lambda strs: [x.decode("ascii") for x in strs]
diff --git a/tensorflow/python/kernel_tests/betainc_op_test.py b/tensorflow/python/kernel_tests/betainc_op_test.py
index 08b03f8518..16fdedac41 100644
--- a/tensorflow/python/kernel_tests/betainc_op_test.py
+++ b/tensorflow/python/kernel_tests/betainc_op_test.py
@@ -172,7 +172,7 @@ class BetaincTest(test.TestCase):
tf_gout_t = math_ops.betainc(tf_ga_s, tf_gb_s, tf_gx_s)
err = gradient_checker.compute_gradient_error(
[tf_gx_s], [gx_s.shape], tf_gout_t, gx_s.shape)
- print("betainc gradient err = %g " % err)
+ tf_logging.info("betainc gradient err = %g " % err)
self.assertLess(err, err_tolerance)
# Test broadcast gradient
@@ -181,7 +181,7 @@ class BetaincTest(test.TestCase):
tf_gout_t = math_ops.betainc(tf_ga_s, tf_gb_s, tf_gx_s)
err = gradient_checker.compute_gradient_error(
[tf_gx_s], [()], tf_gout_t, ga_s.shape)
- print("betainc gradient err = %g " % err)
+ tf_logging.info("betainc gradient err = %g " % err)
self.assertLess(err, err_tolerance)
diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py
index e08123b041..fb52d10475 100644
--- a/tensorflow/python/kernel_tests/clip_ops_test.py
+++ b/tensorflow/python/kernel_tests/clip_ops_test.py
@@ -18,9 +18,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import numpy as np
+
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import gradient_checker
from tensorflow.python.platform import test
@@ -414,6 +417,16 @@ class ClipTest(test.TestCase):
self.assertAllClose(np_ans, tf_ans)
+ def testClipByValueEmptyTensor(self):
+ # Test case for GitHub issue 19337
+ zero = array_ops.placeholder(dtype=dtypes.float32, shape=None)
+ x = clip_ops.clip_by_value(zero, zero, zero)
+ y = clip_ops.clip_by_value(zero, 1.0, 1.0)
+ z = clip_ops.clip_by_value(zero, zero, 1.0)
+ w = clip_ops.clip_by_value(zero, 1.0, zero)
+ with self.test_session(use_gpu=True) as sess:
+ sess.run([x, y, z, w], feed_dict={zero: np.zeros((7, 0))})
+
if __name__ == '__main__':
test.main()
diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py
index 8699fd5b25..80ba7dafc9 100644
--- a/tensorflow/python/kernel_tests/conv_ops_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_test.py
@@ -312,8 +312,8 @@ class Conv2DTest(test.TestCase):
expected_values = self.evaluate(expected_results)
computed_values = self.evaluate(computed_results)
for e_value, c_value in zip(expected_values, computed_values):
- print("expected = ", e_value)
- print("actual = ", c_value)
+ tf_logging.info("expected = %s", e_value)
+ tf_logging.info("actual = %s", c_value)
self.assertAllClose(
e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-4)
@@ -337,8 +337,8 @@ class Conv2DTest(test.TestCase):
for i in range(len(tensors)):
conv = tensors[i]
value = values[i]
- print("expected = ", expected)
- print("actual = ", value)
+ tf_logging.info("expected = %s", expected)
+ tf_logging.info("actual = %s", value)
tol = 1e-5
if value.dtype == np.float16:
tol = 1e-3
@@ -547,8 +547,8 @@ class Conv2DTest(test.TestCase):
# "values" consists of two tensors for two backprops
value = self.evaluate(conv)
self.assertShapeEqual(value, conv)
- print("expected = ", expected)
- print("actual = ", value)
+ tf_logging.info("expected = %s", expected)
+ tf_logging.info("actual = %s", value)
self.assertArrayNear(expected, value.flatten(), err)
def _CompareBackpropInput(self, input_sizes, filter_sizes, output_sizes,
@@ -723,8 +723,8 @@ class Conv2DTest(test.TestCase):
data_format=data_format)
value = self.evaluate(conv)
self.assertShapeEqual(value, conv)
- print("expected = ", expected)
- print("actual = ", value)
+ tf_logging.info("expected = %s", expected)
+ tf_logging.info("actual = %s", value)
self.assertArrayNear(expected, value.flatten(), 1e-5)
def _CompareBackFilter(self, input_sizes, filter_sizes, output_sizes,
@@ -912,8 +912,8 @@ class Conv2DTest(test.TestCase):
value_2 = sess.run(conv_2)
self.assertShapeEqual(value, conv)
self.assertShapeEqual(value_2, conv_2)
- print("expected = ", value_2)
- print("actual = ", value)
+ tf_logging.info("expected = %s", value_2)
+ tf_logging.info("actual = %s", value)
self.assertArrayNear(value_2.flatten(), value.flatten(), err)
# Testing for backprops
@@ -965,8 +965,8 @@ class Conv2DTest(test.TestCase):
value_2 = sess.run(conv_2)
self.assertShapeEqual(value, conv)
self.assertShapeEqual(value_2, conv_2)
- print("expected = ", value_2)
- print("actual = ", value)
+ tf_logging.info("expected = %s", value_2)
+ tf_logging.info("actual = %s", value)
self.assertArrayNear(value_2.flatten(), value.flatten(), err)
def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self):
@@ -1178,7 +1178,7 @@ class Conv2DTest(test.TestCase):
# since fp16 numerical gradients are too imprecise.
err = np.fabs(jacob_t - reference_jacob_t).max()
- print("conv_2d gradient error = ", err)
+ tf_logging.info("conv_2d gradient error = %s", err)
self.assertLess(err, 0.002)
def testInputGradientValidPaddingStrideOne(self):
@@ -1546,7 +1546,7 @@ class DepthwiseConv2DTest(test.TestCase):
conv = nn_impl.depthwise_conv2d(
t1, t2, strides=[1, stride, stride, 1], padding=padding)
value = sess.run(conv)
- print("value = ", value)
+ tf_logging.info("value = %s", value)
self.assertArrayNear(expected, np.ravel(value), 1e-5)
self.assertShapeEqual(value, conv)
@@ -1668,7 +1668,7 @@ class SeparableConv2DTest(test.TestCase):
conv = array_ops.transpose(conv, [0, 2, 3, 1])
value = sess.run(conv)
- print("value = ", value)
+ tf_logging.info("value = %s", value)
self.assertArrayNear(expected, np.ravel(value), 1e-5)
self.assertShapeEqual(value, conv)
@@ -1826,7 +1826,7 @@ class Conv2DBenchmark(test.Benchmark):
wall_time = time.time() - start
self.report_benchmark(
name="conv_stack_iter_%d" % iter_index, wall_time=wall_time)
- print("conv_stack_iter_%d: %.4f" % (iter_index, wall_time))
+ tf_logging.info("conv_stack_iter_%d: %.4f" % (iter_index, wall_time))
def GetInceptionFwdTest(input_size, filter_size, stride, padding,
diff --git a/tensorflow/python/kernel_tests/gather_nd_op_test.py b/tensorflow/python/kernel_tests/gather_nd_op_test.py
index 91ebe8de99..58e2a8ac2a 100644
--- a/tensorflow/python/kernel_tests/gather_nd_op_test.py
+++ b/tensorflow/python/kernel_tests/gather_nd_op_test.py
@@ -197,7 +197,21 @@ class GatherNdTest(test.TestCase):
self.assertEqual(None, shape.ndims)
self.assertEqual(None, shape[0].value)
- def testBadIndices(self):
+ def testBadIndicesCPU(self):
+ with self.test_session(use_gpu=False):
+ params = [0, 1, 2]
+ indices = [[[0], [7]]] # Make this one higher rank
+ gather_nd = array_ops.gather_nd(params, indices)
+ with self.assertRaisesOpError(
+ r"flat indices\[1, :\] = \[7\] does not index into param "
+ r"\(shape: \[3\]\)"):
+ gather_nd.eval()
+
+ def _disabledTestBadIndicesGPU(self):
+ # TODO disabled due to different behavior on GPU and CPU
+ # On GPU the bad indices do not raise error but fetch 0 values
+ if not test.is_gpu_available():
+ return
with self.test_session(use_gpu=True):
params = [0, 1, 2]
indices = [[[0], [7]]] # Make this one higher rank
@@ -207,7 +221,21 @@ class GatherNdTest(test.TestCase):
r"\(shape: \[3\]\)"):
gather_nd.eval()
- def testBadIndicesWithSlices(self):
+ def testBadIndicesWithSlicesCPU(self):
+ with self.test_session(use_gpu=False):
+ params = [[0, 1, 2]]
+ indices = [[[0], [0], [1]]] # Make this one higher rank
+ gather_nd = array_ops.gather_nd(params, indices)
+ with self.assertRaisesOpError(
+ r"flat indices\[2, :\] = \[1\] does not index into param "
+ r"\(shape: \[1,3\]\)"):
+ gather_nd.eval()
+
+ def _disabledTestBadIndicesWithSlicesGPU(self):
+ # TODO disabled due to different behavior on GPU and CPU
+ # On GPU the bad indices do not raise error but fetch 0 values
+ if not test.is_gpu_available():
+ return
with self.test_session(use_gpu=True):
params = [[0, 1, 2]]
indices = [[[0], [0], [1]]] # Make this one higher rank
diff --git a/tensorflow/python/kernel_tests/gather_op_test.py b/tensorflow/python/kernel_tests/gather_op_test.py
index a2fcd751df..033fa95935 100644
--- a/tensorflow/python/kernel_tests/gather_op_test.py
+++ b/tensorflow/python/kernel_tests/gather_op_test.py
@@ -27,7 +27,8 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gradients_impl
from tensorflow.python.platform import test
-_TEST_TYPES = (dtypes.float32, dtypes.complex64, dtypes.complex128)
+_TEST_TYPES = (dtypes.int64, dtypes.float32,
+ dtypes.complex64, dtypes.complex128)
class GatherTest(test.TestCase):
@@ -122,6 +123,9 @@ class GatherTest(test.TestCase):
gather, [tf_params, tf_indices, tf_axis], gather_grad)
self.assertEqual(indices_grad, None)
self.assertEqual(axis_grad, None)
+ if dtype.is_integer:
+ self.assertEqual(params_grad, None)
+ continue
# For axis 0, we are able to create an efficient IndexedSlices for
# the gradient.
if axis == 0:
@@ -177,7 +181,19 @@ class GatherTest(test.TestCase):
gather_t = array_ops.gather(params, indices, axis=axis)
self.assertEqual(None, gather_t.shape)
- def testBadIndices(self):
+ def testBadIndicesCPU(self):
+ with self.test_session(use_gpu=False):
+ params = [[0, 1, 2], [3, 4, 5]]
+ with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 2\)"):
+ array_ops.gather(params, [[7]], axis=0).eval()
+ with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 3\)"):
+ array_ops.gather(params, [[7]], axis=1).eval()
+
+ def _disabledTestBadIndicesGPU(self):
+ # TODO disabled due to different behavior on GPU and CPU
+ # On GPU the bad indices do not raise error but fetch 0 values
+ if not test.is_gpu_available():
+ return
with self.test_session(use_gpu=True):
params = [[0, 1, 2], [3, 4, 5]]
with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 2\)"):
diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py
index a9b55854f1..795aa67248 100644
--- a/tensorflow/python/kernel_tests/init_ops_test.py
+++ b/tensorflow/python/kernel_tests/init_ops_test.py
@@ -362,6 +362,33 @@ class UniformUnitScalingInitializationTest(test.TestCase):
dtype=dtypes.string)
+class VarianceScalingInitializationTest(test.TestCase):
+
+ def testNormalDistribution(self):
+ shape = [100, 100]
+ expect_mean = 0.
+ expect_var = 1. / shape[0]
+ init = init_ops.variance_scaling_initializer(distribution='normal')
+
+ with self.test_session(use_gpu=True):
+ x = init(shape).eval()
+
+ self.assertNear(np.mean(x), expect_mean, err=1e-2)
+ self.assertNear(np.var(x), expect_var, err=1e-2)
+
+ def testUniformDistribution(self):
+ shape = [100, 100]
+ expect_mean = 0.
+ expect_var = 1. / shape[0]
+ init = init_ops.variance_scaling_initializer(distribution='uniform')
+
+ with self.test_session(use_gpu=True):
+ x = init(shape).eval()
+
+ self.assertNear(np.mean(x), expect_mean, err=1e-2)
+ self.assertNear(np.var(x), expect_var, err=1e-2)
+
+
# TODO(vrv): move to sequence_ops_test?
class RangeTest(test.TestCase):
diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py
index a0c372db7d..e95c729715 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_test.py
@@ -947,7 +947,7 @@ class PoolingTest(test.TestCase):
output_sizes,
x_init_value=x_init_value,
delta=1e-2)
- print("%s gradient error = " % func_name, err)
+ tf_logging.info("%s gradient error = " % func_name, err)
self.assertLess(err, err_tolerance)
def _ConstructAndTestSecondGradient(self,
@@ -1024,7 +1024,7 @@ class PoolingTest(test.TestCase):
input_sizes,
x_init_value=x_init_value,
delta=1e-2)
- print("%s second-order gradient error = " % func_name, err)
+ tf_logging.info("%s second-order gradient error = " % func_name, err)
self.assertLess(err, err_tolerance)
def _testMaxPoolGradValidPadding1_1(self, data_format, use_gpu):
diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py
index 677253946e..253e43920b 100644
--- a/tensorflow/python/kernel_tests/py_func_test.py
+++ b/tensorflow/python/kernel_tests/py_func_test.py
@@ -19,6 +19,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import gc
import re
import numpy as np
@@ -434,13 +435,29 @@ class PyFuncTest(test.TestCase):
# ----- Tests shared by py_func and eager_py_func -----
def testCleanup(self):
- for _ in xrange(1000):
- g = ops.Graph()
- with g.as_default():
- c = constant_op.constant([1.], dtypes.float32)
- _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32])
- _ = script_ops.eager_py_func(lambda x: x + 1, [c], [dtypes.float32])
- self.assertLess(script_ops._py_funcs.size(), 100)
+ # Delete everything created by previous tests to avoid side effects.
+ ops.reset_default_graph()
+ gc.collect()
+ initial_size = script_ops._py_funcs.size()
+ # Encapsulate the graph generation, so locals can be deleted.
+ def make_graphs():
+ for _ in xrange(1000):
+ g = ops.Graph()
+ with g.as_default():
+ c = constant_op.constant([1.], dtypes.float32)
+ _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32])
+ _ = script_ops.eager_py_func(lambda x: x + 1, [c], [dtypes.float32])
+ # These ops have a reference to 'c' which has a reference to the graph.
+ # Check that the functions are deleted even though they reference the graph.
+ # (see #18292)
+ _ = script_ops.py_func(lambda x: x + c.shape[0], [c], [dtypes.float32])
+ _ = script_ops.eager_py_func(lambda x: x + c.shape[0], [c], [dtypes.float32])
+
+ # Call garbage collector to enforce deletion.
+ make_graphs()
+ ops.reset_default_graph()
+ gc.collect()
+ self.assertEqual(initial_size, script_ops._py_funcs.size())
# ----- Tests for eager_py_func -----
@test_util.run_in_graph_and_eager_modes()
diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
index 79fe927b8a..faa4b49a8d 100644
--- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
+++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
@@ -144,7 +144,9 @@ class StatefulScatterNdTest(test.TestCase):
self.assertAllClose(new, ref_var.eval())
def _VariableRankTests(self, np_scatter, tf_scatter):
- for vtype in (np.float32, np.float64, np.complex64, np.complex128):
+ for vtype in (np.int32,
+ np.float32, np.float64,
+ np.complex64, np.complex128):
for itype in (np.int32, np.int64):
self._VariableRankTest(np_scatter, tf_scatter, vtype, itype)
@@ -221,7 +223,7 @@ class StatefulScatterNdTest(test.TestCase):
# self._VariableRankTests(_NumpyDiv, state_ops.scatter_nd_div)
def _ScatterRepeatIndicesTest(self, np_scatter, tf_scatter):
- for vtype in (np.float32, np.float64):
+ for vtype in (np.int32, np.float32, np.float64):
for itype in (np.int32, np.int64):
self._VariableRankTest(
np_scatter, tf_scatter, vtype, itype, repeat_indices=True)
diff --git a/tensorflow/python/kernel_tests/scatter_ops_test.py b/tensorflow/python/kernel_tests/scatter_ops_test.py
index c70a4ffce7..1a0fa744ae 100644
--- a/tensorflow/python/kernel_tests/scatter_ops_test.py
+++ b/tensorflow/python/kernel_tests/scatter_ops_test.py
@@ -159,7 +159,13 @@ class ScatterTest(test.TestCase):
# Clips small values to avoid division by zero.
def clip_small_values(x):
- return 1e-4 * np.sign(x) if np.abs(x) < 1e-4 else x
+ threshold = 1e-4
+ sign = np.sign(x)
+
+ if isinstance(x, np.int32):
+ threshold = 1
+ sign = np.random.choice([-1, 1])
+ return threshold * sign if np.abs(x) < threshold else x
updates = np.vectorize(clip_small_values)(updates)
old = _AsType(np.random.randn(*((first_dim,) + extra_shape)), vtype)
@@ -181,7 +187,11 @@ class ScatterTest(test.TestCase):
tf_scatter,
repeat_indices=False,
updates_are_scalar=False):
- for vtype in (np.float32, np.float64):
+ vtypes = [np.float32, np.float64]
+ if tf_scatter != state_ops.scatter_div:
+ vtypes.append(np.int32)
+
+ for vtype in vtypes:
for itype in (np.int32, np.int64):
self._VariableRankTest(tf_scatter, vtype, itype, repeat_indices,
updates_are_scalar)
diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
index 794be096b7..a82855dfeb 100644
--- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
@@ -264,7 +264,9 @@ class UnsortedSegmentTest(SegmentReductionHelper):
# A subset of ops has been enabled for complex numbers
self.complex_ops_list = [(np.add, None,
- math_ops.unsorted_segment_sum, lambda t: 0)]
+ math_ops.unsorted_segment_sum, lambda t: 0),
+ (np.ndarray.__mul__, None,
+ math_ops.unsorted_segment_prod, lambda t: 1)]
self.differentiable_dtypes = [dtypes_lib.float16, dtypes_lib.float32,
dtypes_lib.float64]
self.all_dtypes = (self.differentiable_dtypes +
diff --git a/tensorflow/python/kernel_tests/string_split_op_test.py b/tensorflow/python/kernel_tests/string_split_op_test.py
index a5bd1b6ee0..e20daccb28 100644
--- a/tensorflow/python/kernel_tests/string_split_op_test.py
+++ b/tensorflow/python/kernel_tests/string_split_op_test.py
@@ -146,5 +146,101 @@ class StringSplitOpTest(test.TestCase):
self.assertAllEqual(shape, [3, 1])
+class StringSplitV2OpTest(test.TestCase):
+
+ def testSplitV2(self):
+ strings = ["pigs on the wing", "animals"]
+
+ with self.test_session() as sess:
+ tokens = string_ops.string_split_v2(strings)
+ indices, values, shape = sess.run(tokens)
+ self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [1, 0]])
+ self.assertAllEqual(values, [b"pigs", b"on", b"the", b"wing", b"animals"])
+ self.assertAllEqual(shape, [2, 4])
+
+ def testSplitV2MultiCharSeparator(self):
+ # Match Python behavior:
+ # >>> '1<>2<>3'.split('<>')
+ # ['1', '2', '3']
+ # >>> "<><>4<>5<><>6<>".split("<>")
+ # ['', '', '4', '5', '', '6', '']
+ strings = ["1<>2<>3", "<><>4<>5<><>6<>"]
+
+ with self.test_session() as sess:
+ tokens = string_ops.string_split_v2(strings, sep="<>")
+ indices, values, shape = sess.run(tokens)
+ self.assertAllEqual(
+ indices, [[0, 0], [0, 1], [0, 2],
+ [1, 0], [1, 1], [1, 2], [1, 3], [1, 4], [1, 5], [1, 6]])
+ self.assertAllEqual(values, [b"1", b"2", b"3",
+ b"", b"", b"4", b"5", b"", b"6", b""])
+ self.assertAllEqual(shape, [2, 7])
+
+ def testSplitV2SimpleSeparator(self):
+ # Match Python behavior:
+ # >>> '1,2,3'.split(',')
+ # ['1', '2', '3']
+ # >>> '1,2,,3,'.split(',')
+ # ['1', '2', '', '3', '']
+ strings = ["1,2,3", "4,5,,6,"]
+
+ with self.test_session() as sess:
+ tokens = string_ops.string_split_v2(strings, sep=',')
+ indices, values, shape = sess.run(tokens)
+ self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2],
+ [1, 0], [1, 1], [1, 2], [1, 3], [1, 4]])
+ self.assertAllEqual(values, [b"1", b"2", b"3",
+ b"4", b"5", b"", b"6", b""])
+ self.assertAllEqual(shape, [2, 5])
+
+ def testSplitV2EmptySeparator(self):
+ # Match Python behavior:
+ # >>> '1 2 3'.split()
+ # ['1', '2', '3']
+ # >>> ' 1 2 3 '.split()
+ # ['1', '2', '3']
+ strings = ["1 2 3", " 4 5 6 "]
+
+ with self.test_session() as sess:
+ tokens = string_ops.string_split_v2(strings)
+ indices, values, shape = sess.run(tokens)
+ self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2],
+ [1, 0], [1, 1], [1, 2]])
+ self.assertAllEqual(values, [b"1", b"2", b"3", b"4", b"5", b"6"])
+ self.assertAllEqual(shape, [2, 3])
+
+ def testSplitV2SimpleSeparatorMaxSplit(self):
+ # Match Python behavior:
+ # >>> '1,2,3'.split(',', maxsplit=1)
+ # ['1', '2,3']
+ # >>> '4,5,,6,'.split(',', maxsplit=1)
+ # ['4', '5,,6,']
+ strings = ["1,2,3", "4,5,,6,"]
+
+ with self.test_session() as sess:
+ tokens = string_ops.string_split_v2(strings, sep=',', maxsplit=1)
+ indices, values, shape = sess.run(tokens)
+ self.assertAllEqual(indices, [[0, 0], [0, 1],
+ [1, 0], [1, 1]])
+ self.assertAllEqual(values, [b"1", b"2,3", b"4", b"5,,6,"])
+ self.assertAllEqual(shape, [2, 2])
+
+ def testSplitV2EmptySeparatorMaxSplit(self):
+ # Match Python behavior:
+ # >>> '1 2 3'.split(maxsplit=1)
+ # ['1', '2 3']
+ # >>> " 4 5 6 ".split(maxsplit=1)
+ # ['4', '5 6 ']
+ strings = ["1 2 3", " 4 5 6 "]
+
+ with self.test_session() as sess:
+ tokens = string_ops.string_split_v2(strings, maxsplit=1)
+ indices, values, shape = sess.run(tokens)
+ self.assertAllEqual(indices, [[0, 0], [0, 1],
+ [1, 0], [1, 1]])
+ self.assertAllEqual(values, [b"1", b"2 3", b"4", b"5 6 "])
+ self.assertAllEqual(shape, [2, 2])
+
+
if __name__ == "__main__":
test.main()
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 8129334703..fae63b1132 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -2619,6 +2619,10 @@ reverse.__doc__ = gen_array_ops.reverse_v2.__doc__
# pylint: disable=redefined-builtin
@tf_export("reverse_sequence")
+@deprecation.deprecated_args(
+ None, "seq_dim is deprecated, use seq_axis instead", "seq_dim")
+@deprecation.deprecated_args(
+ None, "batch_dim is deprecated, use batch_axis instead", "batch_dim")
def reverse_sequence(input,
seq_lengths,
seq_axis=None,
diff --git a/tensorflow/python/ops/gradient_checker.py b/tensorflow/python/ops/gradient_checker.py
index 12afcd0b51..94c8d79335 100644
--- a/tensorflow/python/ops/gradient_checker.py
+++ b/tensorflow/python/ops/gradient_checker.py
@@ -283,10 +283,10 @@ def compute_gradient(x,
numbers. For example, if `x` is complex with shape `[m]` and `y` is complex
with shape `[n]`, each Jacobian `J` will have shape `[m * 2, n * 2]` with
- J[:m, :n] = d(Re y)/d(Re x)
- J[:m, n:] = d(Im y)/d(Re x)
- J[m:, :n] = d(Re y)/d(Im x)
- J[m:, n:] = d(Im y)/d(Im x)
+ J[::2, ::2] = d(Re y)/d(Re x)
+ J[::2, 1::2] = d(Im y)/d(Re x)
+ J[1::2, ::2] = d(Re y)/d(Im x)
+ J[1::2, 1::2] = d(Im y)/d(Im x)
Args:
x: a tensor or list of tensors
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index bdcf420980..f27d9224c1 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -28,6 +28,7 @@ from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import functional_ops
from tensorflow.python.ops import gen_image_ops
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import math_ops
@@ -258,14 +259,14 @@ def random_flip_up_down(image, seed=None):
dimension, which is `height`. Otherwise output the image as-is.
Args:
- image: A 3-D tensor of shape `[height, width, channels].`
+ image: 4-D Tensor of shape `[batch, height, width, channels]` or
+ 3-D Tensor of shape `[height, width, channels]`.
seed: A Python integer. Used to create a random seed. See
@{tf.set_random_seed}
for behavior.
Returns:
- A 3-D tensor of the same type and shape as `image`.
-
+ A tensor of the same type and shape as `image`.
Raises:
ValueError: if the shape of `image` not supported.
"""
@@ -280,13 +281,14 @@ def random_flip_left_right(image, seed=None):
second dimension, which is `width`. Otherwise output the image as-is.
Args:
- image: A 3-D tensor of shape `[height, width, channels].`
+ image: 4-D Tensor of shape `[batch, height, width, channels]` or
+ 3-D Tensor of shape `[height, width, channels]`.
seed: A Python integer. Used to create a random seed. See
@{tf.set_random_seed}
for behavior.
Returns:
- A 3-D tensor of the same type and shape as `image`.
+ A tensor of the same type and shape as `image`.
Raises:
ValueError: if the shape of `image` not supported.
@@ -297,7 +299,8 @@ def random_flip_left_right(image, seed=None):
def _random_flip(image, flip_index, seed, scope_name):
"""Randomly (50% chance) flip an image along axis `flip_index`.
Args:
- image: A 3-D tensor of shape `[height, width, channels].`
+ image: 4-D Tensor of shape `[batch, height, width, channels]` or
+ 3-D Tensor of shape `[height, width, channels]`.
flip_index: The dimension along which to flip the image.
Vertical: 0, Horizontal: 1
seed: A Python integer. Used to create a random seed. See
@@ -306,22 +309,37 @@ def _random_flip(image, flip_index, seed, scope_name):
scope_name: Name of the scope in which the ops are added.
Returns:
- A 3-D tensor of the same type and shape as `image`.
+ A tensor of the same type and shape as `image`.
Raises:
ValueError: if the shape of `image` not supported.
"""
with ops.name_scope(None, scope_name, [image]) as scope:
image = ops.convert_to_tensor(image, name='image')
- image = _Assert3DImage(image)
- uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed)
- mirror_cond = math_ops.less(uniform_random, .5)
- result = control_flow_ops.cond(
- mirror_cond,
- lambda: array_ops.reverse(image, [flip_index]),
- lambda: image,
- name=scope)
- return fix_image_flip_shape(image, result)
+ image = _AssertAtLeast3DImage(image)
+ shape = image.get_shape()
+ if shape.ndims == 3 or shape.ndims is None:
+ uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed)
+ mirror_cond = math_ops.less(uniform_random, .5)
+ result = control_flow_ops.cond(
+ mirror_cond,
+ lambda: array_ops.reverse(image, [flip_index]),
+ lambda: image,
+ name=scope
+ )
+ return fix_image_flip_shape(image, result)
+ elif shape.ndims == 4:
+ uniform_random = random_ops.random_uniform(
+ [array_ops.shape(image)[0]], 0, 1.0, seed=seed
+ )
+ mirror_cond = math_ops.less(uniform_random, .5)
+ return array_ops.where(
+ mirror_cond,
+ image,
+ functional_ops.map_fn(lambda x: array_ops.reverse(x, [flip_index]), image, dtype=image.dtype)
+ )
+ else:
+ raise ValueError('\'image\' must have either 3 or 4 dimensions.')
@tf_export('image.flip_left_right')
@@ -1634,13 +1652,13 @@ def is_jpeg(contents, name=None):
@tf_export('image.decode_image')
-def decode_image(contents, channels=None, name=None):
+def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None):
"""Convenience function for `decode_bmp`, `decode_gif`, `decode_jpeg`,
and `decode_png`.
Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the
- appropriate operation to convert the input bytes `string` into a `Tensor` of
- type `uint8`.
+ appropriate operation to convert the input bytes `string` into a `Tensor`
+ of type `dtype`.
Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as
opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D
@@ -1652,10 +1670,11 @@ def decode_image(contents, channels=None, name=None):
contents: 0-D `string`. The encoded image bytes.
channels: An optional `int`. Defaults to `0`. Number of color channels for
the decoded image.
+ dtype: The desired DType of the returned `Tensor`.
name: A name for the operation (optional)
Returns:
- `Tensor` with type `uint8` with shape `[height, width, num_channels]` for
+ `Tensor` with type `dtype` and shape `[height, width, num_channels]` for
BMP, JPEG, and PNG images and shape `[num_frames, height, width, 3]` for
GIF images.
@@ -1679,7 +1698,7 @@ def decode_image(contents, channels=None, name=None):
channels_msg = 'Channels must be in (None, 0, 3) when decoding BMP images'
assert_channels = control_flow_ops.Assert(good_channels, [channels_msg])
with ops.control_dependencies([assert_decode, assert_channels]):
- return gen_image_ops.decode_bmp(contents)
+ return convert_image_dtype(gen_image_ops.decode_bmp(contents), dtype)
def _gif():
# Create assert to make sure that channels is not set to 1
@@ -1692,7 +1711,7 @@ def decode_image(contents, channels=None, name=None):
channels_msg = 'Channels must be in (None, 0, 3) when decoding GIF images'
assert_channels = control_flow_ops.Assert(good_channels, [channels_msg])
with ops.control_dependencies([assert_channels]):
- return gen_image_ops.decode_gif(contents)
+ return convert_image_dtype(gen_image_ops.decode_gif(contents), dtype)
def check_gif():
# Create assert op to check that bytes are GIF decodable
@@ -1701,7 +1720,11 @@ def decode_image(contents, channels=None, name=None):
def _png():
"""Decodes a PNG image."""
- return gen_image_ops.decode_png(contents, channels)
+ return convert_image_dtype(
+ gen_image_ops.decode_png(contents, channels,
+ dtype=dtypes.uint8
+ if dtype == dtypes.uint8
+ else dtypes.uint16), dtype)
def check_png():
"""Checks if an image is PNG."""
@@ -1717,7 +1740,8 @@ def decode_image(contents, channels=None, name=None):
'images')
assert_channels = control_flow_ops.Assert(good_channels, [channels_msg])
with ops.control_dependencies([assert_channels]):
- return gen_image_ops.decode_jpeg(contents, channels)
+ return convert_image_dtype(
+ gen_image_ops.decode_jpeg(contents, channels), dtype)
# Decode normal JPEG images (start with \xff\xd8\xff\xe0)
# as well as JPEG images with EXIF data (start with \xff\xd8\xff\xe1).
@@ -1878,7 +1902,7 @@ def sample_distorted_bounding_box(image_size,
width / height within this range.
area_range: An optional list of `floats`. Defaults to `[0.05, 1]`.
The cropped area of the image must contain a fraction of the
- supplied image within in this range.
+ supplied image within this range.
max_attempts: An optional `int`. Defaults to `100`.
Number of attempts at generating a cropped region of the image
of the specified constraints. After `max_attempts` failures, return the
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index 45499dcce0..2a6ab26e96 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -533,6 +533,37 @@ class FlipImageBenchmark(test.Benchmark):
iters=benchmark_rounds,
wall_time=step_time)
+ def _benchmarkBatchedRandomFlipLeftRight(self, device, cpu_count):
+ image_shape = [16, 299, 299, 3]
+ warmup_rounds = 100
+ benchmark_rounds = 1000
+ config = config_pb2.ConfigProto()
+ if cpu_count is not None:
+ config.inter_op_parallelism_threads = 1
+ config.intra_op_parallelism_threads = cpu_count
+ with session.Session("", graph=ops.Graph(), config=config) as sess:
+ with ops.device(device):
+ inputs = variables.Variable(
+ random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255,
+ trainable=False,
+ dtype=dtypes.float32)
+ run_op = image_ops.random_flip_left_right(inputs)
+ sess.run(variables.global_variables_initializer())
+ for i in xrange(warmup_rounds + benchmark_rounds):
+ if i == warmup_rounds:
+ start = time.time()
+ sess.run(run_op)
+ end = time.time()
+ step_time = (end - start) / benchmark_rounds
+ tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
+ print("benchmarkBatchedRandomFlipLeftRight_16_299_299_3_%s step_time: "
+ "%.2f us" %
+ (tag, step_time * 1e6))
+ self.report_benchmark(
+ name="benchmarkBatchedRandomFlipLeftRight_16_299_299_3_%s" % (tag),
+ iters=benchmark_rounds,
+ wall_time=step_time)
+
def benchmarkFlipLeftRightCpu1(self):
self._benchmarkFlipLeftRight("/cpu:0", 1)
@@ -551,6 +582,15 @@ class FlipImageBenchmark(test.Benchmark):
def benchmarkRandomFlipLeftRightGpu(self):
self._benchmarkRandomFlipLeftRight(test.gpu_device_name(), None)
+ def benchmarkBatchedRandomFlipLeftRightCpu1(self):
+ self._benchmarkBatchedRandomFlipLeftRight("/cpu:0", 1)
+
+ def benchmarkBatchedRandomFlipLeftRightCpuAll(self):
+ self._benchmarkBatchedRandomFlipLeftRight("/cpu:0", None)
+
+ def benchmarkBatchedRandomFlipLeftRightGpu(self):
+ self._benchmarkBatchedRandomFlipLeftRight(test.gpu_device_name(), None)
+
class AdjustHueBenchmark(test.Benchmark):
@@ -987,7 +1027,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
with self.test_session(use_gpu=True):
x_tf = constant_op.constant(x_np, shape=x_np.shape)
- y = image_ops.random_flip_left_right(x_tf)
+ y = image_ops.random_flip_left_right(x_tf, seed=seed)
self.assertTrue(y.op.name.startswith("random_flip_left_right"))
count_flipped = 0
@@ -1008,6 +1048,50 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
self.assertGreaterEqual(count_flipped, 20)
self.assertGreaterEqual(count_unflipped, 20)
+ def testRandomFlipLeftRightWithBatch(self):
+ batch_size = 16
+ seed = 42
+
+ # create single item of test data
+ x_np_raw = np.array(
+ [[1, 2, 3], [1, 2, 3]], dtype=np.uint8
+ ).reshape([1, 2, 3, 1])
+ y_np_raw = np.array(
+ [[3, 2, 1], [3, 2, 1]], dtype=np.uint8
+ ).reshape([1, 2, 3, 1])
+
+ # create batched test data
+ x_np = np.vstack([x_np_raw for _ in range(batch_size)])
+ y_np = np.vstack([y_np_raw for _ in range(batch_size)])
+
+ with self.test_session(use_gpu=True):
+ x_tf = constant_op.constant(x_np, shape=x_np.shape)
+ y = image_ops.random_flip_left_right(x_tf, seed=seed)
+ self.assertTrue(y.op.name.startswith("random_flip_left_right"))
+
+ count_flipped = 0
+ count_unflipped = 0
+ for _ in range(100):
+ y_tf = y.eval()
+
+ # check every element of the batch
+ for i in range(batch_size):
+ if y_tf[i][0][0] == 1:
+ self.assertAllEqual(y_tf[i], x_np[i])
+ count_unflipped += 1
+ else:
+ self.assertAllEqual(y_tf[i], y_np[i])
+ count_flipped += 1
+
+ # 100 trials, each containing batch_size elements
+ # Mean: 50 * batch_size
+ # Std Dev: ~5 * sqrt(batch_size)
+ # Six-sigma lower bound: 50 * batch_size - (5 * 6 * sqrt(batch_size))
+ # = 50 * batch_size - 30 * sqrt(batch_size) = 800 - 30 * 4 = 680
+ six_sigma = 50 * batch_size - 30 * np.sqrt(batch_size)
+ self.assertGreaterEqual(count_flipped, six_sigma)
+ self.assertGreaterEqual(count_unflipped, six_sigma)
+
def testInvolutionUpDown(self):
x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1])
@@ -1057,9 +1141,11 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1])
y_np = np.array([[4, 5, 6], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1])
+ seed = 42
+
with self.test_session(use_gpu=True):
x_tf = constant_op.constant(x_np, shape=x_np.shape)
- y = image_ops.random_flip_up_down(x_tf, seed=42)
+ y = image_ops.random_flip_up_down(x_tf, seed=seed)
self.assertTrue(y.op.name.startswith("random_flip_up_down"))
count_flipped = 0
count_unflipped = 0
@@ -1079,6 +1165,50 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
self.assertGreaterEqual(count_flipped, 20)
self.assertGreaterEqual(count_unflipped, 20)
+ def testRandomFlipUpDownWithBatch(self):
+ batch_size = 16
+ seed = 42
+
+ # create single item of test data
+ x_np_raw = np.array(
+ [[1, 2, 3], [4, 5, 6]], dtype=np.uint8
+ ).reshape([1, 2, 3, 1])
+ y_np_raw = np.array(
+ [[4, 5, 6], [1, 2, 3]], dtype=np.uint8
+ ).reshape([1, 2, 3, 1])
+
+ # create batched test data
+ x_np = np.vstack([x_np_raw for _ in range(batch_size)])
+ y_np = np.vstack([y_np_raw for _ in range(batch_size)])
+
+ with self.test_session(use_gpu=True):
+ x_tf = constant_op.constant(x_np, shape=x_np.shape)
+ y = image_ops.random_flip_up_down(x_tf, seed=seed)
+ self.assertTrue(y.op.name.startswith("random_flip_up_down"))
+
+ count_flipped = 0
+ count_unflipped = 0
+ for _ in range(100):
+ y_tf = y.eval()
+
+ # check every element of the batch
+ for i in range(batch_size):
+ if y_tf[i][0][0] == 1:
+ self.assertAllEqual(y_tf[i], x_np[i])
+ count_unflipped += 1
+ else:
+ self.assertAllEqual(y_tf[i], y_np[i])
+ count_flipped += 1
+
+ # 100 trials, each containing batch_size elements
+ # Mean: 50 * batch_size
+ # Std Dev: ~5 * sqrt(batch_size)
+ # Six-sigma lower bound: 50 * batch_size - (5 * 6 * sqrt(batch_size))
+ # = 50 * batch_size - 30 * sqrt(batch_size) = 800 - 30 * 4 = 680
+ six_sigma = 50 * batch_size - 30 * np.sqrt(batch_size)
+ self.assertGreaterEqual(count_flipped, six_sigma)
+ self.assertGreaterEqual(count_unflipped, six_sigma)
+
def testInvolutionTranspose(self):
x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1])
@@ -1156,6 +1286,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
#Ops that support 4D input
for op in [
image_ops.flip_left_right, image_ops.flip_up_down,
+ image_ops.random_flip_left_right, image_ops.random_flip_up_down,
image_ops.transpose_image, image_ops.rot90
]:
transformed_unknown_dims_4 = op(p_unknown_dims_4)
@@ -1166,14 +1297,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
"must be at least three-dimensional"):
op(p_wrong_rank)
- for op in [
- image_ops.random_flip_left_right,
- image_ops.random_flip_up_down,
- ]:
- with self.assertRaisesRegexp(ValueError, "must be three-dimensional"):
- op(p_wrong_rank)
-
-
def testRot90GroupOrder(self):
image = np.arange(24, dtype=np.uint8).reshape([2, 4, 3])
with self.test_session(use_gpu=True):
@@ -1208,41 +1331,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
y_np = np.rot90(image, k=k, axes=(1, 2))
self.assertAllEqual(y_np, y_tf.eval({k_placeholder: k}))
-class RandomFlipTest(test_util.TensorFlowTestCase):
-
- def testRandomLeftRight(self):
- x_np = np.array([0, 1], dtype=np.uint8).reshape([1, 2, 1])
- num_iterations = 500
-
- hist = [0, 0]
- with self.test_session(use_gpu=True):
- x_tf = constant_op.constant(x_np, shape=x_np.shape)
- y = image_ops.random_flip_left_right(x_tf)
- for _ in xrange(num_iterations):
- y_np = y.eval().flatten()[0]
- hist[y_np] += 1
-
- # Ensure that each entry is observed within 4 standard deviations.
- four_stddev = 4.0 * np.sqrt(num_iterations / 2.0)
- self.assertAllClose(hist, [num_iterations / 2.0] * 2, atol=four_stddev)
-
- def testRandomUpDown(self):
- x_np = np.array([0, 1], dtype=np.uint8).reshape([2, 1, 1])
- num_iterations = 500
-
- hist = [0, 0]
- with self.test_session(use_gpu=True):
- x_tf = constant_op.constant(x_np, shape=x_np.shape)
- y = image_ops.random_flip_up_down(x_tf)
- for _ in xrange(num_iterations):
- y_np = y.eval().flatten()[0]
- hist[y_np] += 1
-
- # Ensure that each entry is observed within 4 standard deviations.
- four_stddev = 4.0 * np.sqrt(num_iterations / 2.0)
- self.assertAllClose(hist, [num_iterations / 2.0] * 2, atol=four_stddev)
-
-
class AdjustContrastTest(test_util.TensorFlowTestCase):
def _testContrast(self, x_np, y_np, contrast_factor):
@@ -3880,5 +3968,88 @@ class SobelEdgesTest(test_util.TensorFlowTestCase):
self.assertAllClose(expected_batch, actual_sobel)
+class DecodeImageTest(test_util.TensorFlowTestCase):
+
+ def testJpegUint16(self):
+ with self.test_session(use_gpu=True) as sess:
+ base = "tensorflow/core/lib/jpeg/testdata"
+ jpeg0 = io_ops.read_file(os.path.join(base, "jpeg_merge_test1.jpg"))
+ image0 = image_ops.decode_image(jpeg0, dtype=dtypes.uint16)
+ image1 = image_ops.convert_image_dtype(image_ops.decode_jpeg(jpeg0),
+ dtypes.uint16)
+ image0, image1 = sess.run([image0, image1])
+ self.assertAllEqual(image0, image1)
+
+ def testPngUint16(self):
+ with self.test_session(use_gpu=True) as sess:
+ base = "tensorflow/core/lib/png/testdata"
+ png0 = io_ops.read_file(os.path.join(base, "lena_rgba.png"))
+ image0 = image_ops.decode_image(png0, dtype=dtypes.uint16)
+ image1 = image_ops.convert_image_dtype(
+ image_ops.decode_png(png0, dtype=dtypes.uint16), dtypes.uint16)
+ image0, image1 = sess.run([image0, image1])
+ self.assertAllEqual(image0, image1)
+
+ def testGifUint16(self):
+ with self.test_session(use_gpu=True) as sess:
+ base = "tensorflow/core/lib/gif/testdata"
+ gif0 = io_ops.read_file(os.path.join(base, "scan.gif"))
+ image0 = image_ops.decode_image(gif0, dtype=dtypes.uint16)
+ image1 = image_ops.convert_image_dtype(image_ops.decode_gif(gif0),
+ dtypes.uint16)
+ image0, image1 = sess.run([image0, image1])
+ self.assertAllEqual(image0, image1)
+
+ def testBmpUint16(self):
+ with self.test_session(use_gpu=True) as sess:
+ base = "tensorflow/core/lib/bmp/testdata"
+ bmp0 = io_ops.read_file(os.path.join(base, "lena.bmp"))
+ image0 = image_ops.decode_image(bmp0, dtype=dtypes.uint16)
+ image1 = image_ops.convert_image_dtype(image_ops.decode_bmp(bmp0),
+ dtypes.uint16)
+ image0, image1 = sess.run([image0, image1])
+ self.assertAllEqual(image0, image1)
+
+ def testJpegFloat32(self):
+ with self.test_session(use_gpu=True) as sess:
+ base = "tensorflow/core/lib/jpeg/testdata"
+ jpeg0 = io_ops.read_file(os.path.join(base, "jpeg_merge_test1.jpg"))
+ image0 = image_ops.decode_image(jpeg0, dtype=dtypes.float32)
+ image1 = image_ops.convert_image_dtype(image_ops.decode_jpeg(jpeg0),
+ dtypes.float32)
+ image0, image1 = sess.run([image0, image1])
+ self.assertAllEqual(image0, image1)
+
+ def testPngFloat32(self):
+ with self.test_session(use_gpu=True) as sess:
+ base = "tensorflow/core/lib/png/testdata"
+ png0 = io_ops.read_file(os.path.join(base, "lena_rgba.png"))
+ image0 = image_ops.decode_image(png0, dtype=dtypes.float32)
+ image1 = image_ops.convert_image_dtype(
+ image_ops.decode_png(png0, dtype=dtypes.uint16), dtypes.float32)
+ image0, image1 = sess.run([image0, image1])
+ self.assertAllEqual(image0, image1)
+
+ def testGifFloat32(self):
+ with self.test_session(use_gpu=True) as sess:
+ base = "tensorflow/core/lib/gif/testdata"
+ gif0 = io_ops.read_file(os.path.join(base, "scan.gif"))
+ image0 = image_ops.decode_image(gif0, dtype=dtypes.float32)
+ image1 = image_ops.convert_image_dtype(image_ops.decode_gif(gif0),
+ dtypes.float32)
+ image0, image1 = sess.run([image0, image1])
+ self.assertAllEqual(image0, image1)
+
+ def testBmpFloat32(self):
+ with self.test_session(use_gpu=True) as sess:
+ base = "tensorflow/core/lib/bmp/testdata"
+ bmp0 = io_ops.read_file(os.path.join(base, "lena.bmp"))
+ image0 = image_ops.decode_image(bmp0, dtype=dtypes.float32)
+ image1 = image_ops.convert_image_dtype(image_ops.decode_bmp(bmp0),
+ dtypes.float32)
+ image0, image1 = sess.run([image0, image1])
+ self.assertAllEqual(image0, image1)
+
+
if __name__ == "__main__":
googletest.main()
diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py
index 2df230d470..724fcc39cd 100644
--- a/tensorflow/python/ops/init_ops.py
+++ b/tensorflow/python/ops/init_ops.py
@@ -467,7 +467,8 @@ class VarianceScaling(Initializer):
else:
scale /= max(1., (fan_in + fan_out) / 2.)
if self.distribution == "normal":
- stddev = math.sqrt(scale)
+ # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
+ stddev = math.sqrt(scale) / .87962566103423978
return random_ops.truncated_normal(
shape, 0.0, stddev, dtype, seed=self.seed)
else:
diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py
index 222b8ebc9d..8276047cb6 100644
--- a/tensorflow/python/ops/logging_ops.py
+++ b/tensorflow/python/ops/logging_ops.py
@@ -35,8 +35,9 @@ from tensorflow.python.util.tf_export import tf_export
# Assert and Print are special symbols in python, so we must
-# use an upper-case version of them.
-@tf_export("Print")
+# have an upper-case version of them. For users with Python 3 or Python 2.7
+# with `from __future__ import print_function`, we also allow lowercase.
+@tf_export("Print", "print")
def Print(input_, data, message=None, first_n=None, summarize=None,
name=None):
"""Prints a list of tensors.
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index e40481f3a7..466d0dadc8 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -125,8 +125,8 @@ def abs(x, name=None): # pylint: disable=redefined-builtin
```
Args:
- x: A `Tensor` or `SparseTensor` of type `float32`, `float64`, `int32`,
- `int64`, `complex64` or `complex128`.
+ x: A `Tensor` or `SparseTensor` of type `float16`, `float32`, `float64`,
+ `int32`, `int64`, `complex64` or `complex128`.
name: A name for the operation (optional).
Returns:
@@ -430,10 +430,10 @@ def pow(x, y, name=None): # pylint: disable=redefined-builtin
```
Args:
- x: A `Tensor` of type `float32`, `float64`, `int32`, `int64`, `complex64`,
- or `complex128`.
- y: A `Tensor` of type `float32`, `float64`, `int32`, `int64`, `complex64`,
- or `complex128`.
+ x: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, `int64`,
+ `complex64`, or `complex128`.
+ y: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, `int64`,
+ `complex64`, or `complex128`.
name: A name for the operation (optional).
Returns:
@@ -600,7 +600,7 @@ def round(x, name=None): # pylint: disable=redefined-builtin
```
Args:
- x: A `Tensor` of type `float32` or `float64`.
+ x: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, or `int64`.
name: A name for the operation (optional).
Returns:
@@ -1257,7 +1257,7 @@ def reduce_sum(input_tensor,
entry in `axis`. If `keepdims` is true, the reduced dimensions
are retained with length 1.
- If `axis` has no entries, all dimensions are reduced, and a
+ If `axis` is None, all dimensions are reduced, and a
tensor with a single element is returned.
For example:
@@ -1397,7 +1397,7 @@ def reduce_mean(input_tensor,
entry in `axis`. If `keepdims` is true, the reduced dimensions
are retained with length 1.
- If `axis` has no entries, all dimensions are reduced, and a
+ If `axis` is None, all dimensions are reduced, and a
tensor with a single element is returned.
For example:
@@ -1469,7 +1469,7 @@ def reduce_prod(input_tensor,
entry in `axis`. If `keepdims` is true, the reduced dimensions
are retained with length 1.
- If `axis` has no entries, all dimensions are reduced, and a
+ If `axis` is None, all dimensions are reduced, and a
tensor with a single element is returned.
Args:
@@ -1519,7 +1519,7 @@ def reduce_min(input_tensor,
entry in `axis`. If `keepdims` is true, the reduced dimensions
are retained with length 1.
- If `axis` has no entries, all dimensions are reduced, and a
+ If `axis` is None, all dimensions are reduced, and a
tensor with a single element is returned.
Args:
@@ -1568,7 +1568,7 @@ def reduce_max(input_tensor,
entry in `axis`. If `keepdims` is true, the reduced dimensions
are retained with length 1.
- If `axis` has no entries, all dimensions are reduced, and a
+ If `axis` is None, all dimensions are reduced, and a
tensor with a single element is returned.
Args:
@@ -1617,7 +1617,7 @@ def reduce_all(input_tensor,
entry in `axis`. If `keepdims` is true, the reduced dimensions
are retained with length 1.
- If `axis` has no entries, all dimensions are reduced, and a
+ If `axis` is None, all dimensions are reduced, and a
tensor with a single element is returned.
For example:
@@ -1675,7 +1675,7 @@ def reduce_any(input_tensor,
entry in `axis`. If `keepdims` is true, the reduced dimensions
are retained with length 1.
- If `axis` has no entries, all dimensions are reduced, and a
+ If `axis` is None, all dimensions are reduced, and a
tensor with a single element is returned.
For example:
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 783d485892..f47f38e29e 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -621,7 +621,7 @@ def normalize_moments(counts, mean_ss, variance_ss, shift, name=None):
"""Calculate the mean and variance of based on the sufficient statistics.
Args:
- counts: A `Tensor` containing a the total count of the data (one value).
+ counts: A `Tensor` containing the total count of the data (one value).
mean_ss: A `Tensor` containing the mean sufficient statistics: the (possibly
shifted) sum of the elements to average over.
variance_ss: A `Tensor` containing the variance sufficient statistics: the
@@ -689,6 +689,9 @@ def moments(
# Compute true mean while keeping the dims for proper broadcasting.
mean = math_ops.reduce_mean(y, axes, keepdims=True, name="mean")
# sample variance, not unbiased variance
+ # Note: stop_gradient does not change the gradient that gets
+ # backpropagated to the mean from the variance calculation,
+ # because that gradient is zero
variance = math_ops.reduce_mean(
math_ops.squared_difference(y, array_ops.stop_gradient(mean)),
axes,
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index a0b55eb077..0c2f5b06c4 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1596,12 +1596,12 @@ def leaky_relu(features, alpha=0.2, name=None):
Returns:
The activation value.
"""
- with ops.name_scope(name, "LeakyRelu", [features, alpha]):
+ with ops.name_scope(name, "LeakyRelu", [features, alpha]) as name:
features = ops.convert_to_tensor(features, name="features")
if features.dtype.is_integer:
features = math_ops.to_float(features)
alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha")
- return math_ops.maximum(alpha * features, features)
+ return math_ops.maximum(alpha * features, features, name=name)
def _flatten_outer_dims(logits):
diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py
index 46a5f4fae6..035b4735af 100644
--- a/tensorflow/python/ops/nn_test.py
+++ b/tensorflow/python/ops/nn_test.py
@@ -962,6 +962,16 @@ class LeakyReluTest(test_lib.TestCase):
self.assertAllClose(
outputs, [-0.4, -0.2, 0.0, 1.0, 2.0], rtol=tol, atol=tol)
+ def testName(self):
+ np_values = np.array([-2, -1, 0, 1, 2], dtype=np.float64)
+ outputs_with_name_set = nn_ops.leaky_relu(
+ constant_op.constant(np_values),
+ name='test_relu_op')
+ self.assertEqual(outputs_with_name_set.name, 'test_relu_op:0')
+ outputs_without_name_set = nn_ops.leaky_relu(
+ constant_op.constant(np_values))
+ self.assertEqual(outputs_without_name_set.name, 'LeakyRelu:0')
+
class SwishTest(test_lib.TestCase):
diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py
index f8676ccb5f..219562de5d 100644
--- a/tensorflow/python/ops/script_ops.py
+++ b/tensorflow/python/ops/script_ops.py
@@ -23,6 +23,7 @@ import threading
# Used by py_util.cc to get tracebacks.
import traceback # pylint: disable=unused-import
+import weakref
import numpy as np
import six
@@ -129,11 +130,14 @@ class FuncRegistry(object):
def __init__(self):
self._lock = threading.Lock()
self._unique_id = 0 # GUARDED_BY(self._lock)
- self._funcs = {}
+ # Only store weakrefs to the functions. The strong reference is stored in
+ # the graph.
+ self._funcs = weakref.WeakValueDictionary()
def insert(self, func):
"""Registers `func` and returns a unique token for this entry."""
token = self._next_unique_token()
+ # Store a weakref to the function
self._funcs[token] = func
return token
@@ -186,7 +190,7 @@ class FuncRegistry(object):
Raises:
ValueError: if no function is registered for `token`.
"""
- func = self._funcs[token]
+ func = self._funcs.get(token, None)
if func is None:
raise ValueError("callback %s is not found" % token)
if isinstance(func, EagerFunc):
@@ -228,19 +232,6 @@ _py_funcs = FuncRegistry()
pywrap_tensorflow.InitializePyTrampoline(_py_funcs)
-class CleanupFunc(object):
- """A helper class to remove a registered function from _py_funcs."""
-
- def __init__(self, token):
- self._token = token
-
- def __del__(self):
- if _py_funcs is not None:
- # If _py_funcs is None, the program is most likely in shutdown, and the
- # _py_funcs object has been destroyed already.
- _py_funcs.remove(self._token)
-
-
def _internal_py_func(func,
inp,
Tout,
@@ -270,17 +261,15 @@ def _internal_py_func(func,
# bound to that of the outer graph instead.
graph = graph._outer_graph
- cleanup = CleanupFunc(token)
-
# TODO(zhifengc): Consider adding a Graph method to collect
# `cleanup` objects in one of its member.
- if not hasattr(graph, "_cleanup_py_funcs_used_in_graph"):
- graph._cleanup_py_funcs_used_in_graph = []
+ if not hasattr(graph, "_py_funcs_used_in_graph"):
+ graph._py_funcs_used_in_graph = []
- # When `graph` is destroyed, elements in _cleanup_py_funcs_used_in_graph
- # will be destroyed and their __del__ will remove the 'token' from
- # the funcs registry.
- graph._cleanup_py_funcs_used_in_graph.append(cleanup)
+ # Store a reference to the function in the graph to ensure it stays alive
+ # as long as the graph lives. When the graph is destroyed, the function
+ # is left to the garbage collector for destruction as well.
+ graph._py_funcs_used_in_graph.append(func)
# pylint: enable=protected-access
if eager:
diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py
index 0130233746..c3b16a7bd5 100644
--- a/tensorflow/python/ops/sparse_ops.py
+++ b/tensorflow/python/ops/sparse_ops.py
@@ -84,6 +84,8 @@ def _convert_to_sparse_tensors(sp_inputs):
# pylint: disable=protected-access
@tf_export("sparse_concat")
+@deprecation.deprecated_args(
+ None, "concat_dim is deprecated, use axis instead", "concat_dim")
def sparse_concat(axis,
sp_inputs,
name=None,
@@ -597,6 +599,8 @@ class KeywordRequired(object):
@tf_export("sparse_split")
+@deprecation.deprecated_args(
+ None, "split_dim is deprecated, use axis instead", "split_dim")
def sparse_split(keyword_required=KeywordRequired(),
sp_input=None,
num_split=None,
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index ae79c01949..0280c89c10 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -91,6 +91,59 @@ def string_split(source, delimiter=" ", skip_empty=True): # pylint: disable=inv
shape.set_shape([2])
return sparse_tensor.SparseTensor(indices, values, shape)
+@tf_export("strings.split")
+def string_split_v2(source, sep=None, maxsplit=-1):
+ """Split elements of `source` based on `sep` into a `SparseTensor`.
+
+ Let N be the size of source (typically N will be the batch size). Split each
+ element of `source` based on `sep` and return a `SparseTensor`
+ containing the split tokens. Empty tokens are kept only when `sep` is given.
+
+ For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c',
+ then the output will be
+
+ st.indices = [0, 0;
+ 0, 1;
+ 1, 0;
+ 1, 1;
+ 1, 2]
+ st.shape = [2, 3]
+ st.values = ['hello', 'world', 'a', 'b', 'c']
+
+ If `sep` is given, consecutive delimiters are not grouped together and are
+ deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and
+ sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty
+ string, consecutive whitespace are regarded as a single separator, and the
+ result will contain no empty strings at the start or end if the string has
+ leading or trailing whitespace.
+
+ Note that the above mentioned behavior matches python's str.split.
+
+ Args:
+ source: `1-D` string `Tensor`, the strings to split.
+ sep: `0-D` string `Tensor`, the delimiter string.
+ maxsplit: An `int`. If `maxsplit > 0`, the maximum number of splits.
+
+ Raises:
+ ValueError: If sep is not a string.
+
+ Returns:
+ A `SparseTensor` of rank `2`, the strings split according to the delimiter.
+ The first column of the indices corresponds to the row in `source` and the
+ second column corresponds to the index of the split component in this row.
+ """
+ if sep is None:
+ sep = ''
+ sep = ops.convert_to_tensor(sep, dtype=dtypes.string)
+ source = ops.convert_to_tensor(source, dtype=dtypes.string)
+
+ indices, values, shape = gen_string_ops.string_split_v2(
+ source, sep=sep, maxsplit=maxsplit)
+ indices.set_shape([None, 2])
+ values.set_shape([None])
+ shape.set_shape([2])
+ return sparse_tensor.SparseTensor(indices, values, shape)
+
def _reduce_join_reduction_dims(x, axis, reduction_indices):
"""Returns range(rank(x) - 1, 0, -1) if reduction_indices is None."""
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index f49e2d314d..47414c28af 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -1786,6 +1786,23 @@ class variable_scope(object):
assert v.name == "foo/bar/v:0"
```
+ Simple example of how to reenter a premade variable scope safely:
+
+ ```python
+ with tf.variable_scope("foo") as vs:
+ pass
+
+ # Re-enter the variable scope.
+ with tf.variable_scope(vs,
+ auxiliary_name_scope=False) as vs1:
+ # Restore the original name_scope.
+ with tf.name_scope(vs1.original_name_scope):
+ v = tf.get_variable("v", [1])
+ assert v.name == "foo/v:0"
+ c = tf.constant([1], name="c")
+ assert c.name == "foo/c:0"
+ ```
+
Basic example of sharing a variable AUTO_REUSE:
```python
@@ -1924,7 +1941,9 @@ class variable_scope(object):
(which must have the same shape). Constraints are not safe to
use when doing asynchronous distributed training.
auxiliary_name_scope: If `True`, we create an auxiliary name scope with
- the scope. If `False`, we don't touch name scope.
+ the scope. If `False`, we don't create it. Note that the argument is
+ not inherited, and it only takes effect once, at creation. You
+ should only use it for re-entering a premade variable scope.
Returns:
A scope that can be captured and reused.
diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py
index 00de044505..00de044505 100755..100644
--- a/tensorflow/python/tools/import_pb_to_tensorboard.py
+++ b/tensorflow/python/tools/import_pb_to_tensorboard.py
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 522965990b..b59f8e1f98 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1719,7 +1719,7 @@ def tf_py_build_info_genrule():
name="py_build_info_gen",
outs=["platform/build_info.py"],
cmd=
- "$(location //tensorflow/tools/build_info:gen_build_info.py) --raw_generate \"$@\" --build_config " + if_cuda("cuda", "cpu"),
+ "$(location //tensorflow/tools/build_info:gen_build_info.py) --raw_generate \"$@\" --build_config " + if_cuda("cuda", "cpu"),
local=1,
tools=[clean_dep("//tensorflow/tools/build_info:gen_build_info.py")],)
diff --git a/tensorflow/tools/api/generator/create_python_api.py b/tensorflow/tools/api/generator/create_python_api.py
index bca9fa49eb..671b7e387e 100644
--- a/tensorflow/tools/api/generator/create_python_api.py
+++ b/tensorflow/tools/api/generator/create_python_api.py
@@ -41,7 +41,11 @@ _GENERATED_FILE_HEADER = """# This file is MACHINE GENERATED! Do not edit.
# Generated by: tensorflow/tools/api/generator/create_python_api.py script.
\"\"\"%s
\"\"\"
+
+from __future__ import print_function
+
"""
+_GENERATED_FILE_FOOTER = "\n\ndel print_function\n"
class SymbolExposedTwiceError(Exception):
@@ -149,6 +153,7 @@ class _ModuleInitCodeBuilder(object):
_names_with_underscore = [%s]
__all__ = [_s for _s in dir() if not _s.startswith('_')]
__all__.extend([_s for _s in _names_with_underscore])
+__all__.remove('print_function')
''' % underscore_names_str
return module_text_map
@@ -333,7 +338,8 @@ def create_api_files(
if module or not root_init_template:
contents = (
_GENERATED_FILE_HEADER %
- get_module_docstring(module, package, api_name) + text)
+ get_module_docstring(module, package, api_name) +
+ text + _GENERATED_FILE_FOOTER)
else:
# Read base init file
with open(root_init_template, 'r') as root_init_template_file:
diff --git a/tensorflow/tools/api/golden/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/tensorflow.image.pbtxt
index 5bb3b3c444..10171b3d60 100644
--- a/tensorflow/tools/api/golden/tensorflow.image.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.image.pbtxt
@@ -58,7 +58,7 @@ tf_module {
}
member_method {
name: "decode_image"
- argspec: "args=[\'contents\', \'channels\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+ argspec: "args=[\'contents\', \'channels\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'uint8\'>\", \'None\'], "
}
member_method {
name: "decode_jpeg"
diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt
index dc2bd40096..3051c4437e 100644
--- a/tensorflow/tools/api/golden/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.pbtxt
@@ -1533,6 +1533,10 @@ tf_module {
argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
member_method {
+ name: "print"
+ argspec: "args=[\'input_\', \'data\', \'message\', \'first_n\', \'summarize\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+ }
+ member_method {
name: "py_func"
argspec: "args=[\'func\', \'inp\', \'Tout\', \'stateful\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
}
diff --git a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt
index a3fbe95bba..b641c39feb 100644
--- a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt
@@ -4,4 +4,8 @@ tf_module {
name: "regex_full_match"
argspec: "args=[\'input\', \'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
+ member_method {
+ name: "split"
+ argspec: "args=[\'source\', \'sep\', \'maxsplit\'], varargs=None, keywords=None, defaults=[\'None\', \'-1\'], "
+ }
}
diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh
index 5fa75e1d61..883bb93647 100755
--- a/tensorflow/tools/ci_build/builds/pip.sh
+++ b/tensorflow/tools/ci_build/builds/pip.sh
@@ -322,6 +322,10 @@ create_activate_virtualenv_and_install_tensorflow() {
pip install -v ${PIP_FLAGS} ${WHL_PATH} || \
die "pip install (forcing to reinstall tensorflow) FAILED"
echo "Successfully installed pip package ${TF_WHEEL_PATH}"
+
+ # Force downgrade setuptools.
+ pip install --upgrade setuptools==39.1.0
+
}
################################################################################
diff --git a/tensorflow/tools/ci_build/builds/with_the_same_user b/tensorflow/tools/ci_build/builds/with_the_same_user
index d4bf546d40..b216e3549f 100755
--- a/tensorflow/tools/ci_build/builds/with_the_same_user
+++ b/tensorflow/tools/ci_build/builds/with_the_same_user
@@ -40,7 +40,7 @@ if [ -n "${CI_BUILD_USER_FORCE_BADNAME}" ]; then
ADDUSER_OPTS="--force-badname"
fi
-getent group "${CI_BUILD_GID}" || addgroup --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}"
+getent group "${CI_BUILD_GID}" || addgroup ${ADDUSER_OPTS} --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}"
getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \
--gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" \
--gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" \
diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh
index 072dd6ab99..1f0fd0387a 100755
--- a/tensorflow/tools/ci_build/ci_build.sh
+++ b/tensorflow/tools/ci_build/ci_build.sh
@@ -134,6 +134,12 @@ if [[ $? != "0" ]]; then
die "ERROR: docker build failed. Dockerfile is at ${DOCKERFILE_PATH}"
fi
+# If caller wants the with_the_same_user script to allow bad usernames,
+# pass the var to the docker environment
+if [ -n "${CI_BUILD_USER_FORCE_BADNAME}" ]; then
+ CI_BUILD_USER_FORCE_BADNAME_ENV="-e CI_BUILD_USER_FORCE_BADNAME=yes"
+fi
+
# Run the command inside the container.
echo "Running '${COMMAND[*]}' inside ${DOCKER_IMG_NAME}..."
mkdir -p ${WORKSPACE}/bazel-ci_build-cache
@@ -148,6 +154,7 @@ ${DOCKER_BINARY} run --rm --pid=host \
-e "CI_BUILD_GROUP=$(id -g -n)" \
-e "CI_BUILD_GID=$(id -g)" \
-e "CI_TENSORFLOW_SUBMODULE_PATH=${CI_TENSORFLOW_SUBMODULE_PATH}" \
+ ${CI_BUILD_USER_FORCE_BADNAME_ENV} \
-v ${WORKSPACE}:/workspace \
-w /workspace \
${GPU_EXTRA_PARAMS} \
diff --git a/tensorflow/tools/ci_build/copy_binary.py b/tensorflow/tools/ci_build/copy_binary.py
index 420d390d2b..148526492d 100755
--- a/tensorflow/tools/ci_build/copy_binary.py
+++ b/tensorflow/tools/ci_build/copy_binary.py
@@ -32,7 +32,8 @@ import shutil
import tempfile
import zipfile
-TF_NIGHTLY_REGEX = r"(.+)tf_nightly(|_gpu)-(\d\.\d\.\d.dev[\d]{0,8})-(.+)\.whl"
+TF_NIGHTLY_REGEX = (r"(.+)tf_nightly(|_gpu)-(\d\.[\d]{1,2}"
+ "\.\d.dev[\d]{0,8})-(.+)\.whl")
BINARY_STRING_TEMPLATE = "%s-%s-%s.whl"
diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index 60290df833..88f1d04193 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -115,3 +115,7 @@ pip2 install keras_applications==1.0.2
pip3 install keras_applications==1.0.2
pip2 install keras_preprocessing==1.0.1
pip3 install keras_preprocessing==1.0.1
+
+# Install last working version of setuptools.
+pip2 install --upgrade setuptools==39.1.0
+pip3 install --upgrade setuptools==39.1.0
diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
index edb9d4b929..acd69ef346 100755
--- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
@@ -39,7 +39,6 @@ if [[ -z $pip35_version ]]; then
fi
set -e
-pip3.5 install --upgrade setuptools
pip3.5 install --upgrade pip
pip3.5 install --upgrade virtualenv
@@ -86,4 +85,7 @@ pip3.5 install --upgrade termcolor
pip3.5 install keras_applications==1.0.2
pip3.5 install keras_preprocessing==1.0.1
+# Install last working version of setuptools.
+pip3.5 install --upgrade setuptools==39.1.0
+
# LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh)
diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
index 5635977731..323b30f48e 100755
--- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
@@ -49,7 +49,6 @@ cd Python-3.6.1
make altinstall
ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3
-pip3 install --upgrade setuptools
pip3 install --upgrade pip
pip3 install --upgrade virtualenv
@@ -101,4 +100,8 @@ pip3 install --upgrade termcolor
# Keras
pip3.5 install keras_applications==1.0.2
pip3.5 install keras_preprocessing==1.0.1
+
+# Install last working version of setuptools.
+pip3 install --upgrade setuptools==39.1.0
+
# LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh)
diff --git a/tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh b/tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh
new file mode 100755
index 0000000000..10a09a415a
--- /dev/null
+++ b/tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Usage: basic-mkl-test.sh
+
+# Helper function to traverse directories up until given file is found.
+function upsearch () {
+ test / == "$PWD" && return || \
+ test -e "$1" && echo "$PWD" && return || \
+ cd .. && upsearch "$1"
+}
+
+# Set up WORKSPACE.
+WORKSPACE="${WORKSPACE:-$(upsearch WORKSPACE)}"
+
+BUILD_TAG=mkl-ci-test CI_BUILD_USER_FORCE_BADNAME=yes ${WORKSPACE}/tensorflow/tools/ci_build/ci_build.sh cpu tensorflow/tools/ci_build/linux/cpu/run_mkl.sh
diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
index 1bd1852ffc..b8bce57c87 100755
--- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
+++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
@@ -79,6 +79,7 @@ if [[ $1 == "PI_ONE" ]]; then
--linkopt=-L${OPENBLAS_INSTALL_PATH}/lib/
--linkopt=-l:libopenblas.a"
echo "Building for the Pi One/Zero, with no NEON support"
+ WHEEL_ARCH=linux_armv6l
else
PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4
--copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR
@@ -86,6 +87,7 @@ else
--copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
--copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
--copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8'
+ WHEEL_ARCH=linux_armv7l
echo "Building for the Pi Two/Three, with NEON acceleration"
fi
@@ -100,6 +102,8 @@ bazel build -c opt ${PI_COPTS} \
--copt=-fomit-frame-pointer --cpu=armeabi \
--crosstool_top=@local_config_arm_compiler//:toolchain \
--verbose_failures \
+ //tensorflow:libtensorflow.so \
+ //tensorflow:libtensorflow_framework.so \
//tensorflow/tools/benchmark:benchmark_model \
//tensorflow/tools/pip_package:build_pip_package
@@ -112,10 +116,12 @@ BDIST_OPTS="--universal" \
bazel-bin/tensorflow/tools/pip_package/build_pip_package "${OUTDIR}"
OLD_FN=$(ls "${OUTDIR}" | grep -m 1 \.whl)
-SUB='s/tensorflow-([^-]+)-([^-]+)-.*/tensorflow-\1-\2-none-any.whl/; print'
+SUB='s/tensorflow-([^-]+)-([^-]+)-.*/tensorflow-\1-\2-none-'${WHEEL_ARCH}'.whl/; print'
NEW_FN=$(echo "${OLD_FN}" | perl -ne "${SUB}")
mv "${OUTDIR}/${OLD_FN}" "${OUTDIR}/${NEW_FN}"
cp bazel-bin/tensorflow/tools/benchmark/benchmark_model "${OUTDIR}"
+cp bazel-bin/tensorflow/libtensorflow.so "${OUTDIR}"
+cp bazel-bin/tensorflow/libtensorflow_framework.so "${OUTDIR}"
echo "Output can be found here:"
find "${OUTDIR}"
diff --git a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl
index 47539b2423..f8f63e276c 100644
--- a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl
+++ b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl
@@ -31,7 +31,11 @@ def _def_file_filter_configure_impl(repository_ctx):
vc_path = find_vc_path(repository_ctx)
if vc_path == "visual-studio-not-found":
auto_configure_fail("Visual C++ build tools not found on your machine")
- undname_bin_path = find_msvc_tool(repository_ctx, vc_path, "undname.exe").replace("\\", "\\\\")
+
+ undname = find_msvc_tool(repository_ctx, vc_path, "undname.exe")
+ if undname == None:
+ auto_configure_fail("Couldn't find undname.exe under %s, please check your VC installation and set BAZEL_VC environment variable correctly." % vc_path)
+ undname_bin_path = undname.replace("\\", "\\\\")
repository_ctx.template(
"def_file_filter.py",
diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh
index 06c2b997cb..b0114721bd 100755
--- a/tensorflow/tools/dist_test/local_test.sh
+++ b/tensorflow/tools/dist_test/local_test.sh
@@ -64,9 +64,6 @@ die() {
# Configurations
DOCKER_IMG_NAME="tensorflow/tf-dist-test-local-cluster"
-# Use TensorFlow v1.5.0 for Python 2.7 and CPU only as we set num_gpus to 0 in the below
-DEFAULT_WHL_FILE_LOCATION="https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl"
-
# Parse input arguments
LEAVE_CONTAINER_RUNNING=0
MODEL_NAME=""
@@ -77,8 +74,7 @@ SYNC_REPLICAS_FLAG=""
WHL_FILE_LOCATION=${1}
if [[ -z "${WHL_FILE_LOCATION}" ]]; then
- WHL_FILE_LOCATION=${DEFAULT_WHL_FILE_LOCATION}
- echo "use default whl file location"
+ echo "WARNING: No wheel url passed. Will use latest tf-nightly cpu p2 wheel."
fi
while true; do
@@ -131,7 +127,11 @@ echo "Building in temporary directory: ${BUILD_DIR}"
cp -r ${DIR}/* "${BUILD_DIR}"/ || \
die "Failed to copy files to ${BUILD_DIR}"
-if [[ $WHL_FILE_LOCATION =~ 'http://' || $WHL_FILE_LOCATION =~ 'https://' ]]; then
+# Download whl file into the build context directory.
+if [[ -z "${WHL_FILE_LOCATION}" ]]; then
+ pip2 download --no-deps tf-nightly
+ cp tf-nightly-*.whl "${BUILD_DIR}"/tensorflow-none-any.whl
+elif [[ $WHL_FILE_LOCATION =~ 'http://' || $WHL_FILE_LOCATION =~ 'https://' ]]; then
# Download whl file into the build context directory.
wget -P "${BUILD_DIR}" "${WHL_FILE_LOCATION}" || \
die "Failed to download tensorflow whl file from URL: ${WHL_FILE_LOCATION}"
diff --git a/tensorflow/tools/dist_test/remote_test.sh b/tensorflow/tools/dist_test/remote_test.sh
index 935535312d..e188c88c8f 100755
--- a/tensorflow/tools/dist_test/remote_test.sh
+++ b/tensorflow/tools/dist_test/remote_test.sh
@@ -108,7 +108,7 @@ fi
# Parse command-line arguments.
WHL_URL=${1}
if [[ -z "${WHL_URL}" ]]; then
- die "whl URL is not specified"
+ echo "WARNING: No wheel url passed. Will use latest tf-nightly cpu p2 wheel."
fi
# Create docker build context directory.
@@ -121,8 +121,13 @@ cp -r ${DIR}/* ${BUILD_DIR}/ || \
die "Failed to copy files to ${BUILD_DIR}"
# Download whl file into the build context directory.
-wget -P "${BUILD_DIR}" ${WHL_URL} || \
- die "Failed to download tensorflow whl file from URL: ${WHL_URL}"
+if [[ -z "${WHL_URL}" ]]; then
+ pip2 download --no-deps tf-nightly
+ cp tf-nightly-*.whl "${BUILD_DIR}"/tensorflow-none-any.whl
+else
+ wget -P "${BUILD_DIR}" ${WHL_URL} || \
+ die "Failed to download tensorflow whl file from URL: ${WHL_URL}"
+fi
# Build docker image for test.
docker build ${NO_CACHE_FLAG} \
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index 406d134699..57a491255e 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -76,7 +76,7 @@ RUN mkdir /bazel && \
# Download and build TensorFlow.
WORKDIR /tensorflow
-RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.git .
+RUN git clone --branch=r1.9 --depth=1 https://github.com/tensorflow/tensorflow.git .
# TODO(craigcitro): Don't install the pip package, since it makes it
# more difficult to experiment with local changes. Instead, just add
diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
index a6cd44ced1..6796ad70e5 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
+++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
@@ -3,7 +3,7 @@ FROM tensorflow/tensorflow:latest-devel
LABEL maintainer="Clayne Robison<clayne.b.robison@intel.com>"
# These arguments are parameterized. Use --build-args to override.
-ARG TF_BRANCH=r1.8
+ARG TF_BRANCH=r1.9
ARG WHL_DIR=/whl
RUN apt-get update && apt-get install -y --no-install-recommends \
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 2fe47f3356..204b5b4dba 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -13,8 +13,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
cuda-cusparse-dev-9-0 \
curl \
git \
- libcudnn7=7.0.5.15-1+cuda9.0 \
- libcudnn7-dev=7.0.5.15-1+cuda9.0 \
+ libcudnn7=7.1.4.18-1+cuda9.0 \
+ libcudnn7-dev=7.1.4.18-1+cuda9.0 \
libcurl3-dev \
libfreetype6-dev \
libhdf5-serial-dev \
@@ -85,7 +85,7 @@ RUN mkdir /bazel && \
# Download and build TensorFlow.
WORKDIR /tensorflow
-RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.git .
+RUN git clone --branch=r1.9 --depth=1 https://github.com/tensorflow/tensorflow.git .
# Configure the build for our CUDA configuration.
ENV CI_BUILD_PYTHON python
diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu
index bff4a20392..9197651ff4 100644
--- a/tensorflow/tools/docker/Dockerfile.gpu
+++ b/tensorflow/tools/docker/Dockerfile.gpu
@@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
cuda-cusolver-9-0 \
cuda-cusparse-9-0 \
curl \
- libcudnn7=7.0.5.15-1+cuda9.0 \
+ libcudnn7=7.1.4.18-1+cuda9.0 \
libfreetype6-dev \
libhdf5-serial-dev \
libpng12-dev \
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 5910f0625e..620fef9363 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -61,6 +61,7 @@ COMMON_PIP_DEPS = [
"//tensorflow/contrib/autograph/core:core",
"//tensorflow/contrib/autograph/impl:impl",
"//tensorflow/contrib/autograph/lang:lang",
+ "//tensorflow/contrib/autograph/operators:operators",
"//tensorflow/contrib/autograph/pyct:pyct",
"//tensorflow/contrib/autograph/pyct/static_analysis:static_analysis",
"//tensorflow/contrib/boosted_trees:boosted_trees_pip",
diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh
index 0c4065bc77..f7e42ce536 100755
--- a/tensorflow/tools/pip_package/build_pip_package.sh
+++ b/tensorflow/tools/pip_package/build_pip_package.sh
@@ -41,51 +41,15 @@ function is_windows() {
fi
}
-function main() {
+function prepare_src() {
if [ $# -lt 1 ] ; then
echo "No destination dir provided"
exit 1
fi
- DEST=$(real_path $1)
- TMPDIR=$(mktemp -d -t tmp.XXXXXXXXXX)
-
- PKG_NAME_FLAG=""
- GPU_BUILD=0
- NIGHTLY_BUILD=0
- PROJECT_NAME=""
- while true; do
- if [[ "$1" == "--nightly_flag" ]]; then
- NIGHTLY_BUILD=1
- elif [[ "$1" == "--gpu" ]]; then
- GPU_BUILD=1
- elif [[ "$1" == "--gpudirect" ]]; then
- PKG_NAME_FLAG="--project_name tensorflow_gpudirect"
- elif [[ "$1" == "--project_name" ]]; then
- shift
- if [[ -z "$1" ]]; then
- break
- fi
- PROJECT_NAME="$1"
- fi
- shift
-
- if [[ -z "$1" ]]; then
- break
- fi
- done
-
- if [[ -n ${PROJECT_NAME} ]]; then
- PKG_NAME_FLAG="--project_name ${PROJECT_NAME}"
- elif [[ ${NIGHTLY_BUILD} == "1" && ${GPU_BUILD} == "1" ]]; then
- PKG_NAME_FLAG="--project_name tf_nightly_gpu"
- elif [[ ${NIGHTLY_BUILD} == "1" ]]; then
- PKG_NAME_FLAG="--project_name tf_nightly"
- elif [[ ${GPU_BUILD} == "1" ]]; then
- PKG_NAME_FLAG="--project_name tensorflow_gpu"
- fi
-
- echo $(date) : "=== Using tmpdir: ${TMPDIR}"
+ TMPDIR="$1"
+ mkdir -p "$TMPDIR"
+ echo $(date) : "=== Preparing sources in dir: ${TMPDIR}"
if [ ! -d bazel-bin/tensorflow ]; then
echo "Could not find bazel-bin. Did you run from the root of the build tree?"
@@ -155,17 +119,28 @@ function main() {
# over so user defined ops can be compiled.
mkdir -p ${TMPDIR}/google
mkdir -p ${TMPDIR}/third_party
- pushd ${RUNFILES%org_tensorflow}
+ pushd ${RUNFILES%org_tensorflow} > /dev/null
for header in $(find protobuf_archive -name \*.h); do
mkdir -p "${TMPDIR}/google/$(dirname ${header})"
cp "$header" "${TMPDIR}/google/$(dirname ${header})/"
done
- popd
+ popd > /dev/null
cp -R $RUNFILES/third_party/eigen3 ${TMPDIR}/third_party
cp tensorflow/tools/pip_package/MANIFEST.in ${TMPDIR}
cp tensorflow/tools/pip_package/README ${TMPDIR}
cp tensorflow/tools/pip_package/setup.py ${TMPDIR}
+}
+
+function build_wheel() {
+ if [ $# -lt 2 ] ; then
+ echo "No src and dest dir provided"
+ exit 1
+ fi
+
+ TMPDIR="$1"
+ DEST="$2"
+ PKG_NAME_FLAG="$3"
# Before we leave the top-level directory, make sure we know how to
# call python.
@@ -173,15 +148,110 @@ function main() {
source tools/python_bin_path.sh
fi
- pushd ${TMPDIR}
+ pushd ${TMPDIR} > /dev/null
rm -f MANIFEST
echo $(date) : "=== Building wheel"
"${PYTHON_BIN_PATH:-python}" setup.py bdist_wheel ${PKG_NAME_FLAG} >/dev/null
mkdir -p ${DEST}
cp dist/* ${DEST}
- popd
- rm -rf ${TMPDIR}
+ popd > /dev/null
echo $(date) : "=== Output wheel file is in: ${DEST}"
}
+# Prints the command-line help for this script to stdout, then terminates
+# the script with exit status 1.
+function usage() {
+  # One printf call emits every help line; each argument becomes one line.
+  printf '%s\n' \
+    "Usage:" \
+    "$0 [--src srcdir] [--dst dstdir] [options]" \
+    "$0 dstdir [options]" \
+    "" \
+    " --src prepare sources in srcdir" \
+    " will use temporary dir if not specified" \
+    "" \
+    " --dst build wheel in dstdir" \
+    " if dstdir is not set do not build, only prepare sources" \
+    "" \
+    " Options:" \
+    " --project_name <name> set project name to name" \
+    " --gpu build tensorflow_gpu" \
+    " --gpudirect build tensorflow_gpudirect" \
+    " --nightly_flag build tensorflow nightly" \
+    ""
+  exit 1
+}
+
+# Entry point: parses the command line, stages the package sources with
+# prepare_src and, when a destination directory was given, builds the wheel
+# with build_wheel.
+#
+# Arguments:
+#   --src srcdir           stage sources in srcdir (kept after the run)
+#   --dst dstdir | dstdir  directory that receives the wheel; a bare
+#                          non-option argument is treated as dstdir
+#   --project_name <name>  explicit project name (takes precedence over
+#                          the --gpu / --nightly_flag derived names)
+#   --gpu, --gpudirect, --nightly_flag, --help
+#
+# Exits with status 1 after printing usage when --help is given, when
+# --src/--dst lacks a value, or when neither srcdir nor dstdir is provided.
+function main() {
+  PKG_NAME_FLAG=""
+  PROJECT_NAME=""
+  GPU_BUILD=0
+  NIGHTLY_BUILD=0
+  SRCDIR=""
+  DSTDIR=""
+  CLEANSRC=1
+  # Test before the first iteration so an empty command line is never
+  # handed to real_path as if it were a destination directory.
+  while [[ -n "$1" ]]; do
+    if [[ "$1" == "--help" ]]; then
+      usage
+      exit 1
+    elif [[ "$1" == "--nightly_flag" ]]; then
+      NIGHTLY_BUILD=1
+    elif [[ "$1" == "--gpu" ]]; then
+      GPU_BUILD=1
+    elif [[ "$1" == "--gpudirect" ]]; then
+      PKG_NAME_FLAG="--project_name tensorflow_gpudirect"
+    elif [[ "$1" == "--project_name" ]]; then
+      shift
+      if [[ -z "$1" ]]; then
+        break
+      fi
+      PROJECT_NAME="$1"
+    elif [[ "$1" == "--src" ]]; then
+      shift
+      if [[ -z "$1" ]]; then
+        echo "No directory provided for --src"
+        usage
+        exit 1
+      fi
+      # Quoted so that a path containing whitespace stays one argument.
+      SRCDIR="$(real_path "$1")"
+      # A caller-supplied source dir must survive the build.
+      CLEANSRC=0
+    elif [[ "$1" == "--dst" ]]; then
+      shift
+      if [[ -z "$1" ]]; then
+        echo "No directory provided for --dst"
+        usage
+        exit 1
+      fi
+      DSTDIR="$(real_path "$1")"
+    else
+      DSTDIR="$(real_path "$1")"
+    fi
+    shift
+  done
+
+  if [[ -z "$DSTDIR" ]] && [[ -z "$SRCDIR" ]]; then
+    echo "No destination dir provided"
+    usage
+    exit 1
+  fi
+
+  if [[ -z "$SRCDIR" ]]; then
+    # make temp srcdir if none set
+    SRCDIR="$(mktemp -d -t tmp.XXXXXXXXXX)"
+  fi
+
+  prepare_src "$SRCDIR"
+
+  if [[ -z "$DSTDIR" ]]; then
+    # only want to prepare sources
+    exit
+  fi
+
+  # An explicit --project_name wins; otherwise derive the name from the
+  # nightly/gpu switches. With none of these set, PKG_NAME_FLAG keeps
+  # whatever --gpudirect assigned (or stays empty).
+  if [[ -n ${PROJECT_NAME} ]]; then
+    PKG_NAME_FLAG="--project_name ${PROJECT_NAME}"
+  elif [[ ${NIGHTLY_BUILD} == "1" && ${GPU_BUILD} == "1" ]]; then
+    PKG_NAME_FLAG="--project_name tf_nightly_gpu"
+  elif [[ ${NIGHTLY_BUILD} == "1" ]]; then
+    PKG_NAME_FLAG="--project_name tf_nightly"
+  elif [[ ${GPU_BUILD} == "1" ]]; then
+    PKG_NAME_FLAG="--project_name tensorflow_gpu"
+  fi
+
+  build_wheel "$SRCDIR" "$DSTDIR" "$PKG_NAME_FLAG"
+
+  if [[ $CLEANSRC -ne 0 ]]; then
+    # Remove the temporary staging dir created above. SRCDIR is used
+    # directly rather than the TMPDIR global assigned inside the helpers.
+    rm -rf "${SRCDIR}"
+  fi
+}
+
main "$@"
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index d25a9e77b1..97f625e7e9 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n')
# This version string is semver compatible, but incompatible with pip.
# For pip, we will remove all '-' characters from this string, and use the
# result for pip.
-_VERSION = '1.8.0'
+_VERSION = '1.9.0-rc0'
REQUIRED_PACKAGES = [
'absl-py >= 0.1.6',
@@ -54,6 +54,7 @@ REQUIRED_PACKAGES = [
'numpy >= 1.13.3',
'six >= 1.10.0',
'protobuf >= 3.4.0',
+ 'setuptools <= 39.1.0',
'tensorboard >= 1.8.0, < 1.9.0',
'termcolor >= 1.1.0',
]
diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
index 29add6d5ea..15d7c70281 100644
--- a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
+++ b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
@@ -814,6 +814,9 @@ void Generator::Generate(const FileDescriptor& fd) {
// Add header to cc file.
SetOutput(&cc_);
Print("// GENERATED FILE - DO NOT MODIFY");
+ Print();
+ Print("#include <algorithm>"); // for `std::stable_sort()`
+ Print();
headers = {GetProtoTextHeaderName(fd, true /* impl */)};
AddHeadersToCurrentSection(headers);
Print();
diff --git a/tensorflow/tools/quantization/quantize_graph_test.py b/tensorflow/tools/quantization/quantize_graph_test.py
index df71840b64..92bb5127da 100644
--- a/tensorflow/tools/quantization/quantize_graph_test.py
+++ b/tensorflow/tools/quantization/quantize_graph_test.py
@@ -119,8 +119,8 @@ def are_tensors_near(a, b, tolerance):
flat_a = a.flatten()
flat_b = b.flatten()
if len(flat_a) != len(flat_b):
- print("Tensors are different sizes: " + str(len(flat_a)) + " vs " + str(
- len(flat_b)))
+ tf_logging.info("Tensors are different sizes: " + str(len(flat_a)) + " vs "
+ + str(len(flat_b)))
return False
value_count = len(flat_a)
how_many_different = 0
@@ -140,10 +140,10 @@ def are_tensors_near(a, b, tolerance):
if how_many_different == 0:
return True
else:
- print("Tensors have {0} different values ({1}%), with mean difference"
- " {2} and mean absolute difference {3}".format(
- how_many_different, proportion_different * 100, mean_difference,
- mean_abs_difference))
+ tf_logging.info("Tensors have {0} different values ({1}%), with mean"
+ " difference {2} and mean absolute difference {3}".format(
+ how_many_different, proportion_different * 100,
+ mean_difference, mean_abs_difference))
return False
diff --git a/tensorflow/tools/test/upload_test_benchmarks.py b/tensorflow/tools/test/upload_test_benchmarks.py
index 9c45359ee1..c030575109 100644
--- a/tensorflow/tools/test/upload_test_benchmarks.py
+++ b/tensorflow/tools/test/upload_test_benchmarks.py
@@ -89,7 +89,6 @@ import shutil
from six import text_type
from google.cloud import datastore
-from six import text_type
def is_real_file(dirpath, fname):
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index dbec66216a..4f3df570a5 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -50,31 +50,31 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
mkl_repository(
name = "mkl_linux",
urls = [
- "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_lnx_2018.0.2.20180127.tgz",
- "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_lnx_2018.0.2.20180127.tgz",
+ "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.14/mklml_lnx_2018.0.3.20180406.tgz",
+ "https://github.com/intel/mkl-dnn/releases/download/v0.14/mklml_lnx_2018.0.3.20180406.tgz"
],
- sha256 = "74844bd77294742bf2396ff040369d1aa4cdd9e826fcd38cf8398ae83564d146",
- strip_prefix = "mklml_lnx_2018.0.2.20180127",
+ sha256 = "d2305244fdc9b87db7426ed4496e87a4b3977ad3374d73b8000e8b7a5b7aa725",
+ strip_prefix = "mklml_lnx_2018.0.3.20180406",
build_file = clean_dep("//third_party/mkl:mkl.BUILD")
)
mkl_repository(
name = "mkl_windows",
urls = [
- "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_win_2018.0.2.20180127.zip",
- "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_win_2018.0.2.20180127.zip"
+ "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.14/mklml_win_2018.0.3.20180406.zip",
+ "https://github.com/intel/mkl-dnn/releases/download/v0.14/mklml_win_2018.0.3.20180406.zip"
],
- sha256 = "d8fbf0faa0684bffa3548005d05fe5cfe56ff9dbc0e15e7612d7ac01055a6ded",
- strip_prefix = "mklml_win_2018.0.2.20180127",
+ sha256 = "a584a5bf1c8d2ad70b90d12b52652030e9a338217719064fdb84b7ad0d693694",
+ strip_prefix = "mklml_win_2018.0.3.20180406",
build_file = clean_dep("//third_party/mkl:mkl.BUILD")
)
mkl_repository(
name = "mkl_darwin",
urls = [
- "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_mac_2018.0.2.20180127.tgz",
- "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_mac_2018.0.2.20180127.tgz"
+ "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.14/mklml_mac_2018.0.3.20180406.tgz",
+ "https://github.com/intel/mkl-dnn/releases/download/v0.14/mklml_mac_2018.0.3.20180406.tgz"
],
- sha256 = "aa740d71e14562bfea56e6829e6dc186e7487cbcf6748a88dec73826b7ec1943",
- strip_prefix = "mklml_mac_2018.0.2.20180127",
+ sha256 = "094e3dfd61c816136dc8d12a45cc611ce26c5f4828176a3644cd0b0efa15a25b",
+ strip_prefix = "mklml_mac_2018.0.3.20180406",
build_file = clean_dep("//third_party/mkl:mkl.BUILD")
)
@@ -85,11 +85,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
tf_http_archive(
name = "mkl_dnn",
urls = [
- "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/v0.13.tar.gz",
- "https://github.com/intel/mkl-dnn/archive/v0.13.tar.gz",
+ "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/v0.14.tar.gz",
+ "https://github.com/intel/mkl-dnn/archive/v0.14.tar.gz",
],
- sha256 = "d2cfd93a70cfe86ebe054477c530c9b5c1218b70f75856eb6d1956c68ee89e8f",
- strip_prefix = "mkl-dnn-0.13",
+ sha256 = "efebc53882856afec86457a2da644693f5d59c68772d41d640d6b60a8efc4eb0",
+ strip_prefix = "mkl-dnn-0.14",
build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"),
)
@@ -187,11 +187,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
tf_http_archive(
name = "highwayhash",
urls = [
- "https://mirror.bazel.build/github.com/google/highwayhash/archive/dfcb97ca4fe9277bf9dc1802dd979b071896453b.tar.gz",
- "https://github.com/google/highwayhash/archive/dfcb97ca4fe9277bf9dc1802dd979b071896453b.tar.gz",
+ "http://mirror.bazel.build/github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz",
+ "https://github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz",
],
- sha256 = "0f30a15b1566d93f146c8d149878a06e91d9bb7ec2cfd76906df62a82be4aac9",
- strip_prefix = "highwayhash-dfcb97ca4fe9277bf9dc1802dd979b071896453b",
+ sha256 = "9c3e0e87d581feeb0c18d814d98f170ff23e62967a2bd6855847f0b2fe598a37",
+ strip_prefix = "highwayhash-fd3d9af80465e4383162e4a7c5e2f406e82dd968",
build_file = clean_dep("//third_party:highwayhash.BUILD"),
)