Diffstat:
-rw-r--r--  WORKSPACE | 4
-rwxr-xr-x  configure | 65
-rw-r--r--  tensorflow/c/BUILD | 6
-rw-r--r--  tensorflow/cc/BUILD | 2
-rw-r--r--  tensorflow/cc/client/client_session.cc | 12
-rw-r--r--  tensorflow/cc/client/client_session.h | 79
-rw-r--r--  tensorflow/cc/framework/cc_op_gen.cc | 32
-rw-r--r--  tensorflow/cc/framework/cc_op_gen.h | 2
-rw-r--r--  tensorflow/cc/framework/grad_op_registry.h | 28
-rw-r--r--  tensorflow/cc/framework/gradient_checker.cc | 55
-rw-r--r--  tensorflow/cc/framework/gradient_checker.h | 16
-rw-r--r--  tensorflow/cc/framework/gradients.cc | 28
-rw-r--r--  tensorflow/cc/framework/gradients.h | 36
-rw-r--r--  tensorflow/cc/framework/gradients_test.cc | 18
-rw-r--r--  tensorflow/cc/framework/ops.h | 68
-rw-r--r--  tensorflow/cc/framework/scope.cc | 27
-rw-r--r--  tensorflow/cc/framework/scope.h | 232
-rw-r--r--  tensorflow/cc/framework/scope_test.cc | 6
-rw-r--r--  tensorflow/cc/framework/testutil.cc | 2
-rw-r--r--  tensorflow/cc/framework/testutil.h | 8
-rw-r--r--  tensorflow/cc/gradients/grad_testutil.h | 12
-rw-r--r--  tensorflow/cc/saved_model/constants.h | 14
-rw-r--r--  tensorflow/cc/saved_model/loader.h | 26
-rw-r--r--  tensorflow/cc/saved_model/signature_constants.h | 32
-rw-r--r--  tensorflow/cc/saved_model/tag_constants.h | 4
-rw-r--r--  tensorflow/cc/training/coordinator.h | 82
-rw-r--r--  tensorflow/cc/training/queue_runner.h | 32
-rw-r--r--  tensorflow/compiler/jit/graph_to_functiondef_test.cc | 3
-rw-r--r--  tensorflow/compiler/jit/xla_local_launch_op.cc | 10
-rw-r--r--  tensorflow/compiler/tests/BUILD | 14
-rw-r--r--  tensorflow/compiler/tests/build_defs.bzl | 13
-rw-r--r--  tensorflow/compiler/tests/random_ops_test.py | 67
-rw-r--r--  tensorflow/compiler/tf2xla/BUILD | 2
-rw-r--r--  tensorflow/compiler/tf2xla/op_registrations.cc | 14
-rw-r--r--  tensorflow/compiler/tf2xla/xla_compiler.cc | 2
-rw-r--r--  tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc | 1
-rw-r--r--  tensorflow/compiler/xla/reference_util.cc | 32
-rw-r--r--  tensorflow/compiler/xla/reference_util.h | 9
-rw-r--r--  tensorflow/compiler/xla/service/elemental_ir_emitter.cc | 2
-rw-r--r--  tensorflow/compiler/xla/service/executable.h | 3
-rw-r--r--  tensorflow/compiler/xla/service/gpu/BUILD | 33
-rw-r--r--  tensorflow/compiler/xla/service/gpu/fusion_merger.cc | 270
-rw-r--r--  tensorflow/compiler/xla/service/gpu/fusion_merger.h | 47
-rw-r--r--  tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc | 456
-rw-r--r--  tensorflow/compiler/xla/service/gpu/gpu_compiler.cc | 2
-rw-r--r--  tensorflow/compiler/xla/service/hlo_instruction.cc | 31
-rw-r--r--  tensorflow/compiler/xla/service/hlo_instruction.h | 17
-rw-r--r--  tensorflow/compiler/xla/service/local_service.cc | 7
-rw-r--r--  tensorflow/compiler/xla/shape_util.cc | 62
-rw-r--r--  tensorflow/compiler/xla/shape_util.h | 6
-rw-r--r--  tensorflow/compiler/xla/shape_util_test.cc | 20
-rw-r--r--  tensorflow/contrib/distributions/python/kernel_tests/categorical_test.py | 28
-rw-r--r--  tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py | 7
-rw-r--r--  tensorflow/contrib/distributions/python/ops/bijector.py | 2
-rw-r--r--  tensorflow/contrib/distributions/python/ops/categorical.py | 18
-rw-r--r--  tensorflow/contrib/graph_editor/transform.py | 37
-rw-r--r--  tensorflow/contrib/graph_editor/util.py | 73
-rw-r--r--  tensorflow/contrib/hvx/hexagon_controller/Makefile | 19
-rw-r--r--  tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v1_graph_init.c | 16
-rw-r--r--  tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_dummy_float_data.c | 16
-rw-r--r--  tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_dummy_int_data.c | 17
-rw-r--r--  tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_graph_init.c | 16
-rw-r--r--  tensorflow/contrib/hvx/hexagon_controller/src_impl/graph_functions_wrapper.c | 355
-rw-r--r--  tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c | 374
-rw-r--r--  tensorflow/contrib/hvx/hexagon_controller/src_impl/include/hexagon_controller.h | 124
-rw-r--r--  tensorflow/contrib/hvx/hexagon_controller/src_log/include/tfm_log.h | 74
-rw-r--r--  tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/node_data_float.h | 41
-rwxr-xr-x  tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c | 204
-rw-r--r--  tensorflow/contrib/hvx/hexagon_controller/target/make/android.min | 70
-rw-r--r--  tensorflow/contrib/layers/python/layers/layers.py | 9
-rw-r--r--  tensorflow/contrib/layers/python/layers/layers_test.py | 2
-rw-r--r--  tensorflow/contrib/layers/python/layers/optimizers.py | 5
-rw-r--r--  tensorflow/contrib/layers/python/layers/optimizers_test.py | 8
-rw-r--r--  tensorflow/contrib/learn/python/learn/__init__.py | 1
-rw-r--r--  tensorflow/contrib/learn/python/learn/estimators/estimator.py | 41
-rw-r--r--  tensorflow/contrib/learn/python/learn/estimators/estimator_test.py | 22
-rw-r--r--  tensorflow/contrib/learn/python/learn/estimators/svm.py | 139
-rw-r--r--  tensorflow/contrib/learn/python/learn/experiment.py | 23
-rw-r--r--  tensorflow/contrib/learn/python/learn/experiment_test.py | 64
-rw-r--r--  tensorflow/contrib/learn/python/learn/monitors.py | 9
-rw-r--r--  tensorflow/contrib/learn/python/learn/utils/__init__.py | 4
-rw-r--r--  tensorflow/contrib/learn/python/learn/utils/export_test.py | 20
-rw-r--r--  tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py | 6
-rw-r--r--  tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py | 59
-rw-r--r--  tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py | 20
-rw-r--r--  tensorflow/contrib/linalg/python/kernel_tests/linear_operator_composition_test.py | 8
-rw-r--r--  tensorflow/contrib/linalg/python/kernel_tests/linear_operator_test.py | 18
-rw-r--r--  tensorflow/contrib/linalg/python/kernel_tests/linear_operator_util_test.py | 2
-rw-r--r--  tensorflow/contrib/linalg/python/ops/linear_operator.py | 76
-rw-r--r--  tensorflow/contrib/linalg/python/ops/linear_operator_composition.py | 10
-rw-r--r--  tensorflow/contrib/linalg/python/ops/linear_operator_diag.py | 2
-rw-r--r--  tensorflow/contrib/linalg/python/ops/linear_operator_identity.py | 10
-rw-r--r--  tensorflow/contrib/linalg/python/ops/linear_operator_matrix.py | 2
-rw-r--r--  tensorflow/contrib/linalg/python/ops/linear_operator_test_util.py | 35
-rw-r--r--  tensorflow/contrib/linalg/python/ops/linear_operator_tril.py | 2
-rw-r--r--  tensorflow/contrib/linalg/python/ops/linear_operator_util.py | 4
-rw-r--r--  tensorflow/contrib/makefile/sub_makefiles/hexagon_graph_execution/Makefile.in | 1
-rw-r--r--  tensorflow/contrib/makefile/tf_op_files.txt | 1
-rw-r--r--  tensorflow/contrib/metrics/python/ops/metric_ops.py | 40
-rw-r--r--  tensorflow/contrib/metrics/python/ops/metric_ops_test.py | 232
-rw-r--r--  tensorflow/contrib/seq2seq/BUILD | 40
-rw-r--r--  tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py | 156
-rw-r--r--  tensorflow/contrib/seq2seq/python/kernel_tests/sampling_decoder_test.py | 109
-rw-r--r--  tensorflow/contrib/seq2seq/python/ops/decoder.py | 237
-rw-r--r--  tensorflow/contrib/seq2seq/python/ops/sampling_decoder.py | 190
-rw-r--r--  tensorflow/contrib/tensor_forest/client/random_forest.py | 12
-rw-r--r--  tensorflow/contrib/training/python/training/batch_sequences_with_states_test.py | 287
-rw-r--r--  tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py | 298
-rw-r--r--  tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc | 4
-rw-r--r--  tensorflow/core/BUILD | 1
-rw-r--r--  tensorflow/core/common_runtime/direct_session.cc | 8
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc | 2
-rw-r--r--  tensorflow/core/common_runtime/graph_optimizer.cc | 128
-rw-r--r--  tensorflow/core/common_runtime/parallel_concat_optimizer.cc | 126
-rw-r--r--  tensorflow/core/common_runtime/shape_refiner_test.cc | 70
-rw-r--r--  tensorflow/core/distributed_runtime/BUILD | 2
-rw-r--r--  tensorflow/core/graph/graph_constructor.cc | 68
-rw-r--r--  tensorflow/core/graph/graph_constructor.h | 23
-rw-r--r--  tensorflow/core/graph/graph_constructor_test.cc | 114
-rw-r--r--  tensorflow/core/graph/graph_partition_test.cc | 78
-rw-r--r--  tensorflow/core/kernels/BUILD | 18
-rw-r--r--  tensorflow/core/kernels/hexagon/BUILD | 2
-rw-r--r--  tensorflow/core/kernels/hexagon/graph_transfer_utils.cc | 49
-rw-r--r--  tensorflow/core/kernels/hexagon/graph_transfer_utils.h | 41
-rw-r--r--  tensorflow/core/kernels/hexagon/graph_transferer.cc | 92
-rw-r--r--  tensorflow/core/kernels/hexagon/graph_transferer.h | 15
-rw-r--r--  tensorflow/core/kernels/hexagon/graph_transferer_test.cc | 39
-rw-r--r--  tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc | 36
-rw-r--r--  tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h | 3
-rw-r--r--  tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc | 66
-rw-r--r--  tensorflow/core/kernels/image_resizer_state.h | 12
-rw-r--r--  tensorflow/core/kernels/inplace_ops.cc | 108
-rw-r--r--  tensorflow/core/kernels/inplace_ops_functor.h | 14
-rw-r--r--  tensorflow/core/kernels/inplace_ops_functor_gpu.cu.cc | 61
-rw-r--r--  tensorflow/core/kernels/record_input_op.cc | 67
-rw-r--r--  tensorflow/core/kernels/record_yielder.cc | 216
-rw-r--r--  tensorflow/core/kernels/record_yielder.h | 157
-rw-r--r--  tensorflow/core/kernels/resize_bilinear_op.cc | 257
-rw-r--r--  tensorflow/core/kernels/resize_bilinear_op_test.cc | 142
-rw-r--r--  tensorflow/core/kernels/sparse_matmul_op.cc | 26
-rw-r--r--  tensorflow/core/kernels/variable_ops.h | 38
-rw-r--r--  tensorflow/core/ops/array_ops.cc | 31
-rw-r--r--  tensorflow/core/ops/compat/ops_history.v0.pbtxt | 47
-rw-r--r--  tensorflow/core/ops/data_flow_ops.cc | 23
-rw-r--r--  tensorflow/core/ops/ops.pbtxt | 59
-rw-r--r--  tensorflow/core/platform/profile_utils/clock_cycle_profiler.cc | 37
-rw-r--r--  tensorflow/core/platform/profile_utils/clock_cycle_profiler.h | 104
-rw-r--r--  tensorflow/core/platform/profile_utils/cpu_utils_test.cc | 14
-rw-r--r--  tensorflow/examples/android/BUILD | 18
-rw-r--r--  tensorflow/examples/android/jni/box_coder_jni.cc | 92
-rw-r--r--  tensorflow/examples/android/proto/box_coder.proto | 42
-rw-r--r--  tensorflow/examples/android/src/org/tensorflow/demo/DetectorActivity.java | 2
-rw-r--r--  tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowMultiBoxDetector.java | 55
-rw-r--r--  tensorflow/examples/label_image/main.cc | 2
-rw-r--r--  tensorflow/g3doc/api_docs/python/array_ops.md | 4
-rw-r--r--  tensorflow/g3doc/api_docs/python/contrib.graph_editor.md | 3
-rw-r--r--  tensorflow/g3doc/api_docs/python/contrib.learn.md | 60
-rw-r--r--  tensorflow/g3doc/api_docs/python/contrib.learn.monitors.md | 4
-rw-r--r--  tensorflow/g3doc/api_docs/python/contrib.linalg.md | 70
-rw-r--r--  tensorflow/g3doc/api_docs/python/contrib.metrics.md | 32
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.learn.LinearRegressor.md | 12
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.linalg.LinearOperatorDiag.md | 10
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.reverse_sequence.md | 4
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.LinearClassifier.md | 12
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.monitors.ValidationMonitor.md | 4
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorComposition.md | 10
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorIdentity.md | 10
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.LoggingTensorHook.md | 4
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.MonitoredTrainingSession.md | 10
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf_debug.LocalCLIDebugHook.md | 12
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.neg.md | 16
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md | 12
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.linalg.LinearOperatorScaledIdentity.md | 10
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.DNNClassifier.md | 12
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_true_positives.md | 8
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.sparse_softmax_cross_entropy_with_logits.md | 14
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.train.write_graph.md | 4
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_false_positives.md | 8
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.graph_editor.assign_renamed_collections_handler.md | 3
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.linalg.LinearOperatorMatrix.md | 10
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.linalg.LinearOperatorTriL.md | 10
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_true_negatives.md | 8
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.DNNRegressor.md | 12
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.linalg.LinearOperator.md | 10
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.metrics.streaming_false_negatives.md | 8
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_local_variable.md | 2
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_variable.md | 2
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.FeedFnHook.md | 88
-rw-r--r--  tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.FinalOpsHook.md | 111
-rw-r--r--  tensorflow/g3doc/api_docs/python/index.md | 3
-rw-r--r--  tensorflow/g3doc/api_docs/python/math_ops.md | 21
-rw-r--r--  tensorflow/g3doc/api_docs/python/nn.md | 14
-rw-r--r--  tensorflow/g3doc/api_docs/python/state_ops.md | 4
-rw-r--r--  tensorflow/g3doc/api_docs/python/tf_debug.md | 12
-rw-r--r--  tensorflow/g3doc/api_docs/python/train.md | 964
-rw-r--r--  tensorflow/g3doc/tutorials/tflearn/index.md | 4
-rw-r--r--  tensorflow/go/genop/internal/genop.go | 2
-rw-r--r--  tensorflow/go/graph.go | 35
-rw-r--r--  tensorflow/go/op/op_test.go | 33
-rw-r--r--  tensorflow/go/operation_test.go | 15
-rw-r--r--  tensorflow/go/shape.go | 102
-rw-r--r--  tensorflow/go/shape_test.go | 83
-rw-r--r--  tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java | 4
-rw-r--r--  tensorflow/python/client/session.py | 7
-rw-r--r--  tensorflow/python/debug/BUILD | 12
-rw-r--r--  tensorflow/python/debug/examples/debug_fibonacci.py | 7
-rw-r--r--  tensorflow/python/debug/examples/debug_mnist.py | 16
-rw-r--r--  tensorflow/python/debug/examples/debug_tflearn_iris.py | 37
-rwxr-xr-x  tensorflow/python/debug/examples/examples_test.sh | 54
-rw-r--r--  tensorflow/python/debug/wrappers/hooks.py | 29
-rw-r--r--  tensorflow/python/framework/meta_graph.py | 3
-rw-r--r--  tensorflow/python/kernel_tests/BUILD | 12
-rw-r--r--  tensorflow/python/kernel_tests/argmax_op_test.py | 6
-rw-r--r--  tensorflow/python/kernel_tests/confusion_matrix_test.py | 235
-rw-r--r--  tensorflow/python/kernel_tests/losses_test.py | 391
-rw-r--r--  tensorflow/python/kernel_tests/metrics_test.py | 146
-rw-r--r--  tensorflow/python/kernel_tests/record_input_test.py | 80
-rw-r--r--  tensorflow/python/layers/base.py | 8
-rw-r--r--  tensorflow/python/layers/convolutional.py | 10
-rw-r--r--  tensorflow/python/layers/convolutional_test.py | 127
-rw-r--r--  tensorflow/python/layers/core.py | 62
-rw-r--r--  tensorflow/python/layers/core_test.py | 49
-rw-r--r--  tensorflow/python/layers/normalization.py | 2
-rw-r--r--  tensorflow/python/layers/normalization_test.py | 15
-rw-r--r--  tensorflow/python/ops/confusion_matrix.py | 26
-rw-r--r--  tensorflow/python/ops/data_flow_ops.py | 62
-rw-r--r--  tensorflow/python/ops/hidden_ops.txt | 1
-rw-r--r--  tensorflow/python/ops/image_ops_test.py | 36
-rw-r--r--  tensorflow/python/ops/losses/BUILD | 3
-rw-r--r--  tensorflow/python/ops/losses/losses_impl.py | 339
-rw-r--r--  tensorflow/python/ops/math_ops.py | 10
-rw-r--r--  tensorflow/python/ops/metrics_impl.py | 89
-rw-r--r--  tensorflow/python/ops/nn_ops.py | 14
-rw-r--r--  tensorflow/python/tools/freeze_graph.py | 18
-rw-r--r--  tensorflow/python/training/basic_session_run_hooks.py | 41
-rw-r--r--  tensorflow/python/training/basic_session_run_hooks_test.py | 26
-rw-r--r--  tensorflow/python/training/monitored_session.py | 13
-rw-r--r--  tensorflow/python/training/monitored_session_test.py | 30
-rw-r--r--  tensorflow/python/training/training.py | 31
-rw-r--r--  tensorflow/stream_executor/cuda/cuda_driver.cc | 51
-rw-r--r--  tensorflow/stream_executor/cuda/cuda_driver.h | 14
-rw-r--r--  tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 13
-rw-r--r--  tensorflow/stream_executor/cuda/cuda_gpu_executor.h | 15
-rw-r--r--  tensorflow/stream_executor/dnn.h | 223
-rw-r--r--  tensorflow/stream_executor/host/host_gpu_executor.cc | 19
-rw-r--r--  tensorflow/stream_executor/host/host_gpu_executor.h | 15
-rw-r--r--  tensorflow/stream_executor/host_buffer.h | 48
-rw-r--r--  tensorflow/stream_executor/lib/status.h | 12
-rw-r--r--  tensorflow/stream_executor/stream.cc | 356
-rw-r--r--  tensorflow/stream_executor/stream.h | 68
-rw-r--r--  tensorflow/stream_executor/stream_executor_internal.h | 16
-rw-r--r--  tensorflow/stream_executor/stream_executor_pimpl.cc | 39
-rw-r--r--  tensorflow/tensorboard/BUILD | 1
-rw-r--r--  tensorflow/tensorboard/tensorboard.py | 1
-rw-r--r--  tensorflow/tensorflow.bzl | 20
-rwxr-xr-x  tensorflow/tools/ci_build/builds/libtensorflow.sh | 27
-rwxr-xr-x  tensorflow/tools/ci_build/ci_parameterized_build.sh | 5
-rwxr-xr-x  tensorflow/tools/ci_build/ci_sanity.sh | 33
-rwxr-xr-x  tensorflow/tools/ci_build/install/install_deb_packages.sh | 10
-rw-r--r--  tensorflow/tools/graph_transforms/summarize_graph_main.cc | 3
-rw-r--r--  tensorflow/tools/lib_package/BUILD | 107
-rw-r--r--  tensorflow/tools/lib_package/README.md | 31
-rwxr-xr-x  tensorflow/tools/lib_package/concat_licenses.sh | 28
-rw-r--r--  tensorflow/tools/lib_package/libtensorflow_test.c | 28
-rwxr-xr-x  tensorflow/tools/lib_package/libtensorflow_test.sh | 48
-rw-r--r--  tensorflow/tools/pip_package/BUILD | 1
-rw-r--r--  tensorflow/workspace.bzl | 24
-rw-r--r--  third_party/jemalloc.BUILD | 2
-rw-r--r--  third_party/llvm/llvm.BUILD | 8
-rw-r--r--  third_party/werkzeug.BUILD | 14
270 files changed, 10234 insertions, 3353 deletions
diff --git a/WORKSPACE b/WORKSPACE
index e0931512f4..a0c936af06 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -34,8 +34,8 @@ new_http_archive(
new_http_archive(
name = "mobile_multibox",
build_file = "models.BUILD",
- url = "https://storage.googleapis.com/download.tensorflow.org/models/mobile_multibox_v1.zip",
- sha256 = "b4c178fd6236dcf0a20d25d07c45eebe85281263978c6a6f1dfc49d75befc45f"
+ url = "https://storage.googleapis.com/download.tensorflow.org/models/mobile_multibox_v1a.zip",
+ sha256 = "859edcddf84dddb974c36c36cfc1f74555148e9c9213dedacf1d6b613ad52b96"
)
new_http_archive(
diff --git a/configure b/configure
index ff4ec262e3..a8e7bb7738 100755
--- a/configure
+++ b/configure
@@ -9,6 +9,23 @@ SOURCE_BASE_DIR=`pwd -P`
popd > /dev/null
PLATFORM="$(uname -s | tr 'A-Z' 'a-z')"
+
+function is_linux() {
+ if [[ "${PLATFORM}" == "linux" ]]; then
+ true
+ else
+ false
+ fi
+}
+
+function is_macos() {
+ if [[ "${PLATFORM}" == "darwin" ]]; then
+ true
+ else
+ false
+ fi
+}
+
function is_windows() {
# On windows, the shell script is actually running in msys
if [[ "${PLATFORM}" =~ msys_nt*|mingw*|cygwin*|uwin* ]]; then
@@ -65,16 +82,20 @@ if is_windows; then
TF_NEED_OPENCL=0
fi
-while [ "$TF_NEED_JEMALLOC" == "" ]; do
- read -p "Do you wish to use jemalloc as the malloc implementation? "\
-"(Linux only) [Y/n] " INPUT
- case $INPUT in
- [Yy]* ) echo "jemalloc enabled on Linux"; TF_NEED_JEMALLOC=1;;
- [Nn]* ) echo "jemalloc disabled on Linux"; TF_NEED_JEMALLOC=0;;
- "" ) echo "jemalloc enabled on Linux"; TF_NEED_JEMALLOC=1;;
- * ) echo "Invalid selection: " $INPUT;;
- esac
-done
+if is_linux; then
+ while [ "$TF_NEED_JEMALLOC" == "" ]; do
+ read -p "Do you wish to use jemalloc as the malloc implementation? [Y/n] "\
+ INPUT
+ case $INPUT in
+ [Yy]* ) echo "jemalloc enabled"; TF_NEED_JEMALLOC=1;;
+ [Nn]* ) echo "jemalloc disabled"; TF_NEED_JEMALLOC=0;;
+ "" ) echo "jemalloc enabled"; TF_NEED_JEMALLOC=1;;
+ * ) echo "Invalid selection: " $INPUT;;
+ esac
+ done
+else
+ TF_NEED_JEMALLOC=0
+fi
if [ "$TF_NEED_JEMALLOC" == "1" ]; then
sed -i -e "s/WITH_JEMALLOC = False/WITH_JEMALLOC = True/" tensorflow/core/platform/default/build_config.bzl
@@ -99,7 +120,7 @@ done
if [ "$TF_NEED_GCP" == "1" ]; then
## Verify that libcurl header files are available.
# Only check Linux, since on MacOS the header files are installed with XCode.
- if [[ $(uname -a) =~ Linux ]] && [[ ! -f "/usr/include/curl/curl.h" ]]; then
+ if is_linux && [[ ! -f "/usr/include/curl/curl.h" ]]; then
echo "ERROR: It appears that the development version of libcurl is not "\
"available. Please install the libcurl3-dev package."
exit 1
@@ -226,8 +247,6 @@ while ! is_windows && true; do
done
# Find out where the CUDA toolkit is installed
-OSNAME=`uname -s`
-
while true; do
# Configure the Cuda SDK version to use.
if [ -z "$TF_CUDA_VERSION" ]; then
@@ -259,9 +278,9 @@ while true; do
if is_windows; then
CUDA_RT_LIB_PATH="lib/x64/cudart.lib"
- elif [ "$OSNAME" == "Linux" ]; then
+ elif is_linux; then
CUDA_RT_LIB_PATH="lib64/libcudart.so${TF_CUDA_EXT}"
- elif [ "$OSNAME" == "Darwin" ]; then
+ elif is_macos; then
CUDA_RT_LIB_PATH="lib/libcudart${TF_CUDA_EXT}.dylib"
fi
@@ -307,10 +326,10 @@ while true; do
if is_windows; then
cudnn_lib_path="${CUDNN_INSTALL_PATH}/lib/x64/cudnn.lib"
cudnn_alt_lib_path="${CUDNN_INSTALL_PATH}/lib/x64/cudnn.lib"
- elif [ "$OSNAME" == "Linux" ]; then
+ elif is_linux; then
cudnn_lib_path="${CUDNN_INSTALL_PATH}/lib64/libcudnn.so"
cudnn_alt_lib_path="${CUDNN_INSTALL_PATH}/libcudnn.so"
- elif [ "$OSNAME" == "Darwin" ]; then
+ elif is_macos; then
cudnn_lib_path="${CUDNN_INSTALL_PATH}/lib/libcudnn.dylib"
cudnn_alt_lib_path="${CUDNN_INSTALL_PATH}/libcudnn.dylib"
fi
@@ -337,7 +356,7 @@ while true; do
echo "libcudnn.dylib resolves to libcudnn${TF_CUDNN_EXT}"
fi
else
- if [ "$OSNAME" == "Darwin" ]; then
+ if is_macos; then
TF_CUDNN_EXT=".${TF_CUDNN_VERSION}.dylib"
else
TF_CUDNN_EXT=".$TF_CUDNN_VERSION"
@@ -347,10 +366,10 @@ while true; do
if is_windows; then
CUDA_DNN_LIB_PATH="lib/x64/cudnn.lib"
CUDA_DNN_LIB_ALT_PATH="lib/x64/cudnn.lib"
- elif [ "$OSNAME" == "Linux" ]; then
+ elif is_linux; then
CUDA_DNN_LIB_PATH="lib64/libcudnn.so${TF_CUDNN_EXT}"
CUDA_DNN_LIB_ALT_PATH="libcudnn.so${TF_CUDNN_EXT}"
- elif [ "$OSNAME" == "Darwin" ]; then
+ elif is_macos; then
CUDA_DNN_LIB_PATH="lib/libcudnn${TF_CUDNN_EXT}"
CUDA_DNN_LIB_ALT_PATH="libcudnn${TF_CUDNN_EXT}"
fi
@@ -361,7 +380,7 @@ while true; do
break
fi
- if [ "$OSNAME" == "Linux" ]; then
+ if is_linux; then
CUDNN_PATH_FROM_LDCONFIG="$(ldconfig -p | sed -n 's/.*libcudnn.so .* => \(.*\)/\1/p')"
if [ -e "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}" ]; then
export TF_CUDNN_VERSION
@@ -372,7 +391,7 @@ while true; do
echo "Invalid path to cuDNN ${CUDNN_VERSION} toolkit. Neither of the following two files can be found:"
echo "${CUDNN_INSTALL_PATH}/${CUDA_DNN_LIB_PATH}"
echo "${CUDNN_INSTALL_PATH}/${CUDA_DNN_LIB_ALT_PATH}"
- if [ "$OSNAME" == "Linux" ]; then
+ if is_linux; then
echo "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}"
fi
@@ -499,7 +518,7 @@ while true; do
fi
fi
- if [ "$OSNAME" == "Linux" ]; then
+ if is_linux; then
SYCL_RT_LIB_PATH="lib/libComputeCpp.so"
fi
diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index a6bc8fdc49..9e8ea84baf 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -20,6 +20,12 @@ load(
# -----------------------------------------------------------------------------
# Public targets
+filegroup(
+ name = "headers",
+ srcs = ["c_api.h"],
+ visibility = ["//tensorflow:__subpackages__"],
+)
+
tf_cuda_library(
name = "c_api",
srcs = ["c_api.cc"],
diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index d3a16c57f6..38117d388f 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -355,6 +355,7 @@ tf_cc_test(
tf_gen_op_wrappers_cc(
name = "sendrecv_ops",
+ include_internal_ops = 1,
op_lib_names = [
"sendrecv_ops",
],
@@ -363,6 +364,7 @@ tf_gen_op_wrappers_cc(
tf_gen_op_wrappers_cc(
name = "function_ops",
+ include_internal_ops = 1,
op_lib_names = [
"function_ops",
],
diff --git a/tensorflow/cc/client/client_session.cc b/tensorflow/cc/client/client_session.cc
index 5a98deb259..b407d3ab03 100644
--- a/tensorflow/cc/client/client_session.cc
+++ b/tensorflow/cc/client/client_session.cc
@@ -45,20 +45,20 @@ SessionOptions ClientSession::MakeDefaultSessionOptions(
return options;
}
-Status ClientSession::Run(const std::vector<ops::Output>& fetch_outputs,
+Status ClientSession::Run(const std::vector<Output>& fetch_outputs,
std::vector<Tensor>* outputs) const {
return Run(FeedType{}, fetch_outputs, {}, outputs);
}
Status ClientSession::Run(const FeedType& inputs,
- const std::vector<ops::Output>& fetch_outputs,
+ const std::vector<Output>& fetch_outputs,
std::vector<Tensor>* outputs) const {
return Run(inputs, fetch_outputs, {}, outputs);
}
Status ClientSession::Run(const FeedType& inputs,
- const std::vector<ops::Output>& fetch_outputs,
- const std::vector<ops::Operation>& run_outputs,
+ const std::vector<Output>& fetch_outputs,
+ const std::vector<Operation>& run_outputs,
std::vector<Tensor>* outputs) const {
return Run(RunOptions(), inputs, fetch_outputs, run_outputs, outputs,
nullptr);
@@ -77,8 +77,8 @@ Status ClientSession::MaybeExtendGraph() const {
}
Status ClientSession::Run(const RunOptions& run_options, const FeedType& inputs,
- const std::vector<ops::Output>& fetch_outputs,
- const std::vector<ops::Operation>& run_outputs,
+ const std::vector<Output>& fetch_outputs,
+ const std::vector<Operation>& run_outputs,
std::vector<Tensor>* outputs,
RunMetadata* run_metadata) const {
std::vector<std::pair<string, Tensor>> feeds;
diff --git a/tensorflow/cc/client/client_session.h b/tensorflow/cc/client/client_session.h
index 9d480477f6..28ff3ec964 100644
--- a/tensorflow/cc/client/client_session.h
+++ b/tensorflow/cc/client/client_session.h
@@ -31,62 +31,59 @@ limitations under the License.
namespace tensorflow {
-// A `ClientSession` object lets the caller drive the evaluation of the
-// TensorFlow graph constructed with the C++ API.
-//
-// Example:
-//
-// Scope root = Scope::NewRootScope();
-// auto a = Placeholder(root, DT_INT32);
-// auto c = Add(root, a, {41});
-//
-// ClientSession session(root);
-// std::vector<Tensor> outputs;
-//
-// Status s = session.Run({{a, {1}}}, {c}, &outputs);
-// if (!s.ok()) { /* Handle error */ }
+/// A `ClientSession` object lets the caller drive the evaluation of the
+/// TensorFlow graph constructed with the C++ API.
+///
+/// Example:
+///
+/// Scope root = Scope::NewRootScope();
+/// auto a = Placeholder(root, DT_INT32);
+/// auto c = Add(root, a, {41});
+///
+/// ClientSession session(root);
+/// std::vector<Tensor> outputs;
+///
+/// Status s = session.Run({ {a, {1}} }, {c}, &outputs);
+/// if (!s.ok()) { ... }
class ClientSession {
public:
- // A data type to represent feeds to a Run call.
- // This is a map of `Output` objects returned by op-constructors to the value
- // to feed them with. See `ops::Input::Initializer` for details on what can be
- // used as feed values.
- typedef std::unordered_map<ops::Output, ops::Input::Initializer,
- ops::OutputHash>
- FeedType;
-
- // Create a new session to evaluate the graph contained in `scope` by
- // connecting to the TensorFlow runtime specified by `target`.
+ /// A data type to represent feeds to a Run call.
+ ///
+ /// This is a map of `Output` objects returned by op-constructors to the value
+ /// to feed them with. See `Input::Initializer` for details on what can be
+ /// used as feed values.
+ typedef std::unordered_map<Output, Input::Initializer, OutputHash> FeedType;
+
+ /// Create a new session to evaluate the graph contained in `scope` by
+ /// connecting to the TensorFlow runtime specified by `target`.
ClientSession(const Scope& scope, const string& target);
- // Same as above, but use the empty string ("") as the target specification.
+ /// Same as above, but use the empty string ("") as the target specification.
ClientSession(const Scope& scope);
- // Create a new session, configuring it with `session_options`.
+ /// Create a new session, configuring it with `session_options`.
ClientSession(const Scope& scope, const SessionOptions& session_options);
- // Evaluate the tensors in `fetch_outputs`. The values are returned as
- // `Tensor` objects in `outputs`. The number and order of `outputs` will match
- // `fetch_outputs`.
- Status Run(const std::vector<ops::Output>& fetch_outputs,
+ /// Evaluate the tensors in `fetch_outputs`. The values are returned as
+ /// `Tensor` objects in `outputs`. The number and order of `outputs` will
+ /// match `fetch_outputs`.
+ Status Run(const std::vector<Output>& fetch_outputs,
std::vector<Tensor>* outputs) const;
- // Same as above, but use the mapping in `inputs` as feeds.
- Status Run(const FeedType& inputs,
- const std::vector<ops::Output>& fetch_outputs,
+ /// Same as above, but use the mapping in `inputs` as feeds.
+ Status Run(const FeedType& inputs, const std::vector<Output>& fetch_outputs,
std::vector<Tensor>* outputs) const;
- // Same as above. Additionally runs the operations ins `run_outputs`.
- Status Run(const FeedType& inputs,
- const std::vector<ops::Output>& fetch_outputs,
- const std::vector<ops::Operation>& run_outputs,
+  /// Same as above. Additionally runs the operations in `run_outputs`.
+ Status Run(const FeedType& inputs, const std::vector<Output>& fetch_outputs,
+ const std::vector<Operation>& run_outputs,
std::vector<Tensor>* outputs) const;
- // Use `run_options` to turn on performance profiling. `run_metadata`, if not
- // null, is filled in with the profiling results.
+ /// Use `run_options` to turn on performance profiling. `run_metadata`, if not
+ /// null, is filled in with the profiling results.
Status Run(const RunOptions& run_options, const FeedType& inputs,
- const std::vector<ops::Output>& fetch_outputs,
- const std::vector<ops::Operation>& run_outputs,
+ const std::vector<Output>& fetch_outputs,
+ const std::vector<Operation>& run_outputs,
std::vector<Tensor>* outputs, RunMetadata* run_metadata) const;
// TODO(keveman): Add support for partial run.
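
Editor's note: to make the renamed Run() overloads above concrete, here is a minimal usage sketch built from the example in the class comment. It assumes the generated op wrappers (Placeholder, Add) from tensorflow/cc/ops/standard_ops.h and a default local runtime; it is an illustration, not part of this change.

#include "tensorflow/cc/client/client_session.h"
#include "tensorflow/cc/ops/standard_ops.h"

int main() {
  using namespace tensorflow;  // for brevity in this sketch
  Scope root = Scope::NewRootScope();
  auto a = ops::Placeholder(root, DT_INT32);
  auto c = ops::Add(root, a, {41});

  ClientSession session(root);
  std::vector<Tensor> outputs;
  // FeedType maps an Output to an Input::Initializer: feed `a` with {1}, fetch `c`.
  Status s = session.Run({{a, {1}}}, {c}, &outputs);
  if (!s.ok()) { /* handle error */ }
  return 0;
}

Note that with the ops:: qualification dropped from Output and Operation, the feed map and fetch list are written directly in terms of tensorflow::Output.
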
diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc
index d191a73547..a4da3aa8e2 100644
--- a/tensorflow/cc/framework/cc_op_gen.cc
+++ b/tensorflow/cc/framework/cc_op_gen.cc
@@ -76,9 +76,9 @@ string ToGuard(const std::string& path) {
}
// Change: Into:
-// ABC // ABC
-// //
-// DEF // DEF
+// ABC /// ABC
+// ///
+// DEF /// DEF
string MakeComment(StringPiece text, StringPiece indent) {
string ret;
while (!text.empty()) {
@@ -89,9 +89,9 @@ string MakeComment(StringPiece text, StringPiece indent) {
if (text[newline] != ' ') last_non_space = newline;
}
if (last_non_space == -1) {
- strings::StrAppend(&ret, indent, "//\n");
+ strings::StrAppend(&ret, indent, "///\n");
} else {
- strings::StrAppend(&ret, indent, "// ",
+ strings::StrAppend(&ret, indent, "/// ",
text.substr(0, last_non_space + 1), "\n");
}
text.remove_prefix(newline + 1);
@@ -406,7 +406,7 @@ OpInfo::OpInfo(const OpDef& op_def) : op_def(op_def) {
for (int i = 0; i < op_def.input_arg_size(); ++i) {
const auto& arg(op_def.input_arg(i));
arg_types.push_back(strings::StrCat(
- "::tensorflow::ops::", ArgIsList(arg) ? "InputList" : "Input"));
+ "::tensorflow::", ArgIsList(arg) ? "InputList" : "Input"));
arg_names.push_back(AvoidCPPKeywords(arg.name()));
// TODO(keveman): Include input type information.
@@ -445,8 +445,8 @@ OpInfo::OpInfo(const OpDef& op_def) : op_def(op_def) {
for (int i = 0; i < op_def.output_arg_size(); ++i) {
const auto& arg = op_def.output_arg(i);
bool is_list = ArgIsList(arg);
- output_types.push_back(strings::StrCat("::tensorflow::ops::",
- is_list ? "OutputList" : "Output"));
+ output_types.push_back(
+ strings::StrCat("::tensorflow::", is_list ? "OutputList" : "Output"));
output_names.push_back(AvoidCPPKeywords(arg.name()));
is_list_output.push_back(is_list);
}
@@ -537,26 +537,26 @@ void OpInfo::WriteClassDecl(WritableFile* h) const {
if (output_types.empty()) {
// Allow casting this class to Operation.
strings::StrAppend(&class_decl,
- " operator ::tensorflow::ops::Operation() const { "
+ " operator ::tensorflow::Operation() const { "
"return operation; }\n");
} else if (output_types.size() == 1) {
if (is_list_output[0]) {
// Write the subscript operator, allowing out[i] for the list-typed
// output.
strings::StrAppend(&class_decl,
- " ::tensorflow::ops::Output operator[](size_t index) "
+ " ::tensorflow::Output operator[](size_t index) "
"const { return ",
output_names[0], "[index]; }\n\n");
} else {
// Write type cast functions, allowing casting this class to Input and
// Output.
- strings::StrAppend(
- &class_decl, " operator ::tensorflow::ops::Output() const { return ",
- output_names[0], "; }\n");
- strings::StrAppend(
- &class_decl, " operator ::tensorflow::ops::Input() const { return ",
- output_names[0], "; }\n");
+ strings::StrAppend(&class_decl,
+ " operator ::tensorflow::Output() const { return ",
+ output_names[0], "; }\n");
+ strings::StrAppend(&class_decl,
+ " operator ::tensorflow::Input() const { return ",
+ output_names[0], "; }\n");
// Write node() to get the Node* directly.
strings::StrAppend(&class_decl,
" ::tensorflow::Node* node() const { return ",
diff --git a/tensorflow/cc/framework/cc_op_gen.h b/tensorflow/cc/framework/cc_op_gen.h
index d1e83a87c3..3d35d0ef32 100644
--- a/tensorflow/cc/framework/cc_op_gen.h
+++ b/tensorflow/cc/framework/cc_op_gen.h
@@ -20,7 +20,7 @@ limitations under the License.
namespace tensorflow {
-// Result is written to files dot_h and dot_cc.
+/// Result is written to files dot_h and dot_cc.
void WriteCCOps(const OpList& ops, const std::string& dot_h_fname,
const std::string& dot_cc_fname);
diff --git a/tensorflow/cc/framework/grad_op_registry.h b/tensorflow/cc/framework/grad_op_registry.h
index e4da8570f1..190b96f685 100644
--- a/tensorflow/cc/framework/grad_op_registry.h
+++ b/tensorflow/cc/framework/grad_op_registry.h
@@ -24,30 +24,30 @@ limitations under the License.
namespace tensorflow {
namespace ops {
-// GradFunc is the signature for all gradient functions in GradOpRegistry.
-// Implementations should add operations to compute the gradient outputs of 'op'
-// (returned in 'grad_outputs') using 'scope' and 'grad_inputs'.
+/// GradFunc is the signature for all gradient functions in GradOpRegistry.
+/// Implementations should add operations to compute the gradient outputs of
+/// 'op' (returned in 'grad_outputs') using 'scope' and 'grad_inputs'.
typedef Status (*GradFunc)(const Scope& scope, const Operation& op,
const std::vector<Output>& grad_inputs,
std::vector<Output>* grad_outputs);
-// GradOpRegistry maintains a static registry of gradient functions.
-// Gradient functions are indexed in the registry by the forward op name (i.e.
-// "MatMul" -> MatMulGrad func).
+/// GradOpRegistry maintains a static registry of gradient functions.
+/// Gradient functions are indexed in the registry by the forward op name (i.e.
+/// "MatMul" -> MatMulGrad func).
class GradOpRegistry {
public:
- // Registers 'func' as the gradient function for 'op'.
- // Returns true if registration was successful, check fails otherwise.
+ /// Registers 'func' as the gradient function for 'op'.
+ /// Returns true if registration was successful, check fails otherwise.
bool Register(const string& op, GradFunc func);
- // Sets 'func' to the gradient function for 'op' and returns Status OK if
- // the gradient function for 'op' exists in the registry.
- // Note that 'func' can be null for ops that have registered no-gradient with
- // the registry.
- // Returns error status otherwise.
+ /// Sets 'func' to the gradient function for 'op' and returns Status OK if
+ /// the gradient function for 'op' exists in the registry.
+ /// Note that 'func' can be null for ops that have registered no-gradient with
+ /// the registry.
+ /// Returns error status otherwise.
Status Lookup(const string& op, GradFunc* func) const;
- // Returns a pointer to the global gradient function registry.
+ /// Returns a pointer to the global gradient function registry.
static GradOpRegistry* Global();
private:
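
Editor's note: for orientation, a sketch of how a gradient function matching the GradFunc signature above is registered and looked up. The op name "MyOp" and the pass-through body are purely illustrative; real gradient functions add ops to `scope` that compute the gradients of the forward op's inputs.

namespace tensorflow {
namespace ops {

// Illustrative gradient function for a hypothetical forward op "MyOp".
Status MyOpGrad(const Scope& scope, const Operation& op,
                const std::vector<Output>& grad_inputs,
                std::vector<Output>* grad_outputs) {
  // Append one gradient per forward-op input, in input order.
  grad_outputs->push_back(grad_inputs[0]);  // placeholder: pass-through gradient
  return Status::OK();
}

// Registration, typically performed once at static-initialization time.
static const bool unused_registration =
    GradOpRegistry::Global()->Register("MyOp", MyOpGrad);

}  // namespace ops
}  // namespace tensorflow

Lookup goes through the same singleton: ops::GradOpRegistry::Global()->Lookup("MyOp", &grad_fn) returns OK and may set grad_fn to null for ops that were registered as having no gradient.
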
diff --git a/tensorflow/cc/framework/gradient_checker.cc b/tensorflow/cc/framework/gradient_checker.cc
index 89baa1a5bb..849a8eed6f 100644
--- a/tensorflow/cc/framework/gradient_checker.cc
+++ b/tensorflow/cc/framework/gradient_checker.cc
@@ -35,20 +35,20 @@ namespace {
template <typename T>
Status ComputeTheoreticalJacobianTranspose(
- const Scope& scope, const ops::OutputList& xs,
+ const Scope& scope, const OutputList& xs,
const std::vector<TensorShape>& x_shapes,
- const std::vector<Tensor>& x_datas, const ops::OutputList& ys,
+ const std::vector<Tensor>& x_datas, const OutputList& ys,
const std::vector<TensorShape>& y_shapes,
std::vector<Tensor>& jacobian_ts) {
int y_num = y_shapes.size();
int x_num = x_shapes.size();
// Call AddSymbolicGradients to get 'dxs' (we will feed 'dys').
- ops::OutputList dys;
+ OutputList dys;
for (const auto& y_shape : y_shapes) {
// TODO(suharshs): This currently assumes that all x's are the same type.
dys.push_back(Cast(scope, Const(scope, 1.0, y_shape), xs[0].type()));
}
- ops::OutputList dxs;
+ OutputList dxs;
TF_RETURN_IF_ERROR(AddSymbolicGradients(scope, ys, xs, dys, &dxs));
// Initialize 'dy_data' to zeros.
@@ -97,8 +97,8 @@ Status ComputeTheoreticalJacobianTranspose(
return Status::OK();
}
-Status EvaluateGraph(ClientSession& session, const ops::OutputList& xs,
- const ops::OutputList& ys, std::vector<Tensor>& x_datas,
+Status EvaluateGraph(ClientSession& session, const OutputList& xs,
+ const OutputList& ys, std::vector<Tensor>& x_datas,
std::vector<Tensor>* y_datas) {
// Create the feed list.
ClientSession::FeedType feed_list;
@@ -123,11 +123,13 @@ Status EvaluateGraph(ClientSession& session, const ops::OutputList& xs,
}
template <typename T>
-Status ComputeNumericJacobianTranspose(
- const Scope& scope, const ops::OutputList& xs,
- const std::vector<TensorShape>& x_shapes, const ops::OutputList& ys,
- const std::vector<TensorShape>& y_shapes, const T delta,
- std::vector<Tensor>& x_datas, std::vector<Tensor>& jacobian_ts) {
+Status ComputeNumericJacobianTranspose(const Scope& scope, const OutputList& xs,
+ const std::vector<TensorShape>& x_shapes,
+ const OutputList& ys,
+ const std::vector<TensorShape>& y_shapes,
+ const T delta,
+ std::vector<Tensor>& x_datas,
+ std::vector<Tensor>& jacobian_ts) {
int y_num = y_shapes.size();
int x_num = x_shapes.size();
@@ -170,7 +172,7 @@ Status ComputeNumericJacobianTranspose(
}
template <typename T>
-void InitJacobians(const ops::OutputList& xs,
+void InitJacobians(const OutputList& xs,
const std::vector<TensorShape>& x_shapes,
const std::vector<TensorShape>& y_shapes,
std::vector<Tensor>& jacobians) {
@@ -191,10 +193,9 @@ void InitJacobians(const ops::OutputList& xs,
}
template <typename T>
-Status ComputeGradientErrorInternal(const Scope& scope,
- const ops::OutputList& xs,
+Status ComputeGradientErrorInternal(const Scope& scope, const OutputList& xs,
const std::vector<TensorShape>& x_shapes,
- const ops::OutputList& ys,
+ const OutputList& ys,
const std::vector<TensorShape>& y_shapes,
std::vector<Tensor>& x_datas,
T* max_error) {
@@ -231,9 +232,9 @@ Status ComputeGradientErrorInternal(const Scope& scope,
} // namespace
template <typename T>
-Status ComputeGradientError(const Scope& scope, const ops::OutputList& xs,
+Status ComputeGradientError(const Scope& scope, const OutputList& xs,
const std::vector<TensorShape>& x_shapes,
- const ops::OutputList& ys,
+ const OutputList& ys,
const std::vector<TensorShape>& y_shapes,
T* max_error) {
if (xs.size() != x_shapes.size()) {
@@ -259,8 +260,8 @@ Status ComputeGradientError(const Scope& scope, const ops::OutputList& xs,
}
template <typename T>
-Status ComputeGradientError(const Scope& scope, const ops::Output& x,
- const Tensor& x_init_value, const ops::Output& y,
+Status ComputeGradientError(const Scope& scope, const Output& x,
+ const Tensor& x_init_value, const Output& y,
const TensorShape& y_shape, T* max_error) {
// Initialize 'x_data' from 'x_init_value'.
std::vector<Tensor> x_datas(1, Tensor(x_init_value));
@@ -269,14 +270,14 @@ Status ComputeGradientError(const Scope& scope, const ops::Output& x,
{y_shape}, x_datas, max_error);
}
-#define INSTANTIATE_GRAD_ERR_TYPE(T) \
- template Status ComputeGradientError<T>( \
- const Scope& scope, const ops::OutputList& xs, \
- const std::vector<TensorShape>& x_shapes, const ops::OutputList& ys, \
- const std::vector<TensorShape>& y_shapes, T* max_error); \
- template Status ComputeGradientError<T>( \
- const Scope& scope, const ops::Output& x, const Tensor& x_init_value, \
- const ops::Output& y, const TensorShape& y_shape, T* max_error);
+#define INSTANTIATE_GRAD_ERR_TYPE(T) \
+ template Status ComputeGradientError<T>( \
+ const Scope& scope, const OutputList& xs, \
+ const std::vector<TensorShape>& x_shapes, const OutputList& ys, \
+ const std::vector<TensorShape>& y_shapes, T* max_error); \
+ template Status ComputeGradientError<T>( \
+ const Scope& scope, const Output& x, const Tensor& x_init_value, \
+ const Output& y, const TensorShape& y_shape, T* max_error);
INSTANTIATE_GRAD_ERR_TYPE(float);
INSTANTIATE_GRAD_ERR_TYPE(double);
diff --git a/tensorflow/cc/framework/gradient_checker.h b/tensorflow/cc/framework/gradient_checker.h
index 66a2b3040c..2e61213615 100644
--- a/tensorflow/cc/framework/gradient_checker.h
+++ b/tensorflow/cc/framework/gradient_checker.h
@@ -22,20 +22,20 @@ limitations under the License.
namespace tensorflow {
-// Returns in 'max_error' the maximum element-wise error for dy/dx between the
-// computed and numeric Jacobian matrices where 'xs' and 'ys' are tensors.
-// This function adds operations to the graph associated with 'scope'.
+/// Returns in 'max_error' the maximum element-wise error for dy/dx between the
+/// computed and numeric Jacobian matrices where 'xs' and 'ys' are tensors.
+/// This function adds operations to the graph associated with 'scope'.
template <typename T>
-Status ComputeGradientError(const Scope& scope, const ops::OutputList& xs,
+Status ComputeGradientError(const Scope& scope, const OutputList& xs,
const std::vector<TensorShape>& x_shapes,
- const ops::OutputList& ys,
+ const OutputList& ys,
const std::vector<TensorShape>& y_shapes,
T* max_error);
-// Overload of ComputeGradientError which takes an initial value for 'x'.
+/// Overload of ComputeGradientError which takes an initial value for 'x'.
template <typename T>
-Status ComputeGradientError(const Scope& scope, const ops::Output& x,
- const Tensor& x_init_value, const ops::Output& y,
+Status ComputeGradientError(const Scope& scope, const Output& x,
+ const Tensor& x_init_value, const Output& y,
const TensorShape& y_shape, T* max_error);
} // namespace tensorflow
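
Editor's note: a hedged usage sketch of the single-tensor overload declared above. The Placeholder/Square wrappers and the random initialization are illustrative assumptions; any differentiable op with a registered gradient works the same way.

Scope scope = Scope::NewRootScope();
TensorShape shape({2, 3});
auto x = ops::Placeholder(scope, DT_FLOAT);
auto y = ops::Square(scope, x);  // output shape equals input shape here

Tensor x_init(DT_FLOAT, shape);
x_init.flat<float>().setRandom();

float max_error;
TF_CHECK_OK(
    ComputeGradientError<float>(scope, x, x_init, y, shape, &max_error));
// max_error is the largest element-wise difference between the symbolic
// Jacobian (built via AddSymbolicGradients) and the numeric finite-difference one.
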
diff --git a/tensorflow/cc/framework/gradients.cc b/tensorflow/cc/framework/gradients.cc
index 0059bdd6d1..2c60f947a5 100644
--- a/tensorflow/cc/framework/gradients.cc
+++ b/tensorflow/cc/framework/gradients.cc
@@ -29,8 +29,6 @@ limitations under the License.
#include "tensorflow/core/platform/macros.h"
namespace tensorflow {
-using namespace ops; // NOLINT(build/namespaces)
-
namespace {
struct OutputHash {
@@ -48,7 +46,7 @@ struct OutputEq {
class SymbolicGradientBuilder {
public:
SymbolicGradientBuilder(const Scope& scope,
- const GradOpRegistry* registry,
+ const ops::GradOpRegistry* registry,
const std::vector<Output>& outputs,
const std::vector<Output>& inputs,
const std::vector<Output>& grad_inputs,
@@ -81,7 +79,7 @@ class SymbolicGradientBuilder {
std::vector<Output>* grad_outputs);
const Scope& scope_;
- const GradOpRegistry* registry_;
+ const ops::GradOpRegistry* registry_;
const std::vector<Output>& outputs_;
const std::vector<Output>& inputs_;
const std::vector<Output>& grad_inputs_;
@@ -119,19 +117,15 @@ class SymbolicGradientBuilder {
};
SymbolicGradientBuilder::SymbolicGradientBuilder(
- const Scope& scope,
- const GradOpRegistry* registry,
- const std::vector<Output>& outputs,
- const std::vector<Output>& inputs,
- const std::vector<Output>& grad_inputs,
- std::vector<Output>* grad_outputs)
+ const Scope& scope, const ops::GradOpRegistry* registry,
+ const std::vector<Output>& outputs, const std::vector<Output>& inputs,
+ const std::vector<Output>& grad_inputs, std::vector<Output>* grad_outputs)
: scope_(scope),
registry_(registry),
outputs_(outputs),
inputs_(inputs),
grad_inputs_(grad_inputs),
- grad_outputs_(grad_outputs) {
-}
+ grad_outputs_(grad_outputs) {}
Status SymbolicGradientBuilder::BackpropAlongEdge(const Output& dst_grad,
const Output& src) {
@@ -249,14 +243,14 @@ Status SymbolicGradientBuilder::SumGradients(const Output& src, Output* grad) {
} else {
// Otherwise, adds backprop-ed gradients.
// TODO(andydavis) Use a better accumulator here.
- *grad = AddN(scope_, grads_to_keep);
+ *grad = ops::AddN(scope_, grads_to_keep);
}
return Status::OK();
}
bool SymbolicGradientBuilder::IsPrimitiveOpWithNoGrad(const string& opname) {
- GradFunc grad_fn;
+ ops::GradFunc grad_fn;
Status s = registry_->Lookup(opname, &grad_fn);
return s.ok() && (grad_fn == nullptr);
}
@@ -265,7 +259,7 @@ Status SymbolicGradientBuilder::CallGradFunction(
const Operation& op,
const std::vector<Output>& grad_inputs,
std::vector<Output>* grad_outputs) {
- GradFunc grad_fn;
+ ops::GradFunc grad_fn;
TF_RETURN_IF_ERROR(registry_->Lookup(op.node()->type_string(), &grad_fn));
TF_RETURN_IF_ERROR(grad_fn(scope_, op, grad_inputs, grad_outputs));
TF_RETURN_IF_ERROR(scope_.status());
@@ -333,7 +327,7 @@ Status SymbolicGradientBuilder::AddGradients() {
// TODO(andydavis) If static shapes are known, replace 'ZerosLike' with
// zero-filled Constant node of appropriate shape.
for (const int dy_index : no_grad_dy_indices) {
- dy[dy_index] = ZerosLike(scope_, Output(n, dy_index));
+ dy[dy_index] = ops::ZerosLike(scope_, Output(n, dy_index));
}
}
@@ -368,7 +362,7 @@ Status AddSymbolicGradients(const Scope& scope,
const std::vector<Output>& inputs,
const std::vector<Output>& grad_inputs,
std::vector<Output>* grad_outputs) {
- SymbolicGradientBuilder builder(scope, GradOpRegistry::Global(), outputs,
+ SymbolicGradientBuilder builder(scope, ops::GradOpRegistry::Global(), outputs,
inputs, grad_inputs, grad_outputs);
return builder.AddGradients();
}
diff --git a/tensorflow/cc/framework/gradients.h b/tensorflow/cc/framework/gradients.h
index fa5e608bd4..d076bc43b4 100644
--- a/tensorflow/cc/framework/gradients.h
+++ b/tensorflow/cc/framework/gradients.h
@@ -21,28 +21,28 @@ limitations under the License.
namespace tensorflow {
-// NOTE: This API is a work in progress and will likely be changing frequently.
-//
-// Given initial gradients 'grad_inputs' (which represent the symbolic partial
-// derivatives of some loss function 'L' w.r.t 'outputs'), adds gradient nodes
-// to the graph associated with 'scope', which compute (and return in
-// 'grad_outputs') the symbolic partial derivatives of 'L' w.r.t 'inputs'.
-//
+/// NOTE: This API is a work in progress and will likely be changing frequently.
+///
+/// Given initial gradients 'grad_inputs' (which represent the symbolic partial
+/// derivatives of some loss function 'L' w.r.t 'outputs'), adds gradient nodes
+/// to the graph associated with 'scope', which compute (and return in
+/// 'grad_outputs') the symbolic partial derivatives of 'L' w.r.t 'inputs'.
+///
// TODO(andydavis) Add overload of this function with no 'grad_inputs' arg.
// Implementation will fill in 'OnesLike' for all shapes in 'outputs'.
Status AddSymbolicGradients(const Scope& scope,
- const std::vector<ops::Output>& outputs,
- const std::vector<ops::Output>& inputs,
- const std::vector<ops::Output>& grad_inputs,
- std::vector<ops::Output>* grad_outputs);
-
-// Returns a sentinel Output that represents 'no gradient' (i.e. no gradient
-// flows along some graph edge during backpropagation).
-// Can be returned in 'grad_outputs' by an invocation of 'AddSymbolicGradients'
-// (note that gradient flow through an Output can be stopped through the use of
-// the StopGradient node).
-ops::Output NoGradient();
+ const std::vector<Output>& outputs,
+ const std::vector<Output>& inputs,
+ const std::vector<Output>& grad_inputs,
+ std::vector<Output>* grad_outputs);
+
+/// Returns a sentinel Output that represents 'no gradient' (i.e. no gradient
+/// flows along some graph edge during backpropagation).
+/// Can be returned in 'grad_outputs' by an invocation of 'AddSymbolicGradients'
+/// (note that gradient flow through an Output can be stopped through the use of
+/// the StopGradient node).
+Output NoGradient();
} // namespace tensorflow
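
Editor's note: a minimal sketch of the declaration above, in the spirit of the tests that follow. The Const/MatMul wrappers are the generated ops; the values are illustrative.

Scope scope = Scope::NewRootScope();
auto x = ops::Const(scope, {{1.0, 2.0}, {3.0, 4.0}});
auto y = ops::Const(scope, {{1.0, 0.0}, {0.0, 1.0}});
auto z = ops::MatMul(scope, x, y);

// Seed the backward pass with dL/dz and ask for dL/dx and dL/dy.
auto dz = ops::Const(scope, {{1.0, 1.0}, {1.0, 1.0}});
std::vector<Output> grad_outputs;
TF_CHECK_OK(AddSymbolicGradients(scope, {z}, {x, y}, {dz}, &grad_outputs));
// grad_outputs[0] is dL/dx and grad_outputs[1] is dL/dy; an entry can be
// NoGradient() when no gradient flows to that input.
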
diff --git a/tensorflow/cc/framework/gradients_test.cc b/tensorflow/cc/framework/gradients_test.cc
index 9ae927a762..6e9ff3e01c 100644
--- a/tensorflow/cc/framework/gradients_test.cc
+++ b/tensorflow/cc/framework/gradients_test.cc
@@ -90,7 +90,7 @@ TEST_F(GradientsTest, OneMatMul) {
} else {
// Call AddSymbolicGradients.
auto dz = Const(scope, {{1.0, 1.0}, {1.0, 1.0}});
- std::vector<ops::Output> grad_outputs;
+ std::vector<Output> grad_outputs;
TF_ASSERT_OK(
AddSymbolicGradients(scope, {z}, {x, y}, {dz}, &grad_outputs));
}
@@ -123,7 +123,7 @@ TEST_F(GradientsTest, TwoMatMuls_Chained) {
} else {
// Call AddSymbolicGradients.
auto dz = Const(scope, {{1.0, 1.0}, {1.0, 1.0}});
- std::vector<ops::Output> grad_outputs;
+ std::vector<Output> grad_outputs;
TF_ASSERT_OK(
AddSymbolicGradients(scope, {z}, {u, v}, {dz}, &grad_outputs));
}
@@ -160,7 +160,7 @@ TEST_F(GradientsTest, TwoMatMuls_Independent) {
// Call AddSymbolicGradients.
auto dv = Const(scope, {{1.0, 1.0}, {1.0, 1.0}});
auto dz = Const(scope, {{1.0, 1.0}, {1.0, 1.0}});
- std::vector<ops::Output> grad_outputs;
+ std::vector<Output> grad_outputs;
TF_ASSERT_OK(AddSymbolicGradients(scope, {v, z}, {t, u, x, y}, {dv, dz},
&grad_outputs));
}
@@ -191,7 +191,7 @@ TEST_F(GradientsTest, PackUnpack_Chained) {
auto pack_grad = Unpack(scope, unpack_grad.output, 3);
} else {
// Call AddSymbolicGradients.
- std::vector<ops::Output> grad_outputs;
+ std::vector<Output> grad_outputs;
TF_ASSERT_OK(AddSymbolicGradients(scope, unpack.output, {a, b, c},
{dx, dy, dz}, &grad_outputs));
}
@@ -225,7 +225,7 @@ TEST_F(GradientsTest, PackUnpack_StopBackprop) {
auto unpack_grad = Pack(scope, {dx, dy, dz});
} else {
// Call AddSymbolicGradients.
- std::vector<ops::Output> grad_outputs;
+ std::vector<Output> grad_outputs;
TF_ASSERT_OK(AddSymbolicGradients(scope, unpack.output, {pack},
{dx, dy, dz}, &grad_outputs));
}
@@ -252,7 +252,7 @@ TEST_F(GradientsTest, DependentGradOutputs) {
// The gradient w.r.t to 'v' (returned in grad_outputs[0]) is dependent on
// the gradient w.r.t. to 'x' (returned in grad_outputs[1]).
auto dz = Const(scope_test_, {{5}});
- std::vector<ops::Output> grad_outputs;
+ std::vector<Output> grad_outputs;
TF_ASSERT_OK(
AddSymbolicGradients(scope_test_, {z}, {v, x}, {dz}, &grad_outputs));
@@ -281,7 +281,7 @@ TEST_F(GradientsTest, MultipleNodeOutputGrads) {
{3, 4, 2});
// clang-format on
- std::vector<ops::Output> grad_outputs;
+ std::vector<Output> grad_outputs;
TF_ASSERT_OK(AddSymbolicGradients(scope_test_, {pack}, unpack.output, {dx},
&grad_outputs));
@@ -333,7 +333,7 @@ class StopGradientSingleOutputMultiEdgeTest : public ::testing::Test {
auto g2 = Const(scope_, {{9, 10}, {11, 12}});
// Call AddSymbolicGradients and compare against 'expected_grad'.
- std::vector<ops::Output> grad_outputs;
+ std::vector<Output> grad_outputs;
TF_EXPECT_OK(AddSymbolicGradients(scope_, {out0, out1, out2}, {z},
{g0, g1, g2}, &grad_outputs));
@@ -410,7 +410,7 @@ class StopGradientMultiOutputTest : public ::testing::Test {
auto g2 = Const(scope_, {17, 18, 19, 20, 21, 22, 23, 24}, {2, 4});
// Call AddSymbolicGradients and compare against 'expected_grad'.
- std::vector<ops::Output> grad_outputs;
+ std::vector<Output> grad_outputs;
TF_EXPECT_OK(AddSymbolicGradients(scope_, {out0, out1, out2}, {x},
{g0, g1, g2}, &grad_outputs));
diff --git a/tensorflow/cc/framework/ops.h b/tensorflow/cc/framework/ops.h
index 82ba9c68f0..32086d4123 100644
--- a/tensorflow/cc/framework/ops.h
+++ b/tensorflow/cc/framework/ops.h
@@ -28,7 +28,7 @@ namespace tensorflow {
class Output;
-// Represents a node in the computation graph.
+/// Represents a node in the computation graph.
class Operation {
public:
Operation() : node_(nullptr) {}
@@ -56,7 +56,7 @@ class Operation {
Node* node_;
};
-// Represents a tensor value produced by an Operation.
+/// Represents a tensor value produced by an Operation.
class Output {
public:
Output() = default;
@@ -87,18 +87,18 @@ struct OutputHash {
}
};
-// Represents a tensor value that can be used as an operand to an Operation.
+/// Represents a tensor value that can be used as an operand to an Operation.
class Input {
public:
- // Initializer enables constructing an Input object from various kinds of C++
- // constants such as simple primitive constants and nested initializer lists
- // representing a multi-dimensional array. Initializer constructors are all
- // templates, so the aforementioned kinds of C++ constants can be used to
- // construct an Initializer. Initializer stores the value it got constructed
- // with in a Tensor object.
+ /// Initializer enables constructing an Input object from various kinds of C++
+ /// constants such as simple primitive constants and nested initializer lists
+ /// representing a multi-dimensional array. Initializer constructors are all
+ /// templates, so the aforementioned kinds of C++ constants can be used to
+ /// construct an Initializer. Initializer stores the value it got constructed
+ /// with in a Tensor object.
struct Initializer {
- // Construct from a scalar value of an arithmetic type or a type that can be
- // converted to a string (eg. a string literal).
+ /// Construct from a scalar value of an arithmetic type or a type that can
+ /// be converted to a string (eg. a string literal).
template <typename T, typename = typename std::enable_if<
std::is_arithmetic<T>::value ||
std::is_convertible<T, string>::value>::type>
@@ -111,7 +111,7 @@ class Input {
Initializer(const Tensor& t) : tensor(t) {} // NOLINT(runtime/explicit)
- // Construct from a scalar value and an explicit shape
+ /// Construct from a scalar value and an explicit shape
template <typename T, typename = typename std::enable_if<
std::is_arithmetic<T>::value ||
std::is_convertible<T, string>::value>::type>
@@ -124,7 +124,7 @@ class Input {
tensor = t;
}
- // Construct from a initializer list of scalars (a one-dimensional tensor).
+    /// Construct from an initializer list of scalars (a one-dimensional tensor).
template <typename T, typename = typename std::enable_if<
std::is_arithmetic<T>::value ||
std::is_convertible<T, string>::value>::type>
@@ -137,7 +137,7 @@ class Input {
tensor = t;
}
- // Construct from a initializer list of scalars and an explicit shape.
+    /// Construct from an initializer list of scalars and an explicit shape.
template <typename T, typename = typename std::enable_if<
std::is_arithmetic<T>::value ||
std::is_convertible<T, string>::value>::type>
@@ -154,11 +154,11 @@ class Input {
tensor = t;
}
- // Construct a multi-dimensional tensor from a nested initializer list. Note
- // that C++ syntax allows nesting of arbitrarily typed initializer lists, so
- // such invalid initializers cannot be disallowed at compile time. This
- // function performs checks to make sure that the nested initializer list is
- // indeed a valid multi-dimensional tensor.
+ /// Construct a multi-dimensional tensor from a nested initializer
+ /// list. Note that C++ syntax allows nesting of arbitrarily typed
+ /// initializer lists, so such invalid initializers cannot be disallowed at
+ /// compile time. This function performs checks to make sure that the nested
+ /// initializer list is indeed a valid multi-dimensional tensor.
Initializer(const std::initializer_list<Initializer>& v);
template <typename T, bool = std::is_convertible<T, string>::value>
@@ -185,14 +185,14 @@ class Input {
Tensor tensor;
};
- // All of Input's constructors are implicit. Input can be implicitly
- // constructed from the following objects :
- // * Output: This is so that the output of an Operation can be directly used
- // as the input to a op wrapper, which takes Inputs.
- // * A scalar, or a multi-dimensional tensor specified as a recursive
- // initializer list. This enables directly passing constants as
- // inputs to op wrappers.
- // * A Tensor object.
+ /// All of Input's constructors are implicit. Input can be implicitly
+ /// constructed from the following objects :
+ /// * Output: This is so that the output of an Operation can be directly used
+  ///   as the input to an op wrapper, which takes Inputs.
+ /// * A scalar, or a multi-dimensional tensor specified as a recursive
+ /// initializer list. This enables directly passing constants as
+ /// inputs to op wrappers.
+ /// * A Tensor object.
Input(const Output& o) : output_(o) {} // NOLINT(runtime/explicit)
template <typename T, typename = typename std::enable_if<
@@ -220,8 +220,8 @@ class Input {
tensor_ = Initializer(init).tensor;
}
- // Constructor specifying a node name, index and datatype. This should only be
- // used for specifying a backward edge, needed by control flow.
+ /// Constructor specifying a node name, index and datatype. This should only
+ /// be used for specifying a backward edge, needed by control flow.
Input(const string& name, int i, DataType dt)
: node_name_(name), index_(i), data_type_(dt) {}
@@ -241,15 +241,15 @@ class Input {
DataType data_type_ = DT_INVALID;
};
-// A type for representing the output of ops that produce more than one output,
-// or a list of tensors.
+/// A type for representing the output of ops that produce more than one output,
+/// or a list of tensors.
typedef std::vector<Output> OutputList;
-// A type for representing the input to ops that require a list of tensors.
+/// A type for representing the input to ops that require a list of tensors.
class InputList {
public:
- // Implicitly convert a list of outputs to a list of inputs. This is useful to
- // write code such as ops::Concat(ops::Split(x, 4)).
+ /// Implicitly convert a list of outputs to a list of inputs. This is useful
+ /// to write code such as ops::Concat(ops::Split(x, 4)).
InputList(const OutputList& out) { // NOLINT(runtime/explicit)
for (auto const& x : out) {
inputs_.push_back(x);
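For illustration, a minimal sketch of how the implicit Input conversions documented above compose in practice; it assumes the generated C++ op wrappers (ops::Const, ops::MatMul) are available and is not part of this change:

Scope root = Scope::NewRootScope();
// Output -> Input: the Output produced by Const feeds MatMul directly.
auto c1 = ops::Const(root, {{1, 1}});
// Nested initializer list -> Input: {{41}, {1}} goes through Input::Initializer
// and becomes a 2x1 constant tensor.
auto m = ops::MatMul(root, c1, {{41}, {1}});
// Tensor -> Input: an existing Tensor object can be passed as well.
Tensor t(DT_FLOAT, TensorShape({1, 2}));
t.flat<float>().setConstant(1.0f);
auto m2 = ops::MatMul(root, t, {{41.0f}, {1.0f}});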
diff --git a/tensorflow/cc/framework/scope.cc b/tensorflow/cc/framework/scope.cc
index 2bce24f2fc..e1af5b36e8 100644
--- a/tensorflow/cc/framework/scope.cc
+++ b/tensorflow/cc/framework/scope.cc
@@ -70,14 +70,14 @@ Scope::Scope(const Scope& other, Scope::Tags::OpName, const string& name,
colocation_constraints_(other.colocation_constraints_) {}
Scope::Scope(const Scope& other, Scope::Tags::ControlDeps,
- std::vector<ops::Operation> control_deps, bool clear_control_deps)
+ std::vector<Operation> control_deps, bool clear_control_deps)
: graph_(other.graph_),
status_(other.status_),
name_map_(other.name_map_),
refiner_(other.refiner_),
scope_used_(other.scope_used_),
control_deps_(clear_control_deps
- ? std::vector<ops::Operation>()
+ ? std::vector<Operation>()
: (control_deps.insert(control_deps.begin(),
other.control_deps_.begin(),
other.control_deps_.end()),
@@ -148,7 +148,7 @@ Scope::Scope(const Scope& other, Scope::Tags::KernelLabel,
colocation_constraints_(other.colocation_constraints_) {}
Scope::Scope(const Scope& other, Scope::Tags::Colocate,
- const ops::Operation& colocate_with_op, bool clear_colocations)
+ const Operation& colocate_with_op, bool clear_colocations)
: graph_(other.graph_),
status_(other.status_),
name_map_(other.name_map_),
@@ -166,7 +166,7 @@ Scope::Scope(const Scope& other, Scope::Tags::Colocate,
: other.GetColocationConstraints(colocate_with_op)) {}
std::unordered_set<string> Scope::GetColocationConstraints(
- const ops::Operation& colocate_with_op) const {
+ const Operation& colocate_with_op) const {
std::unordered_set<string> current_constraints(colocation_constraints_);
const NodeDef& node_def = colocate_with_op.node()->def();
std::vector<string> node_constraints;
@@ -298,21 +298,20 @@ Scope Scope::WithOpName(const string& op_name) const {
}
Scope Scope::WithControlDependencies(
- const gtl::ArraySlice<ops::Operation>& control_deps) const {
- return Scope(
- *this, Scope::Tags::ControlDeps(),
- std::vector<ops::Operation>(control_deps.begin(), control_deps.end()),
- /* clear_control_deps */ false);
+ const gtl::ArraySlice<Operation>& control_deps) const {
+ return Scope(*this, Scope::Tags::ControlDeps(),
+ std::vector<Operation>(control_deps.begin(), control_deps.end()),
+ /* clear_control_deps */ false);
}
-Scope Scope::WithControlDependencies(const ops::Output& control_dep) const {
+Scope Scope::WithControlDependencies(const Output& control_dep) const {
return Scope(*this, Scope::Tags::ControlDeps(),
- std::vector<ops::Operation>(1, control_dep.op()),
+ std::vector<Operation>(1, control_dep.op()),
/* clear_control_deps */ false);
}
Scope Scope::WithNoControlDependencies() const {
- return Scope(*this, Scope::Tags::ControlDeps(), std::vector<ops::Operation>(),
+ return Scope(*this, Scope::Tags::ControlDeps(), std::vector<Operation>(),
/* clear_control_deps */ true);
}
@@ -320,13 +319,13 @@ Scope Scope::WithDevice(const string& device) const {
return Scope(*this, Scope::Tags::Device(), device);
}
-Scope Scope::ColocateWith(const ops::Operation& op) const {
+Scope Scope::ColocateWith(const Operation& op) const {
return Scope(*this, Scope::Tags::Colocate(), op,
/* clear_colocations */ false);
}
Scope Scope::ClearColocation() const {
- return Scope(*this, Scope::Tags::Colocate(), ops::Operation(),
+ return Scope(*this, Scope::Tags::Colocate(), Operation(),
/* clear_colocations */ true);
}
diff --git a/tensorflow/cc/framework/scope.h b/tensorflow/cc/framework/scope.h
index edf25e2227..47d1026bb2 100644
--- a/tensorflow/cc/framework/scope.h
+++ b/tensorflow/cc/framework/scope.h
@@ -33,129 +33,136 @@ class GraphDef;
class NodeBuilder;
struct CompositeOpScopes;
-// A `Scope` object represents a set of related TensorFlow ops that have the
-// same properties such as a common name prefix.
-// A Scope object is a container for TensorFlow Op properties. Op constructors
-// get a Scope object as a mandatory first argument and the constructed op
-// acquires the properties in the object.
-//
-// A simple example:
-//
-// using namespace ops;
-// Scope root = Scope::NewRootScope();
-// auto c1 = Const(root, {{1, 1}});
-// auto m = MatMul(root, c1, {{41}, {1}});
-// GraphDef gdef;
-// Status s = root.ToGraphDef(&gdef);
-// if (!s.ok()) { /* Handle error */ }
-//
-// Scope hierarchy:
-// The Scope class provides various With<> functions that create a new scope.
-// The new scope typically has one property changed while other properties are
-// inherited from the parent scope.
-// NewSubScope(name) method appends `name` to the prefix of names for ops
-// created within the scope, and WithOpName() changes the suffix which
-// otherwise defaults to the type of the op.
-//
-// Name examples:
-// Scope root = Scope::NewRootScope();
-// Scope linear = root.NewSubScope("linear");
-// /* W will be named "linear/W" */
-// auto W = Variable(linear.WithOpName("W"),
-// {2, 2}, DT_FLOAT);
-// /* b will be named "linear/b" */
-// auto b = Variable(linear.WithOpName("b"),
-// {2}, DT_FLOAT);
-// auto x = Const(linear, {...}); // name: "linear/Const"
-// auto m = MatMul(linear, x, W); // name: "linear/MatMul"
-// auto r = BiasAdd(linear, m, b); // name: "linear/BiasAdd"
-//
-// Scope lifetime:
-// A new scope is created by calling Scope::NewRootScope. This creates some
-// resources that are shared by all the child scopes that inherit from this
-// scope, directly or transitively. For instance, a new scope creates a new
-// Graph object to which operations are added when the new scope or its children
-// are used by an Op constructor. The new scope also has a Status object which
-// will be used to indicate errors by Op-constructor functions called on any
-// child scope. The Op-constructor functions have to check the scope's status by
-// calling the ok() method before proceeding to construct the op.
-//
-// Thread safety:
-// A `Scope` object is NOT thread-safe. Threads cannot concurrently call
-// op-constructor functions on the same `Scope` object.
+/// A `Scope` object represents a set of related TensorFlow ops that have the
+/// same properties such as a common name prefix.
+///
+/// A Scope object is a container for TensorFlow Op properties. Op constructors
+/// get a Scope object as a mandatory first argument and the constructed op
+/// acquires the properties in the object.
+///
+/// A simple example:
+///
+/// using namespace ops;
+/// Scope root = Scope::NewRootScope();
+/// auto c1 = Const(root, { {1, 1} });
+/// auto m = MatMul(root, c1, { {41}, {1} });
+/// GraphDef gdef;
+/// Status s = root.ToGraphDef(&gdef);
+/// if (!s.ok()) { ... }
+///
+/// Scope hierarchy:
+///
+/// The Scope class provides various With<> functions that create a new scope.
+/// The new scope typically has one property changed while other properties are
+/// inherited from the parent scope.
+/// NewSubScope(name) method appends `name` to the prefix of names for ops
+/// created within the scope, and WithOpName() changes the suffix which
+/// otherwise defaults to the type of the op.
+///
+/// Name examples:
+///
+/// Scope root = Scope::NewRootScope();
+/// Scope linear = root.NewSubScope("linear");
+/// // W will be named "linear/W"
+/// auto W = Variable(linear.WithOpName("W"),
+/// {2, 2}, DT_FLOAT);
+/// // b will be named "linear/b"
+/// auto b = Variable(linear.WithOpName("b"),
+/// {2}, DT_FLOAT);
+/// auto x = Const(linear, {...}); // name: "linear/Const"
+/// auto m = MatMul(linear, x, W); // name: "linear/MatMul"
+/// auto r = BiasAdd(linear, m, b); // name: "linear/BiasAdd"
+///
+/// Scope lifetime:
+///
+/// A new scope is created by calling Scope::NewRootScope. This creates some
+/// resources that are shared by all the child scopes that inherit from this
+/// scope, directly or transitively. For instance, a new scope creates a new
+/// Graph object to which operations are added when the new scope or its
+/// children are used by an Op constructor. The new scope also has a Status
+/// object which will be used to indicate errors by Op-constructor functions
+/// called on any child scope. The Op-constructor functions have to check the
+/// scope's status by calling the ok() method before proceeding to construct the
+/// op.
+///
+/// Thread safety:
+///
+/// A `Scope` object is NOT thread-safe. Threads cannot concurrently call
+/// op-constructor functions on the same `Scope` object.
class Scope {
public:
// The following functions are for users making graphs. They return brand new
// scopes, or scopes derived from an existing scope object.
- // Return a new scope.
- // This creates a new graph and all operations constructed in this graph
- // should use the returned object as the "root" scope.
+ /// Return a new scope.
+ /// This creates a new graph and all operations constructed in this graph
+ /// should use the returned object as the "root" scope.
static Scope NewRootScope();
- // Return a new scope. Ops created with this scope will have
- // <name>/<child_scope_name> as the prefix. The actual name will be unique
- // in the current scope. All other properties are inherited from the current
- // scope. If child_scope_name is empty, the '/' is elided.
+ /// Return a new scope. Ops created with this scope will have
+ /// <name>/<child_scope_name> as the prefix. The actual name will be unique
+ /// in the current scope. All other properties are inherited from the current
+ /// scope. If child_scope_name is empty, the '/' is elided.
Scope NewSubScope(const string& child_scope_name) const;
- // Return a new scope. All ops created within the returned scope will have
- // names of the form <name>/<op_name>[_<suffix].
+ /// Return a new scope. All ops created within the returned scope will have
+ /// names of the form <name>/<op_name>[_<suffix>].
Scope WithOpName(const string& op_name) const;
- // Return a new scope. All ops created within the returned scope will have as
- // control dependencies the union of operations in the control_deps vector and
- // the control dependencies of the current scope.
+ /// Return a new scope. All ops created within the returned scope will have as
+ /// control dependencies the union of operations in the control_deps vector
+ /// and the control dependencies of the current scope.
Scope WithControlDependencies(
- const gtl::ArraySlice<ops::Operation>& control_deps) const;
- // Same as above, but convenient to add control dependency on the operation
- // producing the control_dep output.
- Scope WithControlDependencies(const ops::Output& control_dep) const;
+ const gtl::ArraySlice<Operation>& control_deps) const;
+ /// Same as above, but convenient for adding a control dependency on the
+ /// operation producing the control_dep output.
+ Scope WithControlDependencies(const Output& control_dep) const;
- // Return a new scope. All ops created within the returned scope will have no
- // control dependencies on other operations.
+ /// Return a new scope. All ops created within the returned scope will have no
+ /// control dependencies on other operations.
Scope WithNoControlDependencies() const;
- // Return a new scope. All ops created within the returned scope will have the
- // device field set to 'device'.
+ /// Return a new scope. All ops created within the returned scope will have
+ /// the device field set to 'device'.
Scope WithDevice(const string& device) const;
- // Return a new scope. All ops created within the returned scope will be
- // co-located on the device where op is placed.
- // NOTE: This function is intended to be use internal libraries only for
- // controlling placement of ops on to devices. Public use is not encouraged
- // because the implementation of device placement is subject to change.
- Scope ColocateWith(const ops::Operation& op) const;
- // Convenience function for above.
- Scope ColocateWith(const ops::Output& out) const {
- return ColocateWith(out.op());
- }
- // Clear all colocation constraints.
+ /// Return a new scope. All ops created within the returned scope will be
+ /// co-located on the device where op is placed.
+ /// NOTE: This function is intended to be used by internal libraries only for
+ /// controlling placement of ops onto devices. Public use is not encouraged
+ /// because the implementation of device placement is subject to change.
+ Scope ColocateWith(const Operation& op) const;
+ /// Convenience function for above.
+ Scope ColocateWith(const Output& out) const { return ColocateWith(out.op()); }
+ /// Clear all colocation constraints.
Scope ClearColocation() const;
- // Return a new scope. The op-constructor functions taking the returned scope
- // as the scope argument will exit as soon as an error is detected, instead of
- // setting the status on the scope.
+ /// Return a new scope. The op-constructor functions taking the returned scope
+ /// as the scope argument will exit as soon as an error is detected, instead
+ /// of setting the status on the scope.
Scope ExitOnError() const;
- // Return a new scope. All ops created with the new scope will have
- // kernel_label as the value for their '_kernel' attribute;
+ /// Return a new scope. All ops created with the new scope will have
+ /// kernel_label as the value for their '_kernel' attribute.
Scope WithKernelLabel(const string& kernel_label) const;
// The following functions are for scope object consumers.
- // Return a unique name, using default_name if an op name has not been
- // specified.
+ /// Return a unique name, using default_name if an op name has not been
+ /// specified.
string GetUniqueNameForOp(const string& default_name) const;
- // Update the status on this scope.
- // Note: The status object is shared between all children of this scope.
- // If the resulting status is not Status::OK() and exit_on_error_ is set on
- // this scope, this function exits by calling LOG(FATAL).
+ /// Update the status on this scope.
+ /// Note: The status object is shared between all children of this scope.
+ /// If the resulting status is not Status::OK() and exit_on_error_ is set on
+ /// this scope, this function exits by calling LOG(FATAL).
void UpdateStatus(const Status s) const;
- // Update the builder with properties accumulated in this scope.
+ // START_SKIP_DOXYGEN
+
+ /// Update the builder with properties accumulated in this scope.
void UpdateBuilder(NodeBuilder* builder) const;
+ // END_SKIP_DOXYGEN
CompositeOpScopes GetCompositeOpScopes(const string& composite_op_name) const;
@@ -169,23 +176,24 @@ class Scope {
Status status() const { return *status_; }
- // If status() is Status::OK(), convert the Graph object stored in this scope
- // to a GraphDef proto and return Status::OK(). Otherwise, return the error
- // status as is without performing GraphDef conversion.
+ /// If status() is Status::OK(), convert the Graph object stored in this scope
+ /// to a GraphDef proto and return Status::OK(). Otherwise, return the error
+ /// status as is without performing GraphDef conversion.
Status ToGraphDef(GraphDef* gdef) const;
- // If status() is Status::OK(), construct a Graph object using the default
- // GraphConstructorOptions, and return Status::OK if graph construction was
- // successful. Otherwise, return the error status.
+ // START_SKIP_DOXYGEN
+
+ /// If status() is Status::OK(), construct a Graph object using the default
+ /// GraphConstructorOptions, and return Status::OK() if graph construction was
+ /// successful. Otherwise, return the error status.
// TODO(josh11b, keveman): Make this faster; right now it converts
// Graph->GraphDef->Graph. This cleans up the graph (e.g. adds
// edges from the source and to the sink node, resolves back edges
// by name), and makes sure the resulting graph is valid.
Status ToGraph(Graph* g) const;
+ // END_SKIP_DOXYGEN
- const std::vector<ops::Operation>& control_deps() const {
- return control_deps_;
- }
+ const std::vector<Operation>& control_deps() const { return control_deps_; }
private:
// Tag types to choose the constructor to dispatch.
@@ -214,16 +222,16 @@ class Scope {
Scope(const Scope& other, Tags::OpName, const string& name,
const string& op_name);
Scope(const Scope& other, Tags::ControlDeps,
- std::vector<ops::Operation> control_deps, bool clear_control_deps);
+ std::vector<Operation> control_deps, bool clear_control_deps);
Scope(const Scope& other, Tags::Device, const string& device);
Scope(const Scope& other, Tags::SingleUseScope, const string& op_name);
Scope(const Scope& other, Tags::ExitOnError);
Scope(const Scope& other, Tags::KernelLabel, const string& kernel_label);
- Scope(const Scope& other, Tags::Colocate,
- const ops::Operation& colocate_with_op, bool clear_colocations);
+ Scope(const Scope& other, Tags::Colocate, const Operation& colocate_with_op,
+ bool clear_colocations);
std::unordered_set<string> GetColocationConstraints(
- const ops::Operation& colocate_with_op) const;
+ const Operation& colocate_with_op) const;
// Helper functions to get a unique names.
string GetUniqueName(const string& prefix, bool check_single_use) const;
@@ -245,7 +253,7 @@ class Scope {
// GetUniqueNameForOp will cause an error status to be set on this scope.
std::shared_ptr<bool> scope_used_ = nullptr;
- const std::vector<ops::Operation> control_deps_;
+ const std::vector<Operation> control_deps_;
const string name_ = "";
const string op_name_ = "";
@@ -255,13 +263,13 @@ class Scope {
const std::unordered_set<string> colocation_constraints_;
};
-// A helper struct to hold the scopes that would be used by a function
-// constructing a composite op.
+/// A helper struct to hold the scopes that would be used by a function
+/// constructing a composite op.
struct CompositeOpScopes {
- // Scope to be used for creating the local ops (primitive or other composite
- // ops).
+ /// Scope to be used for creating the local ops (primitive or other composite
+ /// ops).
Scope child;
- // Scope to be used for creating the last op.
+ /// Scope to be used for creating the last op.
Scope last;
};
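To make the derived-scope behaviour above concrete, a short sketch (again assuming the generated op wrappers, not part of this change) of how the With<> helpers combine:

Scope root = Scope::NewRootScope();
Scope linear = root.NewSubScope("linear").WithDevice("/cpu:0");
auto w = ops::Const(linear.WithOpName("w"), {{1, 1}});     // name: "linear/w"
auto b = ops::Const(linear.WithOpName("b"), {{41}, {1}});  // name: "linear/b"
// Ops created with 'deps' also get a control dependency on w's operation.
Scope deps = linear.WithControlDependencies(w);
auto m = ops::MatMul(deps, w, b);                          // name: "linear/MatMul"
GraphDef gdef;
Status s = root.ToGraphDef(&gdef);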
diff --git a/tensorflow/cc/framework/scope_test.cc b/tensorflow/cc/framework/scope_test.cc
index 3882b5623b..9eca9d3fac 100644
--- a/tensorflow/cc/framework/scope_test.cc
+++ b/tensorflow/cc/framework/scope_test.cc
@@ -127,11 +127,11 @@ TEST(ScopeTest, SingleUseScope) {
TEST(ScopeTest, ControlDeps) {
Scope root = Scope::NewRootScope();
- auto c1 = ops::Operation();
- auto c2 = ops::Operation();
+ auto c1 = Operation();
+ auto c2 = Operation();
Scope c = root.WithControlDependencies({c1, c2});
EXPECT_EQ(c.control_deps().size(), 2);
- Scope c_c = c.WithControlDependencies({ops::Operation()});
+ Scope c_c = c.WithControlDependencies({Operation()});
EXPECT_EQ(c_c.control_deps().size(), 3);
}
diff --git a/tensorflow/cc/framework/testutil.cc b/tensorflow/cc/framework/testutil.cc
index 58afc6b979..b0746913a1 100644
--- a/tensorflow/cc/framework/testutil.cc
+++ b/tensorflow/cc/framework/testutil.cc
@@ -20,8 +20,6 @@ limitations under the License.
#include "tensorflow/core/graph/default_device.h"
namespace tensorflow {
-using namespace ops; // NOLINT(build/namespaces)
-
namespace test {
void GetTensors(const Scope& scope, OutputList tensors,
diff --git a/tensorflow/cc/framework/testutil.h b/tensorflow/cc/framework/testutil.h
index 5e67ede6ab..d027ad3744 100644
--- a/tensorflow/cc/framework/testutil.h
+++ b/tensorflow/cc/framework/testutil.h
@@ -22,12 +22,12 @@ limitations under the License.
namespace tensorflow {
namespace test {
-// Computes the outputs listed in 'tensors', returns the tensors in 'out'.
-void GetTensors(const Scope& scope, ops::OutputList tensors,
+/// Computes the outputs listed in 'tensors', returns the tensors in 'out'.
+void GetTensors(const Scope& scope, OutputList tensors,
std::vector<Tensor>* out);
-// Computes the output 'tensor', returning the resulting tensor in 'out'.
-void GetTensor(const Scope& scope, ops::Output tensor, Tensor* out);
+/// Computes the output 'tensor', returning the resulting tensor in 'out'.
+void GetTensor(const Scope& scope, Output tensor, Tensor* out);
} // namespace test
} // namespace tensorflow
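A small sketch of how these test helpers are typically called (ops::Const is assumed from the generated wrappers; the values are arbitrary):

Scope scope = Scope::NewRootScope();
auto c = ops::Const(scope, {{1, 2}, {3, 4}});
Tensor result;
// Runs the graph held by 'scope' in a session and fetches the value of 'c'.
test::GetTensor(scope, c, &result);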
diff --git a/tensorflow/cc/gradients/grad_testutil.h b/tensorflow/cc/gradients/grad_testutil.h
index 7a925f9b0e..d31f412754 100644
--- a/tensorflow/cc/gradients/grad_testutil.h
+++ b/tensorflow/cc/gradients/grad_testutil.h
@@ -22,12 +22,12 @@ limitations under the License.
namespace tensorflow {
namespace test {
-// Calls the gradient function registered for 'op', adding gradient operations
-// to the graph associated with 'scope'. Gradient outputs for each 'op' input
-// are returned in 'grad_outputs'.
-Status CallGradFunction(const Scope& scope, const ops::Operation& op,
- const std::vector<ops::Output>& grad_inputs,
- std::vector<ops::Output>* grad_outputs);
+/// Calls the gradient function registered for 'op', adding gradient operations
+/// to the graph associated with 'scope'. Gradient outputs for each 'op' input
+/// are returned in 'grad_outputs'.
+Status CallGradFunction(const Scope& scope, const Operation& op,
+ const std::vector<Output>& grad_inputs,
+ std::vector<Output>* grad_outputs);
} // namespace test
} // namespace tensorflow
diff --git a/tensorflow/cc/saved_model/constants.h b/tensorflow/cc/saved_model/constants.h
index 654e765170..7f2d560978 100644
--- a/tensorflow/cc/saved_model/constants.h
+++ b/tensorflow/cc/saved_model/constants.h
@@ -18,25 +18,25 @@ limitations under the License.
namespace tensorflow {
-// SavedModel assets directory.
+/// SavedModel assets directory.
constexpr char kSavedModelAssetsDirectory[] = "assets";
-// SavedModel assets key for graph collection-def.
+/// SavedModel assets key for graph collection-def.
constexpr char kSavedModelAssetsKey[] = "saved_model_assets";
-// SavedModel proto filename.
+/// SavedModel proto filename.
constexpr char kSavedModelFilenamePb[] = "saved_model.pb";
-// SavedModel text format proto filename.
+/// SavedModel text format proto filename.
constexpr char kSavedModelFilenamePbTxt[] = "saved_model.pbtxt";
-// SavedModel legacy init op key.
+/// SavedModel legacy init op key.
constexpr char kSavedModelLegacyInitOpKey[] = "legacy_init_op";
-// Directory in which to save the SavedModel variables.
+/// Directory in which to save the SavedModel variables.
constexpr char kSavedModelVariablesDirectory[] = "variables";
-// SavedModel variables filename.
+/// SavedModel variables filename.
constexpr char kSavedModelVariablesFilename[] = "variables";
} // namespace tensorflow
diff --git a/tensorflow/cc/saved_model/loader.h b/tensorflow/cc/saved_model/loader.h
index 10157b0a99..9b9abdbb1f 100644
--- a/tensorflow/cc/saved_model/loader.h
+++ b/tensorflow/cc/saved_model/loader.h
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-// SavedModel loading functions and SavedModelBundle struct.
+/// SavedModel loading functions and SavedModelBundle struct.
#ifndef THIRD_PARTY_TENSORFLOW_CC_SAVED_MODEL_LOADER_H_
#define THIRD_PARTY_TENSORFLOW_CC_SAVED_MODEL_LOADER_H_
@@ -27,13 +27,13 @@ limitations under the License.
namespace tensorflow {
-// SavedModel representation once the SavedModel is loaded from storage.
+/// SavedModel representation once the SavedModel is loaded from storage.
struct SavedModelBundle {
std::unique_ptr<Session> session;
MetaGraphDef meta_graph_def;
- // A TensorFlow Session does not Close itself on destruction. To avoid
- // resource leaks, we explicitly call Close on Sessions that we create.
+ /// A TensorFlow Session does not Close itself on destruction. To avoid
+ /// resource leaks, we explicitly call Close on Sessions that we create.
~SavedModelBundle() {
if (session) {
session->Close();
@@ -43,20 +43,20 @@ struct SavedModelBundle {
SavedModelBundle() = default;
};
-// Loads a SavedModel from the specified export directory. The meta graph def to
-// be loaded is identified by the supplied tags, corresponding exactly to the
-// set of tags used at SavedModel build time. Returns a SavedModel bundle with a
-// session and the requested meta graph def, if found.
+/// Loads a SavedModel from the specified export directory. The meta graph def
+/// to be loaded is identified by the supplied tags, corresponding exactly to
+/// the set of tags used at SavedModel build time. Returns a SavedModel bundle
+/// with a session and the requested meta graph def, if found.
Status LoadSavedModel(const SessionOptions& session_options,
const RunOptions& run_options, const string& export_dir,
const std::unordered_set<string>& tags,
SavedModelBundle* const bundle);
-// Checks whether the provided directory could contain a SavedModel. Note that
-// the method does not load any data by itself. If the method returns `false`,
-// the export directory definitely does not contain a SavedModel. If the method
-// returns `true`, the export directory may contain a SavedModel but provides no
-// guarantee that it can be loaded.
+/// Checks whether the provided directory could contain a SavedModel. Note that
+/// the method does not load any data by itself. If the method returns `false`,
+/// the export directory definitely does not contain a SavedModel. If the method
+/// returns `true`, the export directory may contain a SavedModel but provides
+/// no guarantee that it can be loaded.
bool MaybeSavedModelDirectory(const string& export_dir);
} // namespace tensorflow
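A hedged sketch of the corresponding call site; the export directory below is a placeholder path, and kSavedModelTagServe comes from tag_constants.h:

SavedModelBundle bundle;
SessionOptions session_options;
RunOptions run_options;
Status status = LoadSavedModel(session_options, run_options,
                               "/tmp/exported_model" /* placeholder */,
                               {kSavedModelTagServe}, &bundle);
if (status.ok()) {
  // bundle.session and bundle.meta_graph_def are ready for Session::Run calls.
}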
diff --git a/tensorflow/cc/saved_model/signature_constants.h b/tensorflow/cc/saved_model/signature_constants.h
index 5a784874cd..b2d39bd55b 100644
--- a/tensorflow/cc/saved_model/signature_constants.h
+++ b/tensorflow/cc/saved_model/signature_constants.h
@@ -18,48 +18,48 @@ limitations under the License.
namespace tensorflow {
-// Key in the signature def map for `default` serving signatures. The default
-// signature is used in inference requests where a specific signature was not
-// specified.
+/// Key in the signature def map for `default` serving signatures. The default
+/// signature is used in inference requests where a specific signature was not
+/// specified.
static constexpr char kDefaultServingSignatureDefKey[] = "serving_default";
////////////////////////////////////////////////////////////////////////////////
-// Classification API constants.
+/// Classification API constants.
-// Classification inputs.
+/// Classification inputs.
static constexpr char kClassifyInputs[] = "inputs";
-// Classification method name used in a SignatureDef.
+/// Classification method name used in a SignatureDef.
static constexpr char kClassifyMethodName[] = "tensorflow/serving/classify";
-// Classification classes output.
+/// Classification classes output.
static constexpr char kClassifyOutputClasses[] = "classes";
-// Classification scores output.
+/// Classification scores output.
static constexpr char kClassifyOutputScores[] = "scores";
////////////////////////////////////////////////////////////////////////////////
-// Predict API constants.
+/// Predict API constants.
-// Predict inputs.
+/// Predict inputs.
static constexpr char kPredictInputs[] = "inputs";
-// Predict method name used in a SignatureDef.
+/// Predict method name used in a SignatureDef.
static constexpr char kPredictMethodName[] = "tensorflow/serving/predict";
-// Predict outputs.
+/// Predict outputs.
static constexpr char kPredictOutputs[] = "outputs";
////////////////////////////////////////////////////////////////////////////////
-// Regression API constants.
+/// Regression API constants.
-// Regression inputs.
+/// Regression inputs.
static constexpr char kRegressInputs[] = "inputs";
-// Regression method name used in a SignatureDef.
+/// Regression method name used in a SignatureDef.
static constexpr char kRegressMethodName[] = "tensorflow/serving/regress";
-// Regression outputs.
+/// Regression outputs.
static constexpr char kRegressOutputs[] = "outputs";
////////////////////////////////////////////////////////////////////////////////
diff --git a/tensorflow/cc/saved_model/tag_constants.h b/tensorflow/cc/saved_model/tag_constants.h
index 8c4d12a57f..48ab1158e4 100644
--- a/tensorflow/cc/saved_model/tag_constants.h
+++ b/tensorflow/cc/saved_model/tag_constants.h
@@ -18,10 +18,10 @@ limitations under the License.
namespace tensorflow {
-// Tag for the `serving` graph.
+/// Tag for the `serving` graph.
constexpr char kSavedModelTagServe[] = "serve";
-// Tag for the `training` graph.`
+/// Tag for the `training` graph.
constexpr char kSavedModelTagTrain[] = "train";
} // namespace tensorflow
diff --git a/tensorflow/cc/training/coordinator.h b/tensorflow/cc/training/coordinator.h
index 58e95f40f6..dbcf072015 100644
--- a/tensorflow/cc/training/coordinator.h
+++ b/tensorflow/cc/training/coordinator.h
@@ -28,77 +28,77 @@ limitations under the License.
namespace tensorflow {
-// The abstract interface for runners which must implement the Join function.
+/// The abstract interface for runners which must implement the Join function.
class RunnerInterface {
public:
virtual ~RunnerInterface() {}
virtual Status Join() = 0;
- // Returns true iff the runner is running, i.e. if it is trying to populate
- // its queue.
+ /// Returns true iff the runner is running, i.e. if it is trying to populate
+ /// its queue.
virtual bool IsRunning() const = 0;
};
-// Coordinator class manages the termination of a collection of QueueRunners.
-// Without a coordinator, QueueRunners have to be joined in a specific order;
-// otherwise the QueueRunner::Join() could sometimes hang. The
-// Coordinator::RequestStop() plays the key role which notifies all running
-// threads under a coordinator to stop. This function could be called by any
-// thread or any client.
-// Usage, in the client:
-// Coordinator coord;
-// std::unique_ptr<QueueRunner> qr(&coord, ...);
-// qr.Start(session);
-// coord.RegisterRunner(std::move(qr));
-// // do some work
-// TF_CHECK_OK(coord.Join());
-// In each thread of QueueRunner, the coordinator needs to be used as:
-// void Run() {
-// while (!coord->ShouldStop()) {
-// // do some work
-// if (error) {
-// coord->RequestStop();
-// coord->ReportStatus(error_status);
-// }
-// }
-// }
+/// Coordinator class manages the termination of a collection of QueueRunners.
+/// Without a coordinator, QueueRunners have to be joined in a specific order;
+/// otherwise the QueueRunner::Join() could sometimes hang. The
+/// Coordinator::RequestStop() call plays the key role: it notifies all running
+/// threads under a coordinator to stop, and it can be called by any thread or
+/// any client.
+/// Usage, in the client:
+/// Coordinator coord;
+/// std::unique_ptr<QueueRunner> qr(&coord, ...);
+/// qr.Start(session);
+/// coord.RegisterRunner(std::move(qr));
+/// // do some work
+/// TF_CHECK_OK(coord.Join());
+/// In each thread of QueueRunner, the coordinator needs to be used as:
+/// void Run() {
+/// while (!coord->ShouldStop()) {
+/// // do some work
+/// if (error) {
+/// coord->RequestStop();
+/// coord->ReportStatus(error_status);
+/// }
+/// }
+/// }
class Coordinator {
public:
Coordinator();
- // Constructor with a list of error codes which would not be taken as errors
- // in status reporting.
+ /// Constructor with a list of error codes which would not be taken as errors
+ /// in status reporting.
Coordinator(const std::vector<error::Code>& clean_stop_errors);
- // In the destructor, RequestStop() and Join() would be called.
+ /// In the destructor, RequestStop() and Join() are called.
~Coordinator();
- // Registers a runner, i.e. a unit of running threads which is usually a
- // QueueRunner. It takes the ownership of runner to avoid lifecycle-related
- // problems. Note, the coordinator would not start these threads; they are
- // supposed to be in running state when they are registered here.
+ /// Registers a runner, i.e. a unit of running threads which is usually a
+ /// QueueRunner. It takes ownership of the runner to avoid lifecycle-related
+ /// problems. Note that the coordinator does not start these threads; they are
+ /// expected to already be running when they are registered here.
Status RegisterRunner(std::unique_ptr<RunnerInterface> runner);
- // Returns true iff all the registered runners have been stopped.
+ /// Returns true iff all the registered runners have been stopped.
bool AllRunnersStopped();
- // Requests all running threads to stop.
+ /// Requests all running threads to stop.
Status RequestStop();
- // Returns true if its RequestStop() has been called.
+ /// Returns true if its RequestStop() has been called.
bool ShouldStop();
- // Joins all threads, returns OK or the first reported and unexpected status.
+ /// Joins all threads, returns OK or the first reported and unexpected status.
Status Join();
- // Reports status to the coordinator. This is usually called by threads.
+ /// Reports status to the coordinator. This is usually called by threads.
void ReportStatus(const Status& status);
- // Returns the latest status.
+ /// Returns the latest status.
Status GetStatus();
- // Returns immediately if the coordinator is stopped or blocks until
- // RequestStop() is called.
+ /// Returns immediately if the coordinator is stopped or blocks until
+ /// RequestStop() is called.
void WaitForStop();
private:
diff --git a/tensorflow/cc/training/queue_runner.h b/tensorflow/cc/training/queue_runner.h
index e5aae8219f..bfe6a30593 100644
--- a/tensorflow/cc/training/queue_runner.h
+++ b/tensorflow/cc/training/queue_runner.h
@@ -32,46 +32,46 @@ limitations under the License.
namespace tensorflow {
-// QueueRunner class imitates the behavior of the python version of QueueRunner
-// which creates a thread for each enqueue op, runs close op on completion.
+/// QueueRunner class imitates the behavior of the Python version of QueueRunner,
+/// which creates a thread for each enqueue op and runs the close op on completion.
class QueueRunner : public RunnerInterface {
public:
- // Creates a new QueueRunner from proto.
+ /// Creates a new QueueRunner from proto.
// TODO(yuefengz): we may want to initialize from queues and ops in the
// future.
static Status New(const QueueRunnerDef& queue_runner_def,
std::unique_ptr<QueueRunner>* result);
- // Creates a new QueueRunner with a coordinator, see coordinator.h for usage.
+ /// Creates a new QueueRunner with a coordinator, see coordinator.h for usage.
static Status New(const QueueRunnerDef& queue_runner_def, Coordinator* coord,
std::unique_ptr<QueueRunner>* result);
- // Adds a callback that the queue runner will call when it detects an error.
+ /// Adds a callback that the queue runner will call when it detects an error.
void AddErrorCallback(const std::function<void(Status)>& cb);
- // Delete the previously registered callbacks.
+ /// Delete the previously registered callbacks.
void ClearErrorCallbacks();
- // The destructor would join all the threads.
+ /// The destructor joins all the threads.
~QueueRunner();
- // Starts the queue runner with the given session.
+ /// Starts the queue runner with the given session.
Status Start(Session* sess);
- // Starts the queue runner with the given session, and wait for up to the
- // specified time (in milliseconds) for the queues to start to fill up.
+ /// Starts the queue runner with the given session, and waits for up to the
+ /// specified time (in milliseconds) for the queues to start to fill up.
Status Start(Session* sess, int wait_for_ms);
- // Requests to stop and runs the cancel op. It would be called in a separate
- // thread when coordinator is set. If there is no coordinator it should be
- // called before calling Join.
+ /// Requests to stop and runs the cancel op. It is called in a separate
+ /// thread when a coordinator is set. If there is no coordinator, it should be
+ /// called before calling Join.
void Stop(Session* sess);
- // Joins all the threads. Returns okay if all threads run successfully;
- // otherwise returns the first captured failure status.
+ /// Joins all the threads. Returns okay if all threads run successfully;
+ /// otherwise returns the first captured failure status.
Status Join() final;
- // Returns the latest status.
+ /// Returns the latest status.
Status GetStatus();
private:
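Putting the Coordinator and QueueRunner pieces above together, a rough sketch (the queue_runner_def proto and the session are assumed to already exist):

Coordinator coord;
std::unique_ptr<QueueRunner> qr;
TF_CHECK_OK(QueueRunner::New(queue_runner_def, &coord, &qr));
TF_CHECK_OK(qr->Start(session.get()));
TF_CHECK_OK(coord.RegisterRunner(std::move(qr)));
// ... run training steps on 'session' ...
TF_CHECK_OK(coord.RequestStop());
TF_CHECK_OK(coord.Join());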
diff --git a/tensorflow/compiler/jit/graph_to_functiondef_test.cc b/tensorflow/compiler/jit/graph_to_functiondef_test.cc
index df45f455a9..04b2385c9c 100644
--- a/tensorflow/compiler/jit/graph_to_functiondef_test.cc
+++ b/tensorflow/compiler/jit/graph_to_functiondef_test.cc
@@ -50,8 +50,7 @@ TEST(GraphToFunctionDefTest, Basics) {
auto d = ops::Add(root.WithOpName("D"), a, b);
auto e = ops::Add(root.WithOpName("b"), d, c);
auto f = ops::Neg(root.WithOpName("h"), e);
- auto g =
- ops::AddN(root.WithOpName("G"), std::initializer_list<ops::Output>{e, f});
+ auto g = ops::AddN(root.WithOpName("G"), std::initializer_list<Output>{e, f});
auto h = ops::_Retval(root.WithOpName("H"), g, 0);
GraphDef graph_def;
diff --git a/tensorflow/compiler/jit/xla_local_launch_op.cc b/tensorflow/compiler/jit/xla_local_launch_op.cc
index 7945e057cf..acf2ccb890 100644
--- a/tensorflow/compiler/jit/xla_local_launch_op.cc
+++ b/tensorflow/compiler/jit/xla_local_launch_op.cc
@@ -45,6 +45,9 @@ REGISTER_OP("_XlaLaunch")
.Output("results: Tresults")
.Attr("Tresults: list(type) >= 0")
.Attr("function: func")
+ // XLA random-number generation ops are stateful.
+ // TODO(phawkins): create stateful and non-stateful variants of _XlaLaunch.
+ .SetIsStateful()
.Doc("XLA Launch Op. For use by the XLA JIT only.");
// Adapter class that wraps a Tensorflow allocator as an XLA allocator.
@@ -313,9 +316,10 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) {
}
Tensor output_tensor;
// Looks up the owning Tensor by buffer address.
- OP_REQUIRES_OK(ctx, xla_allocator.MakeTensorFromBuffer(
- buffer, ctx->expected_output_dtype(i), shape,
- &output_tensor));
+ OP_REQUIRES_OK(
+ ctx,
+ xla_allocator.MakeTensorFromBuffer(
+ buffer, ctx->expected_output_dtype(i), shape, &output_tensor));
ctx->set_output(i, output_tensor);
++output_num;
}
diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index b4f01de4f2..5c78ab7061 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -180,6 +180,20 @@ tf_xla_py_test(
)
tf_xla_py_test(
+ name = "random_ops_test",
+ size = "small",
+ srcs = ["random_ops_test.py"],
+ # TODO(b/31361304): enable RNG ops on GPU when parallelized.
+ disabled_backends = ["gpu"],
+ deps = [
+ ":xla_test",
+ "//tensorflow/python:framework_for_generated_wrappers",
+ "//tensorflow/python:platform_test",
+ "//tensorflow/python:random_ops",
+ ],
+)
+
+tf_xla_py_test(
name = "reduce_ops_test",
size = "medium",
srcs = ["reduce_ops_test.py"],
diff --git a/tensorflow/compiler/tests/build_defs.bzl b/tensorflow/compiler/tests/build_defs.bzl
index 7fb8e0a26d..820db13d0b 100644
--- a/tensorflow/compiler/tests/build_defs.bzl
+++ b/tensorflow/compiler/tests/build_defs.bzl
@@ -9,7 +9,7 @@ def all_backends():
return ["cpu"]
def tf_xla_py_test(name, srcs=[], deps=[], tags=[], data=[], main=None,
- backends=None, **kwargs):
+ disabled_backends=None, **kwargs):
"""Generates py_test targets, one per XLA backend.
This rule generates py_test() targets named name_backend, for each backend
@@ -31,15 +31,16 @@ def tf_xla_py_test(name, srcs=[], deps=[], tags=[], data=[], main=None,
tags: Tags to apply to the generated targets.
data: Data dependencies of the target.
main: Same as py_test's main attribute.
- backends: A list of backends to test. Supported values include "cpu" and
- "gpu". If not specified, defaults to all backends.
+ disabled_backends: A list of backends that should not be tested. Supported
+ values include "cpu" and "gpu". If not specified, no backends are disabled.
**kwargs: keyword arguments passed onto the generated py_test() rules.
"""
- if backends == None:
- backends = all_backends()
+ if disabled_backends == None:
+ disabled_backends = []
+ enabled_backends = [b for b in all_backends() if b not in disabled_backends]
test_names = []
- for backend in backends:
+ for backend in enabled_backends:
test_name = "{}_{}".format(name, backend)
backend_tags = ["tf_xla_{}".format(backend)]
backend_args = []
diff --git a/tensorflow/compiler/tests/random_ops_test.py b/tensorflow/compiler/tests/random_ops_test.py
new file mode 100644
index 0000000000..31173c717d
--- /dev/null
+++ b/tensorflow/compiler/tests/random_ops_test.py
@@ -0,0 +1,67 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for random-number generation ops in the XLA JIT compiler."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.compiler.tests.xla_test import XLATestCase
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import random_ops
+from tensorflow.python.platform import googletest
+
+
+class RandomOpsTest(XLATestCase):
+ """Test cases for random-number generating operators."""
+
+ def _testRngIsNotConstant(self, rng, dtype):
+ # Tests that 'rng' does not always return the same value.
+ with self.test_session() as sess:
+ with self.test_scope():
+ x = rng(dtype)
+
+ # A correctly functioning random-number generator should be very unlikely
+ # to produce the same output on repeated evaluations.
+ y = sess.run(x)
+ z = sess.run(x)
+ w = sess.run(x)
+
+ # We use exact equality here. If the random-number generator is producing
+ # deterministic output, all three outputs will be bitwise identical.
+ self.assertTrue((not np.array_equal(y, z)) or
+ (not np.array_equal(z, w)) or
+ (not np.array_equal(y, w)))
+
+ def testRandomUniformIsNotConstant(self):
+ def rng(dtype):
+ return random_ops.random_uniform(shape=[2], dtype=dtype,
+ maxval=1000000)
+ for dtype in self.numeric_types:
+ self._testRngIsNotConstant(rng, dtype)
+
+ def testRandomNormalIsNotConstant(self):
+ def rng(dtype):
+ return random_ops.random_normal(shape=[2], dtype=dtype)
+
+ # TODO(b/34339814): implement inverse erf support for non-F32 types.
+ dtype = dtypes.float32
+ self._testRngIsNotConstant(rng, dtype)
+
+
+if __name__ == '__main__':
+ googletest.main()
diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD
index 299b5e98c0..10b4a6d054 100644
--- a/tensorflow/compiler/tf2xla/BUILD
+++ b/tensorflow/compiler/tf2xla/BUILD
@@ -55,8 +55,6 @@ cc_library(
"//tensorflow/core:lib_internal",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:stream_executor_no_cuda",
- "//tensorflow/core:tensorflow_opensource",
- "//tensorflow/core/kernels:cwise_op",
],
alwayslink = 1,
)
diff --git a/tensorflow/compiler/tf2xla/op_registrations.cc b/tensorflow/compiler/tf2xla/op_registrations.cc
index d8a4dad4b3..d1a7abb22c 100644
--- a/tensorflow/compiler/tf2xla/op_registrations.cc
+++ b/tensorflow/compiler/tf2xla/op_registrations.cc
@@ -59,9 +59,10 @@ REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT,
Name("Ceil").TypeConstraint("T", kCpuFloatTypes));
REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT,
Name("Concat").TypeConstraint("T", kCpuAllTypes));
-REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("ConcatV2")
- .TypeConstraint("T", kCpuAllTypes)
- .TypeConstraint("Tidx", DT_INT32));
+REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT,
+ Name("ConcatV2")
+ .TypeConstraint("T", kCpuAllTypes)
+ .TypeConstraint("Tidx", DT_INT32));
REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("ConcatOffset"));
REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT,
Name("Conv2D").TypeConstraint("T", kCpuFloatTypes));
@@ -165,8 +166,11 @@ REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT,
Name("Prod").TypeConstraint("T", kCpuNumericTypes));
REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT,
Name("Range").TypeConstraint("Tidx", kCpuNumericTypes));
-// TODO(b/31361304): disabled because of XLA bugs.
-// REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("RandomStandardNormal"));
+// TODO(b/34339814): implement inverse erf for double types and update the
+// type constraint.
+REGISTER_XLA_KERNEL(
+ DEVICE_CPU_XLA_JIT,
+ Name("RandomStandardNormal").TypeConstraint("dtype", DT_FLOAT));
REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("RandomUniform"));
REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("RandomUniformInt"));
REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("Rank"));
diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc
index d291888a75..517eae2f5d 100644
--- a/tensorflow/compiler/tf2xla/xla_compiler.cc
+++ b/tensorflow/compiler/tf2xla/xla_compiler.cc
@@ -330,6 +330,8 @@ Status XlaCompiler::CompileGraph(string const& name,
&result->computation, &result->requires_runtime_context,
&compile_time_constants, &num_nonconst_outputs));
+ VLOG(2) << "Outputs: constant: " << compile_time_constants.size()
+ << " nonconstant: " << num_nonconst_outputs;
result->outputs.resize(compile_time_constants.size() + num_nonconst_outputs);
for (const auto& c : compile_time_constants) {
if (!c.status.ok()) {
diff --git a/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc b/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc
index 7a966ce241..07bbcd802f 100644
--- a/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc
+++ b/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc
@@ -127,7 +127,6 @@ static const char* binary_name;
// Test that when we use both the environment variable and actual
// commend line flags (when the latter is possible), the latter win.
TEST(ParseFlagsFromEnv, EnvAndFlag) {
- // TODO(m3b): convert to Subprocess when CL 137771604 is finished.
static struct {
const char* env;
const char* arg;
diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc
index f03b158fa7..2465edc498 100644
--- a/tensorflow/compiler/xla/reference_util.cc
+++ b/tensorflow/compiler/xla/reference_util.cc
@@ -93,6 +93,38 @@ namespace xla {
ComputationBuilder::CreateDefaultConvDimensionNumbers());
}
+/* static */ std::unique_ptr<Array4D<float>>
+ReferenceUtil::SeparableConvArray4D(const Array4D<float>& input,
+ const Array4D<float>& depthwise_weights,
+ const Array4D<float>& pointwise_weights,
+ std::pair<int64, int64> kernel_stride,
+ Padding padding) {
+ const int64 depth_multiplier = depthwise_weights.planes();
+ CHECK_EQ(pointwise_weights.depth(), input.depth() * depth_multiplier);
+
+ // Combine the two weights by reducing the depth_multiplier, so that we can
+ // apply a single convolution on the combined weights.
+ Array4D<float> weights(pointwise_weights.planes(), input.depth(),
+ depthwise_weights.height(), depthwise_weights.width());
+ for (int64 kx = 0; kx < depthwise_weights.width(); ++kx) {
+ for (int64 ky = 0; ky < depthwise_weights.height(); ++ky) {
+ for (int64 kz = 0; kz < input.depth(); ++kz) {
+ for (int64 out = 0; out < pointwise_weights.planes(); ++out) {
+ float weight = 0.0;
+ for (int64 depth = 0; depth < depth_multiplier; ++depth) {
+ weight +=
+ depthwise_weights(depth, kz, ky, kx) *
+ pointwise_weights(out, depth + kz * depth_multiplier, 0, 0);
+ }
+ weights(out, kz, ky, kx) = weight;
+ }
+ }
+ }
+ }
+
+ return ConvArray4D(input, weights, kernel_stride, padding);
+}
+
/* static */ int64 ReferenceUtil::WindowCount(int64 unpadded_width,
int64 window_len, int64 stride,
Padding padding) {
diff --git a/tensorflow/compiler/xla/reference_util.h b/tensorflow/compiler/xla/reference_util.h
index 27421b2ac4..d19d5f9dbb 100644
--- a/tensorflow/compiler/xla/reference_util.h
+++ b/tensorflow/compiler/xla/reference_util.h
@@ -73,6 +73,15 @@ class ReferenceUtil {
std::pair<int64, int64> lhs_dilation,
std::pair<int64, int64> rhs_dilation, ConvolutionDimensionNumbers dnums);
+ // Returns the result of a separable convolution with the given parameters.
+ // kernel_stride and padding applies to the depthwise convolution during
+ // the separable convolution. pointwise_weights.depth() must be equal to
+ // input.depth() * depthwise_weights.planes().
+ static std::unique_ptr<Array4D<float>> SeparableConvArray4D(
+ const Array4D<float>& input, const Array4D<float>& depthwise_weights,
+ const Array4D<float>& pointwise_weights,
+ std::pair<int64, int64> kernel_stride, Padding padding);
+
// Returns the result of reducing a matrix to a column vector. init is the
// initial value for the reduce operation, and reduce_function is the function
// to apply for each reduction step.
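A small usage sketch for the SeparableConvArray4D helper added above; the dimensions are illustrative and only chosen to satisfy pointwise_weights.depth() == input.depth() * depthwise_weights.planes(), and Fill is assumed to be available on Array4D:

// input: [planes=1, depth=2, height=8, width=8]
Array4D<float> input(1, 2, 8, 8);
input.Fill(1.0f);
// depthwise weights: [depth_multiplier=3, input depth=2, kernel 3x3]
Array4D<float> depthwise_weights(3, 2, 3, 3);
depthwise_weights.Fill(0.1f);
// pointwise weights: [output depth=4, 2 * 3 = 6, 1, 1]
Array4D<float> pointwise_weights(4, 6, 1, 1);
pointwise_weights.Fill(0.2f);
auto result = ReferenceUtil::SeparableConvArray4D(
    input, depthwise_weights, pointwise_weights,
    /*kernel_stride=*/{1, 1}, Padding::kSame);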
diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
index 1a87a0043a..4d118d2e4e 100644
--- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
@@ -281,7 +281,7 @@ llvm::Value* ElementalIrEmitter::EmitFloatMin(llvm::Value* lhs_value,
StatusOr<llvm::Value*> ElementalIrEmitter::EmitErfInv(PrimitiveType prim_type,
llvm::Value* x) const {
if (prim_type != F32) {
- return Unimplemented("inverse erf");
+ return Unimplemented("inverse erf only implemented for F32 (b/34339814)");
}
auto getFloat = [&](const float f) {
return llvm::ConstantFP::get(ir_builder_->getFloatTy(), f);
diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h
index 373ab79ab2..ac478afabc 100644
--- a/tensorflow/compiler/xla/service/executable.h
+++ b/tensorflow/compiler/xla/service/executable.h
@@ -115,6 +115,9 @@ class Executable {
const HloModuleConfig& module_config() const { return *module_config_; }
+ // Returns whether this executable has an associated HloModuleConfig.
+ bool has_module_config() const { return module_config_ != nullptr; }
+
// Returns the versioned computation handle of the computation computed by
// this executable.
const VersionedComputationHandle& entry_computation_handle() const {
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 9aeebe42f8..8353731fdd 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -365,6 +365,38 @@ cc_library(
)
cc_library(
+ name = "fusion_merger",
+ srcs = ["fusion_merger.cc"],
+ hdrs = ["fusion_merger.h"],
+ deps = [
+ "//tensorflow/compiler/xla:shape_util",
+ "//tensorflow/compiler/xla:util",
+ "//tensorflow/compiler/xla:xla_data_proto",
+ "//tensorflow/compiler/xla/service:hlo",
+ "//tensorflow/compiler/xla/service:hlo_cost_analysis",
+ "//tensorflow/compiler/xla/service:hlo_pass",
+ "//tensorflow/core:lib",
+ ],
+)
+
+cc_test(
+ name = "fusion_merger_test",
+ srcs = ["fusion_merger_test.cc"],
+ deps = [
+ ":fusion_merger",
+ ":instruction_fusion",
+ "//tensorflow/compiler/xla:literal_util",
+ "//tensorflow/compiler/xla:shape_util",
+ "//tensorflow/compiler/xla:test_helpers",
+ "//tensorflow/compiler/xla:util",
+ "//tensorflow/compiler/xla:xla_data_proto",
+ "//tensorflow/compiler/xla/service:hlo",
+ "//tensorflow/compiler/xla/tests:hlo_test_base",
+ "//tensorflow/core:test_main",
+ ],
+)
+
+cc_library(
name = "pad_insertion",
srcs = ["pad_insertion.cc"],
hdrs = ["pad_insertion.h"],
@@ -386,6 +418,7 @@ cc_library(
deps = [
":convolution_folding",
":copy_insertion",
+ ":fusion_merger",
":gpu_executable",
":hlo_schedule",
":instruction_fusion",
diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc
new file mode 100644
index 0000000000..caa919b688
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc
@@ -0,0 +1,270 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/fusion_merger.h"
+
+#include <algorithm>
+
+#include "tensorflow/compiler/xla/service/hlo_cost_analysis.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+
+namespace xla {
+namespace gpu {
+
+namespace {
+
+// Traverses users of tuple shape, adding leaf instructions to 'instructions'.
+void MaybeResolveTupleElements(HloInstruction* instruction,
+ std::vector<HloInstruction*>* instructions) {
+ if (ShapeUtil::IsTuple(instruction->shape())) {
+ for (auto tuple_user : instruction->users()) {
+ MaybeResolveTupleElements(tuple_user, instructions);
+ }
+ } else {
+ instructions->push_back(instruction);
+ }
+}
+
+// Returns the bytes read by fusion parameter 'param', by returning the byte
+// size of 'param' shape (or the cumulative byte sizes of all leaf tuple
+// elements if 'param' is tuple-shaped).
+// In the special case where all users of 'param' (or all users of a leaf
+// tuple element if 'param' is tuple-shaped) are Slice instructions, the size
+// of each slice instruction is accumulated instead, to give a more accurate
+// value for bytes read.
+double CalculateBytesReadByFusionParameter(HloInstruction* param) {
+ CHECK_EQ(HloOpcode::kParameter, param->opcode());
+
+ // Adds all leaf tuple elements to 'instructions' if 'param' is tuple-shaped.
+ // Adds 'param' to 'instructions' otherwise.
+ std::vector<HloInstruction*> instructions;
+ MaybeResolveTupleElements(param, &instructions);
+
+ // Iterate through 'instructions' accumulating byte sizes of each instruction
+ // shape. For each 'instruction' in 'instructions', if all users of
+// 'instruction' are Slice instructions, accumulates the byte sizes of each
+ // Slice for a more accurate estimate of bytes read.
+ double bytes = 0.0;
+ for (auto& instruction : instructions) {
+ if (std::all_of(instruction->users().begin(), instruction->users().end(),
+ [](const HloInstruction* instruction) {
+ return instruction->opcode() == HloOpcode::kSlice ||
+ instruction->opcode() == HloOpcode::kDynamicSlice;
+ })) {
+ // All users are slice: accumulate bytes of all user slice instructions.
+ for (auto& user : instruction->users()) {
+ bytes += ShapeUtil::ByteSizeOf(user->shape());
+ }
+ } else {
+ // Some users are not slice: accumulate full size of 'instruction'.
+ bytes += ShapeUtil::ByteSizeOf(instruction->shape());
+ }
+ }
+ return bytes;
+}
+
+// Returns the bytes read by all fusion parameters of instruction 'fusion'.
+double CalculateBytesReadByFusionInstruction(HloInstruction* fusion) {
+ double bytes = 0.0;
+ for (const auto& fused_instruction : fusion->fused_instructions()) {
+ if (fused_instruction->opcode() != HloOpcode::kParameter) {
+ continue;
+ }
+ bytes += CalculateBytesReadByFusionParameter(fused_instruction.get());
+ }
+ return bytes;
+}
+
+// Returns the flops to bytes transferred ratio of instruction 'fusion'.
+double CalculateFlopsToBytesRatio(HloInstruction* fusion) {
+ CHECK_EQ(HloOpcode::kFusion, fusion->opcode());
+ // Calculate total bytes transferred in/out.
+ double bytes = CalculateBytesReadByFusionInstruction(fusion);
+ // Add bytes written to the root instruction's buffer.
+ bytes += ShapeUtil::ByteSizeOf(fusion->fused_expression_root()->shape());
+ // Calculate flops for all fused instructions.
+ HloCostAnalysis analysis;
+ TF_CHECK_OK(fusion->fused_expression_root()->Accept(&analysis));
+ // Return flops / bytes.
+ return bytes > 0.0 ? analysis.flop_count() / bytes : analysis.flop_count();
+}
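+
+// Example (illustrative): an element-wise add over f32[1024] reads two
+// 4096-byte operands and writes 4096 bytes (12288 bytes in total) while
+// performing on the order of 1024 flops, giving a ratio of roughly 0.08,
+// comfortably below the threshold of 1.0.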
+
+// Returns bytes transferred by instruction 'fusion', including the bytes
+// that would be read by all users.
+double GetCurrentBytesTransferred(HloInstruction* fusion) {
+ CHECK_EQ(HloOpcode::kFusion, fusion->opcode());
+ const double bytes_read = CalculateBytesReadByFusionInstruction(fusion);
+ const double bytes_written =
+ ShapeUtil::ByteSizeOf(fusion->fused_expression_root()->shape());
+ // Current bytes transferred (ignoring non 'fusion' user operands) is bytes
+ // read and written by 'fusion', plus reads of size 'bytes_written' for each
+ // user.
+ return bytes_read + bytes_written * (fusion->user_count() + 1);
+}
+
+// Returns bytes transferred if 'fusion' were to be merged into its users.
+double GetMergedBytesTransferred(HloInstruction* fusion) {
+ CHECK_EQ(HloOpcode::kFusion, fusion->opcode());
+ return CalculateBytesReadByFusionInstruction(fusion) * fusion->user_count();
+}
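+
+// Worked example (illustrative numbers): a fusion that reads 100 bytes,
+// writes a 40-byte result and has two users currently transfers
+// 100 + 40 * (2 + 1) = 220 bytes; merged into both users it would transfer
+// 100 * 2 = 200 bytes, a ratio of about 0.91, so the size check passes.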
+
+} // anonymous namespace
+
+// FusionInstructionMerger visits all fusion instructions in 'computation'
+// in post order, attempting to merge each into all of its users.
+// Accumulates and reports stats on successful/failed merge attempts.
+class FusionInstructionMerger {
+ public:
+ explicit FusionInstructionMerger(HloComputation* computation)
+ : computation_(computation) {}
+
+ Status Run();
+
+ bool changed() const { return changed_; }
+
+ private:
+ Status HandleFusion(HloInstruction* fusion);
+
+ HloComputation* computation_;
+ bool changed_ = false;
+
+ // Fusion instruction merge stats.
+ int total_visited_ = 0;
+ int total_merged_ = 0;
+ int num_fail_no_users_ = 0;
+ int num_fail_not_loop_fusion_ = 0;
+ int num_fail_merge_all_users_ = 0;
+ int num_fail_flops_to_byte_ratio_ = 0;
+ int num_fail_net_bytes_transferred_ratio_ = 0;
+
+ TF_DISALLOW_COPY_AND_ASSIGN(FusionInstructionMerger);
+};
+
+Status FusionInstructionMerger::Run() {
+ for (auto* instruction : computation_->MakeInstructionPostOrder()) {
+ if (instruction->opcode() == HloOpcode::kFusion) {
+ TF_RETURN_IF_ERROR(HandleFusion(instruction));
+ }
+ }
+
+ VLOG(1) << "FusionInstructionMerger EXIT"
+ << " computation: " << computation_->name()
+ << " total_visited: " << total_visited_
+ << " total_merged: " << total_merged_ << " merge failures { "
+ << " no_users: " << num_fail_no_users_
+ << " not_loop_fusion: " << num_fail_not_loop_fusion_
+ << " merge_all_users: " << num_fail_merge_all_users_
+ << " flops_to_byte_ratio: " << num_fail_flops_to_byte_ratio_
+ << " net_bytes_transferred: " << num_fail_net_bytes_transferred_ratio_
+ << " }";
+ return Status::OK();
+}
+
+Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) {
+ VLOG(3) << "FusionInstructionMerger ENTRY fusion: " << fusion->name()
+ << " flops_to_bytes_ratio: " << CalculateFlopsToBytesRatio(fusion);
+ ++total_visited_;
+ // Skip 'fusion' instruction if there are no users into which we can merge.
+ if (fusion->users().empty()) {
+ ++num_fail_no_users_;
+ return Status::OK();
+ }
+
+ // Skip 'fusion' instruction if it is not a loop fusion. Library fusion
+ // instructions match specific patterns, so they shouldn't be further fused.
+ // Input fusion instructions need to be rooted at a particular HLO (e.g.
+ // kReduce), so they shouldn't be further fused either.
+ if (fusion->fusion_kind() != HloInstruction::FusionKind::kLoop) {
+ ++num_fail_not_loop_fusion_;
+ return Status::OK();
+ }
+ // Skip 'fusion' instruction if we cannot merge into all of its users.
+ // Merging into all users enables the removal of 'fusion' from the
+ // computation.
+ if (!std::all_of(fusion->users().begin(), fusion->users().end(),
+ [](const HloInstruction* instruction) {
+ return instruction->opcode() == HloOpcode::kFusion &&
+ instruction->fusion_kind() ==
+ HloInstruction::FusionKind::kLoop;
+ })) {
+ ++num_fail_merge_all_users_;
+ return Status::OK();
+ }
+ // Skip 'fusion' instruction if its flops to bytes transferred ratio
+ // exceeds the threshold value.
+ if (CalculateFlopsToBytesRatio(fusion) >
+ FusionMerger::GetThresholdFlopsToBytesRatio()) {
+ ++num_fail_flops_to_byte_ratio_;
+ return Status::OK();
+ }
+ // Skip 'fusion' instruction if merging it into all users would result in a
+ // net increase in bytes transferred (currently the net bytes transferred is
+ // allowed to increase by up to ~10% in exchange for eliminating the
+ // overhead of a GPU kernel launch).
+ const double current_bytes_transferred = GetCurrentBytesTransferred(fusion);
+ const double merged_bytes_transferred = GetMergedBytesTransferred(fusion);
+ const double merged_to_current_bytes_ratio =
+ merged_bytes_transferred / std::max(1.0, current_bytes_transferred);
+ if (merged_to_current_bytes_ratio > 1.10) {
+ ++num_fail_net_bytes_transferred_ratio_;
+ return Status::OK();
+ }
+ // Merge fused instructions from 'fusion' into each user.
+ std::set<HloInstruction*> users = fusion->users();
+ for (HloInstruction* user : users) {
+ user->MergeFusionInstruction(fusion);
+ changed_ = true;
+ }
+ ++total_merged_;
+ VLOG(2) << "Merged fusion instruction: " << fusion->name()
+ << " flops_to_bytes_ratio: " << CalculateFlopsToBytesRatio(fusion)
+ << " merged_to_current_bytes_ratio: " << merged_to_current_bytes_ratio
+ << " into users { "
+ << tensorflow::str_util::Join(users, ", ",
+ [](string* out, HloInstruction* user) {
+ tensorflow::strings::StrAppend(
+ out, user->name());
+ })
+ << " }";
+ // Remove 'fusion' instruction.
+ CHECK_EQ(0, fusion->user_count());
+ computation_->RemoveInstruction(fusion);
+ return Status::OK();
+}
+
+StatusOr<bool> FusionMerger::Run(HloModule* module) {
+ bool changed = false;
+ VLOG(2) << "FusionMerger for module: " << module->name();
+ for (auto& computation : module->computations()) {
+ VLOG(1) << "Before running FusionInstructionMerger for computation: "
+ << computation->name();
+ XLA_VLOG_LINES(3, computation->ToString());
+
+ FusionInstructionMerger fusion_merger(computation.get());
+ TF_RETURN_IF_ERROR(fusion_merger.Run());
+ changed |= fusion_merger.changed();
+
+ VLOG(1) << "After running FusionInstructionMerger for computation: "
+ << computation->name() << " changed: " << changed;
+ XLA_VLOG_LINES(3, computation->ToString());
+ }
+ return changed;
+}
+
+} // namespace gpu
+} // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger.h b/tensorflow/compiler/xla/service/gpu/fusion_merger.h
new file mode 100644
index 0000000000..717eb15b85
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/fusion_merger.h
@@ -0,0 +1,47 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_FUSION_MERGER_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_FUSION_MERGER_H_
+
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_pass.h"
+
+namespace xla {
+namespace gpu {
+
+// An HLO pass that attempts to merge fusion instructions to reduce kernel
+// launch overhead and improve data locality.
+//
+// Fusion instructions are merged into their users if two conditions are met:
+//
+// 1) The flops_to_bytes ratio of the fusion instruction is below the threshold
+// value of 1.0.
+// 2) The result of merging the fusion instruction into its users would not
+// increase bytes transferred.
+//
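+// For example, a loop fusion whose result feeds two other loop fusions may be
+// merged into both of them, eliminating one kernel launch and the round trip
+// of its intermediate result through memory. The implementation tolerates a
+// small (~10%) increase in bytes transferred in exchange for the saved launch
+// overhead.
+//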
+class FusionMerger : public HloPass {
+ public:
+ FusionMerger() : HloPass("fusion merger") {}
+
+ StatusOr<bool> Run(HloModule* module) override;
+
+ static double GetThresholdFlopsToBytesRatio() { return 1.0; }
+};
+
+} // namespace gpu
+} // namespace xla
+
+#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_FUSION_MERGER_H_
diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc
new file mode 100644
index 0000000000..a87e66ca86
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc
@@ -0,0 +1,456 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/fusion_merger.h"
+
+#include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h"
+#include "tensorflow/compiler/xla/test_helpers.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+
+namespace xla {
+namespace gpu {
+namespace {
+
+class FusionMergerTest : public HloTestBase {
+ protected:
+ FusionMergerTest() : module_(TestName()) {}
+
+ // Builds the following computation:
+ //
+ // Param
+ // / | \
+ // / | \
+ // OnesVec GTE(0) GTE(1) GTE(2)
+ // \ / \ /
+ // Add Add OnesVec
+ // \ / \ /
+ // \ Add Mul OnesVec
+ // \ | | /
+ // \ Mul Add
+ // \ | /
+ // \ | /
+ // Tuple
+ //
+ HloComputation* BuildComputation0() {
+ auto builder = HloComputation::Builder(TestName() + ".Computation0");
+ // Create param instruction to access computation state.
+ auto param = builder.AddInstruction(
+ HloInstruction::CreateParameter(0, tuple_shape3_, "param"));
+
+ // Create GetTupleElement instructions for each tuple element.
+ auto gte0 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(data_shape_, param, 0));
+ auto gte1 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(data_shape_, param, 1));
+ auto gte2 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(data_shape_, param, 2));
+
+ // Create const vector of ones to be used in element-wise computations.
+ auto one_vec = builder.AddInstruction(HloInstruction::CreateConstant(
+ LiteralUtil::CreateR1<float>({1.f, 1.f, 1.f, 1.f})));
+
+ // Create simple fusable computation for tuple element 0 (won't get merged).
+ auto out0 = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kAdd, one_vec, gte0));
+
+ // Create fusable computation which is dependent on second and third tuple
+ // elements (will initially be fused on its own).
+ auto add1 = builder.AddInstruction(
+ HloInstruction::CreateBinary(data_shape_, HloOpcode::kAdd, gte1, gte2));
+
+ // Create two sub-computations, both of which are users of 'add1'.
+
+ // First sub-computation: out1 = Mul(Add(add1, one_vec), one_vec)
+ auto add2 = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kAdd, add1, one_vec));
+ auto out1 = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kMultiply, add2, one_vec));
+
+ // Second sub-computation: out2 = Add(Mul(add1, one_vec), one_vec)
+ auto mul0 = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kMultiply, add1, one_vec));
+ auto out2 = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kAdd, mul0, one_vec));
+
+ // Create output Tuple.
+ builder.AddInstruction(HloInstruction::CreateTuple({out0, out1, out2}));
+ return module_.AddEntryComputation(builder.Build());
+ }
+
+ // Builds the following computation:
+ //
+ // Param
+ // / \
+ // GTE(0) GTE(1)
+ // | | \ /
+ // | | Mul
+ // \ \ |
+ // \ Mul
+ // \ |
+ // OnesVec Mul OnesVec
+ // \ / \ /
+ // OnesVec Add Mul OnesVec
+ // \ | | /
+ // Mul Add
+ // \ /
+ // \ /
+ // Tuple
+ //
+ HloComputation* BuildComputation1() {
+ auto builder = HloComputation::Builder(TestName() + ".Computation1");
+ Shape tuple_shape2_ = ShapeUtil::MakeTupleShape({data_shape_, data_shape_});
+ // Create param instruction to access computation state.
+ auto state = builder.AddInstruction(
+ HloInstruction::CreateParameter(0, tuple_shape2_, "state"));
+
+ // Create shared sub-computation (will initially be fused on its own).
+ auto gte0 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(data_shape_, state, 0));
+ auto gte1 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(data_shape_, state, 1));
+ // Calculate the flops we need to generate for this shared computation
+ // to exceed the threshold flops_to_bytes_ratio.
+ // Note that bytes transferred is multiplied by 3 because there are two
+ // operands and one output of size 'data_shape_'.
+ const int64 flops_needed = FusionMerger::GetThresholdFlopsToBytesRatio() *
+ ShapeUtil::ByteSizeOf(data_shape_) * 3;
+ const int64 vec_elements = ShapeUtil::ElementsIn(data_shape_);
+ const int64 iters = (flops_needed + vec_elements - 1) / vec_elements;
+
+ auto mul0 = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kMultiply, gte0, gte1));
+ for (int i = 0; i < iters; ++i) {
+ mul0 = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kMultiply, gte0, mul0));
+ }
+
+ // Create two sub-computations, both of which are users of 'mul0'.
+ auto one_vec = builder.AddInstruction(HloInstruction::CreateConstant(
+ LiteralUtil::CreateR1<float>({1.f, 1.f, 1.f, 1.f})));
+
+ // First sub-computation: out0 = Mul(Add(mul0, one_vec), one_vec)
+ auto add0 = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kAdd, mul0, one_vec));
+ auto out0 = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kMultiply, add0, one_vec));
+
+ // Second sub-computation: out1 = Add(Mul(mul0, one_vec), one_vec)
+ auto mul1 = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kMultiply, mul0, one_vec));
+ auto out1 = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kAdd, mul1, one_vec));
+
+ // Create output Tuple.
+ builder.AddInstruction(HloInstruction::CreateTuple({out0, out1}));
+ return module_.AddEntryComputation(builder.Build());
+ }
+
+ // Builds the following computation:
+ //
+ // Param
+ // / | | \
+ // / | | \
+ // / | | \
+ // GTE(0) GTE(1) GTE(2) GTE(3)
+ // \ / / /
+ // Add / /
+ // \ / /
+ // Add /
+ // \ /
+ // \ /
+ // OnesVec Add OnesVec
+ // \ / \ /
+ // OnesVec Add Mul OnesVec
+ // \ | | /
+ // Mul Add
+ // \ /
+ // \ /
+ // Tuple
+ //
+ HloComputation* BuildComputation2(bool add_extra_input) {
+ auto builder = HloComputation::Builder(TestName() + ".Computation2");
+ Shape state_shape = add_extra_input ? tuple_shape4_ : tuple_shape3_;
+ // Create param instruction to access computation state.
+ auto state = builder.AddInstruction(
+ HloInstruction::CreateParameter(0, state_shape, "state"));
+
+ // Create GetTupleElement instructions for each tuple element.
+ auto gte0 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(data_shape_, state, 0));
+ auto gte1 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(data_shape_, state, 1));
+ auto gte2 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(data_shape_, state, 2));
+
+ // Create shared fusable computation that reduces its operands.
+ auto reduce0 = builder.AddInstruction(
+ HloInstruction::CreateBinary(data_shape_, HloOpcode::kAdd, gte0, gte1));
+ auto reduce_out = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kAdd, reduce0, gte2));
+ if (add_extra_input) {
+ auto gte3 = builder.AddInstruction(
+ HloInstruction::CreateGetTupleElement(data_shape_, state, 3));
+ reduce_out = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kAdd, reduce_out, gte3));
+ }
+
+ // Create two fusable sub-computations which are dependent on shared
+ // computation 'reduce_out'.
+ auto one_vec = builder.AddInstruction(HloInstruction::CreateConstant(
+ LiteralUtil::CreateR1<float>({1.f, 1.f, 1.f, 1.f})));
+
+ // First sub-computation: out0 = Mul(Add(reduce_out, one_vec), one_vec)
+ auto add2 = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kAdd, reduce_out, one_vec));
+ auto out0 = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kMultiply, add2, one_vec));
+
+ // Second sub-computation: out1 = Add(Mul(reduce_out, one_vec), one_vec)
+ auto mul0 = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kMultiply, reduce_out, one_vec));
+ auto out1 = builder.AddInstruction(HloInstruction::CreateBinary(
+ data_shape_, HloOpcode::kAdd, mul0, one_vec));
+
+ // Create output Tuple.
+ builder.AddInstruction(HloInstruction::CreateTuple({out0, out1}));
+ return module_.AddEntryComputation(builder.Build());
+ }
+
+ Shape data_shape_ = ShapeUtil::MakeShape(F32, {4});
+ Shape tuple_shape2_ = ShapeUtil::MakeTupleShape({data_shape_, data_shape_});
+ Shape tuple_shape3_ =
+ ShapeUtil::MakeTupleShape({data_shape_, data_shape_, data_shape_});
+ Shape tuple_shape4_ = ShapeUtil::MakeTupleShape(
+ {data_shape_, data_shape_, data_shape_, data_shape_});
+
+ HloModule module_;
+};
+
+// Tests that we can merge a fusion instruction that is below threshold.
+//
+// Original computation:
+//
+// Param
+// / | \
+// / | \
+// OnesVec GTE(0) GTE(1) GTE(2)
+// \ / \ /
+// Add Add OnesVec
+// \ / \ /
+// \ Add Mul OnesVec
+// \ | | /
+// \ Mul Add
+// \ | /
+// \ | /
+// Tuple
+//
+// Computation after fusion passes:
+//
+// Param
+// / \
+// Fusion3 Fusion2
+// | / \
+// \ Fusion0 Fusion1
+// \ | /
+// \ | /
+// Tuple
+//
+// Computation after fusion merger pass (Fusion2 is merged into Fusion0 and
+// Fusion1):
+// Param
+// / | \
+// Fusion3 Fusion0 Fusion1
+// \ | /
+// Tuple
+//
+TEST_F(FusionMergerTest, MergeSharedFusionInstruction) {
+ auto computation = BuildComputation0();
+ // Run standard fusion passes.
+ EXPECT_TRUE(
+ GpuInstructionFusion(/*may_duplicate=*/false).Run(&module_).ValueOrDie());
+ EXPECT_FALSE(
+ GpuInstructionFusion(/*may_duplicate=*/true).Run(&module_).ValueOrDie());
+ // Run fusion merger pass, which should merge the shared fusion instruction
+ // into its two users.
+ EXPECT_TRUE(FusionMerger().Run(&module_).ValueOrDie());
+
+ auto* root = computation->root_instruction();
+ EXPECT_EQ(HloOpcode::kTuple, root->opcode());
+ // Check operand 0 (not merged). Should have 4 instructions.
+ auto* operand0 = root->operand(0);
+ EXPECT_EQ(HloOpcode::kFusion, operand0->opcode());
+ EXPECT_EQ(4, operand0->fused_instructions().size());
+ // Check operand 1 (should have merged in its operand fusion instruction).
+ auto* operand1 = root->operand(1);
+ EXPECT_EQ(HloOpcode::kFusion, operand1->opcode());
+ EXPECT_EQ(7, operand1->fused_instructions().size());
+ // Check operand 2 (should have merged in its operand fusion instruction).
+ auto* operand2 = root->operand(2);
+ EXPECT_EQ(HloOpcode::kFusion, operand2->opcode());
+ EXPECT_EQ(7, operand2->fused_instructions().size());
+}
+
+// Tests that we do not merge a fusion instruction that is above the
+// flops-to-bytes threshold.
+//
+// Original computation:
+//
+// Param
+// / \
+// GTE(0) GTE(1)
+// | | \ /
+// | | Mul
+// \ \ |
+// \ Mul
+// \ |
+// OnesVec Mul OnesVec
+// \ / \ /
+// OnesVec Add Mul OnesVec
+// \ | | /
+// Mul Add
+// \ /
+// \ /
+// Tuple
+//
+// Computation after fusion passes and fusion merger pass (Fusion2 is not
+// merged because it exceeds the flops-to-bytes ratio threshold).
+//
+// Param
+// |
+// Fusion2
+// / \
+// Fusion0 Fusion1
+// \ /
+// Tuple
+//
+TEST_F(FusionMergerTest, FlopsToBytesRatioThresholdExceeded) {
+ BuildComputation1();
+ // Run standard fusion passes.
+ EXPECT_TRUE(
+ GpuInstructionFusion(/*may_duplicate=*/false).Run(&module_).ValueOrDie());
+ EXPECT_FALSE(
+ GpuInstructionFusion(/*may_duplicate=*/true).Run(&module_).ValueOrDie());
+ // Run fusion merger pass, which should detect that the flops/bytes of the
+ // shared fusion instruction exceeds the threshold ratio, and therefore
+ // cannot be merged with other fusion instructions.
+ EXPECT_FALSE(FusionMerger().Run(&module_).ValueOrDie());
+}
+
+// Tests that a fusion instruction is not merged when merging would exceed the
+// bytes-transferred threshold.
+//
+// Original computation:
+//
+// Param
+// / | | \
+// / | | \
+// / | | \
+// GTE(0) GTE(1) GTE(2) GTE(3)
+// \ / / /
+// Add / /
+// \ / /
+// Add /
+// \ /
+// \ /
+// OnesVec Add OnesVec
+// \ / \ /
+// OnesVec Add Mul OnesVec
+// \ | | /
+// Mul Add
+// \ /
+// \ /
+// Tuple
+//
+// Computation after fusion passes and fusion merger pass. Fusion2 is not
+// merged because replicating its reads into Fusion0 and Fusion1 would exceed
+// the bytes-transferred threshold.
+//
+// Param
+// |
+// Fusion2
+// / \
+// Fusion0 Fusion1
+// \ /
+// Tuple
+//
+TEST_F(FusionMergerTest, BytesTransferredThresholdExceeded) {
+ BuildComputation2(/*add_extra_input=*/true);
+ // Run standard fusion passes.
+ EXPECT_TRUE(
+ GpuInstructionFusion(/*may_duplicate=*/false).Run(&module_).ValueOrDie());
+ EXPECT_FALSE(
+ GpuInstructionFusion(/*may_duplicate=*/true).Run(&module_).ValueOrDie());
+ // Run fusion merger pass, which should detect that the net bytes transferred
+ // (if merged) would increase.
+ EXPECT_FALSE(FusionMerger().Run(&module_).ValueOrDie());
+}
+
+// Tests that a fusion instruction is merged when the bytes-transferred
+// threshold is not exceeded.
+//
+// Original computation:
+//
+// Param
+// / | \
+// / | \
+// / | \
+// GTE(0) GTE(1) GTE(2)
+// \ / /
+// Add /
+// \ /
+// OnesVec Add OnesVec
+// \ / \ /
+// OnesVec Add Mul OnesVec
+// \ / \ /
+// Mul Add
+// \ /
+// \ /
+// Tuple
+//
+// Computation after fusion passes:
+//
+// Param
+// |
+// Fusion2
+// / \
+// Fusion0 Fusion1
+// \ /
+// Tuple
+//
+// Computation after fusion merger pass (Fusion2 is merged into Fusion0 and
+// Fusion1, because the bytes Fusion2 reads from Param are reduced in this
+// test, which keeps the merge below the bytes-transferred threshold).
+//
+// Param
+// / \
+// Fusion0 Fusion1
+// \ /
+// Tuple
+//
+TEST_F(FusionMergerTest, BytesTransferredThresholdNotExceeded) {
+ BuildComputation2(/*add_extra_input=*/false);
+ // Run standard fusion passes.
+ EXPECT_TRUE(
+ GpuInstructionFusion(/*may_duplicate=*/false).Run(&module_).ValueOrDie());
+ EXPECT_FALSE(
+ GpuInstructionFusion(/*may_duplicate=*/true).Run(&module_).ValueOrDie());
+ // Run fusion merger pass, which should detect that the net bytes transferred
+ // (if merged) would not increase.
+ EXPECT_TRUE(FusionMerger().Run(&module_).ValueOrDie());
+}
+
+} // namespace
+} // namespace gpu
+} // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index 2f95446e6c..b5d7ba48d2 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -30,6 +30,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/buffer_liveness.h"
#include "tensorflow/compiler/xla/service/gpu/convolution_folding.h"
#include "tensorflow/compiler/xla/service/gpu/copy_insertion.h"
+#include "tensorflow/compiler/xla/service/gpu/fusion_merger.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h"
#include "tensorflow/compiler/xla/service/gpu/hlo_schedule.h"
#include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h"
@@ -132,6 +133,7 @@ tensorflow::Status OptimizeHloModule(HloModule* hlo_module,
HloPassFix<HloPassPipeline> fusion("fusion", dump_hlo);
fusion.AddPass<GpuInstructionFusion>(/*may_duplicate=*/false);
fusion.AddPass<GpuInstructionFusion>(/*may_duplicate=*/true);
+ fusion.AddPass<FusionMerger>();
return fusion.Run(hlo_module).status();
}
}
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 7ae0a995af..48be0bd2c0 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -420,6 +420,37 @@ HloInstruction::CreateFusionForBackwardConvolution(
return fusion;
}
+void HloInstruction::MergeFusionInstruction(
+ HloInstruction* instruction_to_merge) {
+ CHECK_EQ(opcode_, HloOpcode::kFusion);
+ CHECK_EQ(instruction_to_merge->opcode(), HloOpcode::kFusion);
+ // Clone the instruction from which to merge fused instructions.
+ std::unique_ptr<HloInstruction> clone = instruction_to_merge->Clone();
+ // Replace uses of fused parameters with the corresponding operand of the
+ // fusion.
+ // Add all non-parameter fused instructions to 'unfused_instructions' to be
+ // merged into 'this'.
+ std::vector<HloInstruction*> unfused_instructions;
+ for (auto& fused_instruction : clone->fused_instructions()) {
+ if (fused_instruction->opcode() == HloOpcode::kParameter) {
+ fused_instruction->ReplaceAllUsesWith(
+ clone->mutable_operand(fused_instruction->parameter_number()));
+ } else {
+ unfused_instructions.push_back(fused_instruction.get());
+ }
+ }
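+ // The clone's fused expression root is expected to be the first entry in
+ // 'unfused_instructions'; the CHECK below guards that assumption.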
+ CHECK(unfused_instructions.front() == clone->fused_expression_root());
+ // Replace instruction_to_merge use of 'this' with unfused_root.
+ instruction_to_merge->ReplaceUseWith(this, unfused_instructions.front());
+ // Fuse 'unfused_instructions' into 'this'.
+ for (auto& instruction : unfused_instructions) {
+ FuseInstruction(instruction);
+ instruction->DetachFromOperands();
+ }
+ CHECK_EQ(0, clone->user_count());
+ clone->DetachFromOperands();
+}
+
HloInstruction* HloInstruction::FuseInstruction(
HloInstruction* instruction_to_fuse) {
CHECK_EQ(opcode_, HloOpcode::kFusion);
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 8e7a253578..ecf29a476d 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -79,11 +79,6 @@ class HloInstruction {
const Shape& shape, RandomDistribution distribution,
tensorflow::gtl::ArraySlice<HloInstruction*> parameters);
- // Creates an n-ary elementwise operation.
- static std::unique_ptr<HloInstruction> CreateNary(
- const Shape& shape, HloOpcode opcode,
- tensorflow::gtl::ArraySlice<HloInstruction*> operands);
-
// Creates a unary instruction (one operand).
// Precondition: opcode must be a legitimate unary operation.
static std::unique_ptr<HloInstruction> CreateUnary(const Shape& shape,
@@ -492,6 +487,13 @@ class HloInstruction {
return fusion_kind_;
}
+ // Merges the fused instructions from 'instruction_to_merge' into the
+ // fused instruction set of 'this', updating operands as necessary.
+ //
+ // Precondition: opcode() == HloOpcode::kFusion
+ // Precondition: 'instruction_to_merge' must be an operand of 'this'.
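+ //
+ // For example, if this fusion instruction uses operand fusion 'F' as an
+ // input, merging 'F' copies its non-parameter fused instructions into this
+ // fusion and replaces the use of 'F' with the copied root expression.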
+ void MergeFusionInstruction(HloInstruction* instruction_to_merge);
+
// Fuses the given instruction in this fusion instruction. instruction_to_fuse
// is cloned and the clone is placed in the fusion
// instruction. instruction_to_fuse is unchanged. Instruction is cloned rather
@@ -636,6 +638,11 @@ class HloInstruction {
private:
enum class UseKind { kNoUse, kReuse, kUsePermutingElements, kUse };
+ // Creates an n-ary elementwise operation.
+ static std::unique_ptr<HloInstruction> CreateNary(
+ const Shape& shape, HloOpcode opcode,
+ tensorflow::gtl::ArraySlice<HloInstruction*> operands);
+
// Appends operand to the list of operands and adds this instruction as a user
// of the operand.
void AppendOperand(HloInstruction* operand);
diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc
index 7f86a3cbb5..30bf450c5b 100644
--- a/tensorflow/compiler/xla/service/local_service.cc
+++ b/tensorflow/compiler/xla/service/local_service.cc
@@ -247,10 +247,9 @@ LocalService::CompileAheadOfTime(
*instance.result_layout));
}
- return execute_backend_->compiler()
- ->CompileAheadOfTime(std::move(hlo_modules), std::move(module_configs),
- MakeHloDumper(), options)
- .ConsumeValueOrDie();
+ return execute_backend_->compiler()->CompileAheadOfTime(
+ std::move(hlo_modules), std::move(module_configs), MakeHloDumper(),
+ options);
}
tensorflow::Status LocalService::ValidateExecuteOptions(
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index ab2c43cd3d..6626fe5af8 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -37,32 +37,62 @@ limitations under the License.
namespace xla {
-/* static */ bool ShapeUtil::CompareShapes(const Shape& lhs, const Shape& rhs,
- bool compare_layouts) {
- if (IsTuple(lhs)) {
- return IsTuple(rhs) &&
- ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(),
- [=](const Shape& l, const Shape& r) {
- return CompareShapes(l, r, compare_layouts);
- });
+namespace {
+
+// Recursive helper for comparing the equality of two shapes. Returns true if
+// the shapes are the same. If compare_layouts is true, then layouts must also
+// match.
+bool CompareShapes(const Shape& lhs, const Shape& rhs, bool compare_layouts) {
+ if (ShapeUtil::IsTuple(lhs)) {
+ if (!ShapeUtil::IsTuple(rhs)) {
+ VLOG(3) << "CompareShapes: lhs is a tuple, rhs not a tuple";
+ return false;
+ }
+
+ if (!ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(),
+ [=](const Shape& l, const Shape& r) {
+ return CompareShapes(l, r, compare_layouts);
+ })) {
+ VLOG(3) << "CompareShapes: tuples on lhs and rhs not equal";
+ return false;
+ }
}
// Explicitly compare the fields rather than using MessageDifferencer because
// we want empty layouts to be treated identically to missing layouts.
- if (compare_layouts &&
- (!ContainersEqual(lhs.layout().minor_to_major(),
- rhs.layout().minor_to_major()) ||
- !ContainersEqual(lhs.layout().padded_dimensions(),
- rhs.layout().padded_dimensions()) ||
- lhs.layout().padding_value() != rhs.layout().padding_value())) {
+ if (compare_layouts) {
+ if (!ContainersEqual(lhs.layout().minor_to_major(),
+ rhs.layout().minor_to_major())) {
+ VLOG(3) << "CompareShapes: lhs layout != rhs layout";
+ return false;
+ }
+ if (!ContainersEqual(lhs.layout().padded_dimensions(),
+ rhs.layout().padded_dimensions())) {
+ VLOG(3)
+ << "CompareShapes: lhs padded_dimensions != rhs padded_dimensions";
+ return false;
+ }
+ if (lhs.layout().padding_value() != rhs.layout().padding_value()) {
+ VLOG(3) << "CompareShapes: lhs padding value != rhs padding_value";
+ return false;
+ }
+ }
+
+ if (!ShapeUtil::SameDimensions(lhs, rhs)) {
+ VLOG(3) << "CompareShapes: lhs dimensions != rhs dimensions";
return false;
}
- return SameDimensions(lhs, rhs) && SameElementType(lhs, rhs);
+ if (!ShapeUtil::SameElementType(lhs, rhs)) {
+ VLOG(3) << "CompareShapes: lhs element type != rhs element type";
+ return false;
+ }
+ return true;
}
+} // namespace
+
/* static */ bool ShapeUtil::Equal(const Shape& lhs, const Shape& rhs) {
bool equal = CompareShapes(lhs, rhs, /*compare_layouts=*/true);
if (!equal && VLOG_IS_ON(3)) {
- // TODO(jeff): Maybe print more info about where lhs and rhs differ
VLOG(3) << "ShapeUtil::Equal differ: lhs = " << lhs.ShortDebugString()
<< ", rhs = " << rhs.ShortDebugString();
}
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index fa5fcc0224..963a3e4805 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -388,12 +388,6 @@ class ShapeUtil {
Shape shape);
private:
- // Recursive helper for comparing the equality of two shapes. Returns true if
- // the shapes are the same. If compare_layouts is true, then layouts must also
- // match.
- static bool CompareShapes(const Shape& lhs, const Shape& rhs,
- bool compare_layouts);
-
// Validates all of the non-layout properties of the shape -- this is a helper
// used by both the layout-optional and layout-required public method.
static Status ValidateShapeWithOptionalLayoutInternal(const Shape& shape);
diff --git a/tensorflow/compiler/xla/shape_util_test.cc b/tensorflow/compiler/xla/shape_util_test.cc
index 4e8a496e7e..fb2f8fb284 100644
--- a/tensorflow/compiler/xla/shape_util_test.cc
+++ b/tensorflow/compiler/xla/shape_util_test.cc
@@ -150,6 +150,26 @@ TEST(ShapeUtilTest, EmptyLayoutEqualsMissingLayout) {
EXPECT_TRUE(ShapeUtil::Equal(scalar1, scalar2));
}
+TEST(ShapeUtilTest, CompareShapesWithPaddedDimensionsMismatch) {
+ Shape shape1 = ShapeUtil::MakeShape(F32, {20, 30});
+ shape1.mutable_layout()->add_padded_dimensions(10);
+
+ Shape shape2 = ShapeUtil::MakeShape(F32, {20, 30});
+ shape2.mutable_layout()->add_padded_dimensions(11);
+
+ EXPECT_FALSE(ShapeUtil::Equal(shape1, shape2));
+}
+
+TEST(ShapeUtilTest, CompareShapesWithPaddingValueMismatch) {
+ Shape shape1 = ShapeUtil::MakeShape(F32, {20, 30});
+ shape1.mutable_layout()->set_padding_value(ZERO_PAD);
+
+ Shape shape2 = ShapeUtil::MakeShape(F32, {20, 30});
+ shape2.mutable_layout()->set_padding_value(LOWEST_PAD);
+
+ EXPECT_FALSE(ShapeUtil::Equal(shape1, shape2));
+}
+
TEST(ShapeUtilTest, ScalarUnpopulatedLayoutEqualsScalarLayout) {
Shape scalar_unpopulated = ShapeUtil::MakeShape(F32, {});
scalar_unpopulated.clear_layout();
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/categorical_test.py b/tensorflow/contrib/distributions/python/kernel_tests/categorical_test.py
index 81fbf2a6ef..f378966562 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/categorical_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/categorical_test.py
@@ -26,7 +26,9 @@ from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.platform import test
@@ -146,6 +148,32 @@ class CategoricalTest(test.TestCase):
-(0.6 * np.log(0.6) + 0.4 * np.log(0.4))
])
+ def testEntropyGradient(self):
+ with self.test_session() as sess:
+ logits = constant_op.constant([[1., 2., 3.], [2., 5., 1.]])
+
+ probabilities = nn_ops.softmax(logits)
+ log_probabilities = nn_ops.log_softmax(logits)
+ true_entropy = - math_ops.reduce_sum(
+ probabilities * log_probabilities, axis=-1)
+
+ categorical_distribution = categorical.Categorical(p=probabilities)
+ categorical_entropy = categorical_distribution.entropy()
+
+ # Gradients of both entropy computations should be defined and equal.
+ true_entropy_g = gradients_impl.gradients(true_entropy, [logits])
+ categorical_entropy_g = gradients_impl.gradients(
+ categorical_entropy, [logits])
+
+ res = sess.run({"true_entropy": true_entropy,
+ "categorical_entropy": categorical_entropy,
+ "true_entropy_g": true_entropy_g,
+ "categorical_entropy_g": categorical_entropy_g})
+ self.assertAllClose(res["true_entropy"],
+ res["categorical_entropy"])
+ self.assertAllClose(res["true_entropy_g"],
+ res["categorical_entropy_g"])
+
def testSample(self):
with self.test_session():
histograms = [[[0.2, 0.8], [0.4, 0.6]]]
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py b/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py
index 57c873f59e..0181ded643 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py
@@ -569,10 +569,11 @@ class SoftplusTest(test.TestCase):
def testInverseSoftplusGradientNeverNan(self):
with self.test_session():
# Note that this range contains both zero and inf.
- x = constant_op.constant((10.**np.arange(-8, 6)).astype(np.float16))
- y = distribution_util.softplus_inverse(x).eval()
+ x = constant_op.constant(np.logspace(-8, 6).astype(np.float16))
+ y = distribution_util.softplus_inverse(x)
+ grads = gradients_impl.gradients(y, x)[0].eval()
# Equivalent to `assertAllFalse` (if it existed).
- self.assertAllEqual(np.zeros_like(y).astype(np.bool), np.isnan(y))
+ self.assertAllEqual(np.zeros_like(grads).astype(np.bool), np.isnan(grads))
if __name__ == "__main__":
test.main()
diff --git a/tensorflow/contrib/distributions/python/ops/bijector.py b/tensorflow/contrib/distributions/python/ops/bijector.py
index 7e92f49677..41a4f9d859 100644
--- a/tensorflow/contrib/distributions/python/ops/bijector.py
+++ b/tensorflow/contrib/distributions/python/ops/bijector.py
@@ -1977,7 +1977,7 @@ class AffineLinearOperator(Bijector):
if scale.tensor_rank is not None:
batch_ndims = scale.tensor_rank - 2
else:
- batch_ndims = scale.tensor_rank_dynamic() - 2
+ batch_ndims = scale.tensor_rank_tensor() - 2
graph_parents += [batch_ndims]
else:
batch_ndims = 0 # We won't need shape inference when scale is None.
diff --git a/tensorflow/contrib/distributions/python/ops/categorical.py b/tensorflow/contrib/distributions/python/ops/categorical.py
index feca611d00..9573e89237 100644
--- a/tensorflow/contrib/distributions/python/ops/categorical.py
+++ b/tensorflow/contrib/distributions/python/ops/categorical.py
@@ -209,17 +209,8 @@ class Categorical(distribution.Distribution):
return math_ops.exp(self._log_prob(k))
def _entropy(self):
- if self.logits.get_shape().ndims == 2:
- logits_2d = self.logits
- else:
- logits_2d = array_ops.reshape(self.logits, [-1, self.num_classes])
- histogram_2d = nn_ops.softmax(logits_2d)
- ret = array_ops.reshape(
- nn_ops.softmax_cross_entropy_with_logits(labels=histogram_2d,
- logits=logits_2d),
- self.batch_shape())
- ret.set_shape(self.get_batch_shape())
- return ret
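+ # H(p) = -sum_i p_i * log(p_i), computed from the logits via log_softmax
+ # so that the result stays numerically stable and differentiable.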
+ return -math_ops.reduce_sum(
+ nn_ops.log_softmax(self.logits) * self.p, axis=-1)
def _mode(self):
ret = math_ops.argmax(self.logits, dimension=self._batch_rank)
@@ -245,5 +236,6 @@ def _kl_categorical_categorical(a, b, name=None):
name, "kl_categorical_categorical", [a.logits, b.logits]):
# sum(p*ln(p/q))
return math_ops.reduce_sum(
- nn_ops.softmax(a.logits)*(nn_ops.log_softmax(a.logits)
- - nn_ops.log_softmax(b.logits)), reduction_indices=[-1])
+ nn_ops.softmax(a.logits) * (
+ nn_ops.log_softmax(a.logits) - nn_ops.log_softmax(b.logits)),
+ axis=-1)
diff --git a/tensorflow/contrib/graph_editor/transform.py b/tensorflow/contrib/graph_editor/transform.py
index 6fb347c834..832698b8a0 100644
--- a/tensorflow/contrib/graph_editor/transform.py
+++ b/tensorflow/contrib/graph_editor/transform.py
@@ -26,13 +26,13 @@ from six import iteritems
from six import iterkeys
from six import string_types
from six import StringIO
-
from tensorflow.contrib.graph_editor import edit
from tensorflow.contrib.graph_editor import reroute
from tensorflow.contrib.graph_editor import select
from tensorflow.contrib.graph_editor import subgraph
from tensorflow.contrib.graph_editor import util
from tensorflow.python.framework import ops as tf_ops
+from tensorflow.python.platform import tf_logging as logging
__all__ = [
"replace_t_with_placeholder_handler",
@@ -87,17 +87,24 @@ def keep_t_if_possible_handler(info, t):
def assign_renamed_collections_handler(info, elem, elem_):
"""Add the transformed elem to the (renamed) collections of elem.
+ A collection is renamed only if it is not a known key, as described in
+ `tf.GraphKeys`.
+
Args:
info: Transform._Info instance.
elem: the original element (`tf.Tensor` or `tf.Operation`)
elem_: the transformed element
"""
- # TODO(fkp): handle known special cases
+ known_collection_names = util.get_predefined_collection_names()
for name, collection in iteritems(info.collections):
if elem not in collection:
continue
- collection_name_ = info.transformer.new_name(name)
- info.graph_.add_to_collection(collection_name_, elem_)
+
+ if name in known_collection_names:
+ transformed_name = name
+ else:
+ transformed_name = info.transformer.new_name(name)
+ info.graph_.add_to_collection(transformed_name, elem_)
def transform_op_if_inside_handler(info, op, keep_if_possible=True):
@@ -150,6 +157,11 @@ def copy_op_handler(info, op, copy_shape=True):
# Transform inputs:
inputs_ = [info.transformer._transform_t(t) for t in op.inputs]
+ # Leave inputs empty if a graph cycle was found.
+ if None in inputs_:
+ info.cyclic_ops.append(op)
+ inputs_ = []
+
# Clone the node def:
node_def_ = deepcopy(op._node_def)
@@ -239,7 +251,7 @@ class Transformer(object):
self.transformed_ts = {}
self.collections = dict((key, self.graph.get_collection(key))
for key in self.graph.get_all_collection_keys())
-
+ self.cyclic_ops = []
class ResultInfo(object):
""""Contains information about the result of a transform operation."""
@@ -452,6 +464,17 @@ class Transformer(object):
for op in remaining_roots:
self._transform_op(op)
+ # Finalize cyclic ops:
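+ # These ops were copied with empty inputs because transforming their
+ # inputs would have cycled back into them; now that all tensors have been
+ # transformed, their inputs can be wired up.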
+ for op in self._info.cyclic_ops:
+ logging.debug("Finalizing cyclic op: %s", op.name)
+ op_ = self._info.transformed_ops[op]
+ inputs_ = [self._info.transformed_ts[t] for t in op.inputs]
+ if None in inputs_:
+ raise ValueError("Could not find all the inputs of cyclic op: {}"
+ .format(op_.name))
+ for input_id, t_ in enumerate(inputs_):
+ op_._update_input(input_id, t_) # pylint: disable=protected-access
+
sgv_ = self._transform_sgv(sgv)
res_info = Transformer.ResultInfo(self._info)
@@ -506,9 +529,13 @@ class Transformer(object):
Returns:
The transformed tensor.
"""
+ logging.debug("Transforming tensor: %s", t.name)
if t in self._info.transformed_ts:
return self._info.transformed_ts[t]
+ # Mark as None to detect cycle.
+ self._info.transformed_ts[t] = None
+
op, op_index = t.op, t.value_index
# If op is not in the subgraph:
diff --git a/tensorflow/contrib/graph_editor/util.py b/tensorflow/contrib/graph_editor/util.py
index 11ee2435c9..d8824f6792 100644
--- a/tensorflow/contrib/graph_editor/util.py
+++ b/tensorflow/contrib/graph_editor/util.py
@@ -20,6 +20,7 @@ from __future__ import division
from __future__ import print_function
import collections
+import re
from six import iteritems
from tensorflow.python.framework import ops as tf_ops
from tensorflow.python.ops import array_ops as tf_array_ops
@@ -465,3 +466,75 @@ def make_placeholder_from_dtype_and_shape(dtype, shape=None, scope=None):
"""
return tf_array_ops.placeholder(
dtype=dtype, shape=shape, name=placeholder_name(scope=scope))
+
+
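+# Matches Python internal attributes (e.g. "__doc__") so that they are skipped
+# when listing the predefined collection names defined on tf.GraphKeys.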
+_INTERNAL_VARIABLE_RE = re.compile(r"^__\w+__$")
+
+
+def get_predefined_collection_names():
+ """Return all the predefined collection names."""
+ return [getattr(tf_ops.GraphKeys, key) for key in dir(tf_ops.GraphKeys)
+ if not _INTERNAL_VARIABLE_RE.match(key)]
+
+
+def find_corresponding_elem(target, dst_graph, dst_scope="", src_scope=""):
+ """Find corresponding op/tensor in a different graph.
+
+ Args:
+ target: A `tf.Tensor` or a `tf.Operation` belonging to the original graph.
+ dst_graph: The graph in which the corresponding graph element must be found.
+ dst_scope: A scope which is prepended to the name to look for.
+ src_scope: A scope which is removed from the original name of `target`.
+
+ Returns:
+ The corresponding `tf.Tensor` or `tf.Operation`.
+
+ Raises:
+ ValueError: if `src_name` does not start with `src_scope`.
+ TypeError: if `target` is not a `tf.Tensor` or a `tf.Operation`
+ KeyError: If the corresponding graph element cannot be found.
+ """
+ src_name = target.name
+ if src_scope:
+ src_scope = scope_finalize(src_scope)
+ if not src_name.startswith(src_scope):
+ raise ValueError("{} does not start with {}".format(src_name, src_scope))
+ src_name = src_name[len(src_scope):]
+
+ dst_name = src_name
+ if dst_scope:
+ dst_scope = scope_finalize(dst_scope)
+ dst_name = dst_scope + dst_name
+
+ if isinstance(target, tf_ops.Tensor):
+ return dst_graph.get_tensor_by_name(dst_name)
+ if isinstance(target, tf_ops.Operation):
+ return dst_graph.get_operation_by_name(dst_name)
+ raise TypeError("Expected tf.Tensor or tf.Operation, got: {}", type(target))
+
+
+def find_corresponding(targets, dst_graph, dst_scope="", src_scope=""):
+ """Find corresponding ops/tensors in a different graph.
+
+ `targets` is a Python tree, that is, a nested structure of iterables
+ (list, tuple, dictionary) whose leaves are instances of
+ `tf.Tensor` or `tf.Operation`.
+
+ Args:
+ targets: A Python tree containing `tf.Tensor` or `tf.Operation`
+ belonging to the original graph.
+ dst_graph: The graph in which the corresponding graph element must be found.
+ dst_scope: A scope which is prepended to the name to look for.
+ src_scope: A scope which is removed from the original name of `top`.
+
+ Returns:
+ A Python tree containing the corresponding `tf.Tensor` or `tf.Operation`.
+
+ Raises:
+ ValueError: if `src_name` does not start with `src_scope`.
+ TypeError: if `top` is not a `tf.Tensor` or a `tf.Operation`
+ KeyError: If the corresponding graph element cannot be found.
+ """
+ def func(top):
+ return find_corresponding_elem(top, dst_graph, dst_scope, src_scope)
+ return transform_tree(targets, func)
diff --git a/tensorflow/contrib/hvx/hexagon_controller/Makefile b/tensorflow/contrib/hvx/hexagon_controller/Makefile
new file mode 100644
index 0000000000..9fe2ed596a
--- /dev/null
+++ b/tensorflow/contrib/hvx/hexagon_controller/Makefile
@@ -0,0 +1,19 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+include glue/defines.min
+
+include target/make/android.min
+
+include $(RULES_MIN)
diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v1_graph_init.c b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v1_graph_init.c
new file mode 100644
index 0000000000..3ca5532c38
--- /dev/null
+++ b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v1_graph_init.c
@@ -0,0 +1,16 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+void init_graph_v1(int nn_id) {}
diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_dummy_float_data.c b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_dummy_float_data.c
new file mode 100644
index 0000000000..dc61ae754a
--- /dev/null
+++ b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_dummy_float_data.c
@@ -0,0 +1,16 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+float inception_dummy_float_data_299x299[299*299*3] = {};
diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_dummy_int_data.c b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_dummy_int_data.c
new file mode 100644
index 0000000000..27e1ca40b9
--- /dev/null
+++ b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_dummy_int_data.c
@@ -0,0 +1,17 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <stdint.h>
+uint8_t inception_dummy_int_data_299x299[299*299*3] = {};
diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_graph_init.c b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_graph_init.c
new file mode 100644
index 0000000000..9def665827
--- /dev/null
+++ b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_graph_init.c
@@ -0,0 +1,16 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+void init_graph(int nn_id) {}
diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_impl/graph_functions_wrapper.c b/tensorflow/contrib/hvx/hexagon_controller/src_impl/graph_functions_wrapper.c
new file mode 100644
index 0000000000..567485b035
--- /dev/null
+++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/graph_functions_wrapper.c
@@ -0,0 +1,355 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Define USE_ION_MEMORY to demonstrate the performance difference between
+// ION and HLOS memory when sharing buffers with the ADSP.
+#define USE_ION_MEMORY
+
+#include <limits.h>
+#include <stdio.h>
+
+#include "hexagon_controller.h"
+#include "hexagon_nn.h"
+#include "tfm_log.h"
+
+static const uint32_t MAX_NODES = 2048;
+static const uint32_t MAX_EVENT_COUNT = 256;
+
+static const bool DUMP_OUTPUT = false;
+static const bool DBG_EXECUTION = true;
+
+static const int OUT_RANKING_SIZE = 5;
+
+// static only for this file.
+// TODO(satok): allocate dynamically
+static float s_output_values[300 * 300 * 3 * 4];
+
+extern void init_graph(uint32_t id);
+extern void init_graph_v1(uint32_t id);
+extern uint8_t inception_dummy_int_data_299x299[];
+extern uint8_t inception_sample_int_data_224x224[];
+extern float inception_dummy_float_data_299x299_299x299[];
+
+enum InceptionVersion {
+ INCEPTION_V1,
+ INCEPTION_V3,
+};
+
+static enum InceptionVersion s_inception_version = INCEPTION_V3;
+
+/////////////////////////////////////////////////
+// file local functions
+
+static const char *ConvertGraphInfoIdToName(unsigned int id) {
+ // TODO(satok): implement
+ return "?";
+}
+
+static const char *ConvertGraphInfoIdToOpName(unsigned int id) {
+ // TODO(satok): implement
+ return "?";
+}
+
+/////////////////////////////////////////////////
+// file local utilities
+static uint32_t FindMaxIdxWithExcludeList(
+ const float *data, uint32_t entries, const int exclude_size,
+ const int* exclude_idx) {
+ int i;
+ float maxval = data[0];
+ int maxidx = 0;
+ for (i = 0; i < entries; i++) {
+ bool exclude = false;
+ for (int j = 0; j < exclude_size; ++j) {
+ if (exclude_idx[j] == i) {
+ exclude = true;
+ break;
+ }
+ }
+ if (exclude) {
+ continue;
+ }
+ if (maxval < data[i]) {
+ maxval = data[i];
+ maxidx = i;
+ }
+ }
+ return maxidx;
+}
+
+static uint32_t FindMaxIdx(const float* data, uint32_t entries) {
+ return FindMaxIdxWithExcludeList(data, entries, 0, NULL);
+}
+
+void hexagon_controller_PrintMaxNIdx(const float *data, const uint32_t entries,
+ const int n, int* out_ranking) {
+ if (DUMP_OUTPUT) {
+ for (int i = 0; i < entries; ++i) {
+ TFMLOGD("%d: val = %f", i, data[i]);
+ }
+ }
+ for (int i = 0; i < n; ++i) {
+ out_ranking[i] = INT_MAX;
+ }
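+ // Repeatedly pick the largest value whose index has not been ranked yet to
+ // build a descending top-n ranking.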
+ for (int i = 0; i < n; ++i) {
+ out_ranking[i] = FindMaxIdxWithExcludeList(data, entries, n, out_ranking);
+ }
+ TFMLOGD("=== RANKING ===");
+ for (int i = 0; i < n; ++i) {
+ TFMLOGD("%d: id = %d, val = %f", i, out_ranking[i], data[out_ranking[i]]);
+ }
+}
+
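+// Combines the 32-bit high and low halves of a perf counter into a single
+// 64-bit cycle count.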
+static inline unsigned long long int GetCounter(hexagon_nn_perfinfo s) {
+ unsigned long long int ret;
+ ret = s.counter_hi;
+ ret <<= 32;
+ ret |= s.counter_lo;
+ return ret;
+}
+
+static int CompareCycle(const void *va, const void *vb) {
+ const hexagon_nn_perfinfo *a = va;
+ const hexagon_nn_perfinfo *b = vb;
+ unsigned long long int acount = GetCounter(*a);
+ unsigned long long int bcount = GetCounter(*b);
+ if (acount < bcount) {
+ return -1;
+ } else if (acount > bcount) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/////////////////////////////////////////////////
+// Graph functions
+
+uint32_t hexagon_controller_InstantiateGraph() {
+ const uint32_t nn_id = hexagon_nn_init();
+ // set debug level to 99 for now
+ //hexagon_nn_set_debug_level(nn_id, 99);
+ // TODO(satok): make this an argument
+ hexagon_nn_set_debug_level(nn_id, 0);
+ return nn_id;
+}
+
+void hexagon_controller_InitGraph(int version, uint32_t nn_id) {
+ if (version == 1) {
+ s_inception_version = INCEPTION_V1;
+ } else if (version == 3) {
+ s_inception_version = INCEPTION_V3;
+ } else {
+ TFMLOGE("Unsupported inception version %d", version);
+ return;
+ }
+ if (s_inception_version == INCEPTION_V3) {
+ init_graph(nn_id);
+ } else if (s_inception_version == INCEPTION_V1) {
+ init_graph_v1(nn_id);
+ }
+ TFMLOGD("Init graph (inception version = %d) done.", version);
+}
+
+bool hexagon_controller_ConstructGraph(uint32_t nn_id) {
+ int err;
+ if ((err = hexagon_nn_prepare(nn_id)) != 0) {
+ TFMLOGE("Prepare failed! returned 0x%x\n", err);
+ return false;
+ } else {
+ TFMLOGD("Prepare success!\n");
+ return true;
+ }
+}
+
+uint32_t hexagon_controller_SetupGraph(int version) {
+ const uint32_t nn_id = hexagon_controller_InstantiateGraph();
+ hexagon_controller_InitGraph(version, nn_id);
+ hexagon_controller_ConstructGraph(nn_id);
+ return nn_id;
+}
+
+bool hexagon_controller_ExecuteGraph(
+ const uint32_t nn_id,
+ const uint32_t batches,
+ const uint32_t height,
+ const uint32_t width,
+ const uint32_t depth,
+ uint8_t* int_data,
+ const uint32_t int_data_size,
+ uint32_t* out_batches,
+ uint32_t* out_height,
+ uint32_t* out_width,
+ uint32_t* out_depth,
+ uint8_t* out_vals,
+ const uint32_t output_val_byte_size,
+ uint32_t* out_data_byte_size) {
+ int err;
+ if (DBG_EXECUTION) {
+ TFMLOGD("Preparing to execute...");
+ TFMLOGD("Input: %d, %d, %d, %d, %d, %d",
+ batches, height, width, depth, int_data[0], int_data_size);
+ TFMLOGD("Output: %d, %p", output_val_byte_size, out_vals);
+ LogDHexagon("Execute graph!");
+ }
+
+ if ((err = hexagon_nn_execute(nn_id,
+ batches,
+ height,
+ width,
+ depth,
+ int_data,
+ int_data_size,
+ out_batches,
+ out_height,
+ out_width,
+ out_depth,
+ out_vals,
+ output_val_byte_size,
+ out_data_byte_size)) != 0) {
+ if (DBG_EXECUTION) {
+ LogDHexagon("Execution failed!");
+ TFMLOGE("execute got err: %d\n",err);
+ }
+ return false;
+ } else {
+ if (DBG_EXECUTION) {
+ LogDHexagon("Execution succeeded!");
+ TFMLOGD("%d x %d x %d x %d, byte size = %d\n",
+ *out_batches,
+ *out_height,
+ *out_width,
+ *out_depth,
+ *out_data_byte_size);
+ }
+ return true;
+ }
+}
+
+bool hexagon_controller_ExecuteInceptionDummyData(uint32_t nn_id) {
+ uint32_t out_batches, out_height, out_width, out_depth;
+ uint32_t out_data_size;
+ // s_output_values = 300 * 300 * 3 * 4 * 4
+ const bool success = hexagon_controller_ExecuteGraph(
+ nn_id, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3,
+ INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH,
+ (uint8_t *)inception_dummy_int_data_299x299,
+ INCEPTION_PARAM_HEIGHT_V3 * INCEPTION_PARAM_WIDTH_V3 *
+ INCEPTION_PARAM_DEPTH,
+ &out_batches, &out_height, &out_width, &out_depth,
+ (uint8_t *)s_output_values, sizeof(s_output_values),
+ &out_data_size);
+ if (success) {
+ int out_ranking[OUT_RANKING_SIZE];
+ hexagon_controller_PrintMaxNIdx(
+ s_output_values,
+ out_batches * out_height * out_width * out_depth,
+ OUT_RANKING_SIZE, out_ranking);
+ TFMLOGD("%d x %d x %d x %d, size = %d\n",
+ out_batches,
+ out_height,
+ out_width,
+ out_depth,
+ out_data_size);
+ TFMLOGD("max idx: %d\n", FindMaxIdx(
+ s_output_values,
+ out_batches * out_height * out_width * out_depth));
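+ // 169 and 7 are the class indices expected for the bundled dummy input
+ // (presumably the "panda" data referenced in soc_interface_SetDebugFlag).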
+ if (out_ranking[0] == 169 && out_ranking[1] == 7) {
+ return true;
+ } else {
+ TFMLOGD("Result is wrong! %d, %d", out_ranking[0], out_ranking[1]);
+ return false;
+ }
+ } else {
+ return false;
+ }
+}
+
+void hexagon_controller_DumpPerf(uint32_t nn_id) {
+ hexagon_nn_perfinfo info[MAX_NODES];
+ unsigned long long int total_cycles = 0;
+ unsigned long long int cum_cycles = 0;
+ unsigned long long int counter = 0;
+ int n_nodes;
+ int i;
+ TFMLOGD("Perf dump follows:");
+ if (hexagon_nn_get_perfinfo(nn_id, info, MAX_NODES, &n_nodes) != 0) {
+ TFMLOGE("perf info failure");
+ return;
+ }
+ TFMLOGD("Total %d nodes.", n_nodes);
+ qsort(info, n_nodes, sizeof(info[0]), CompareCycle);
+ for (i = 0; i < n_nodes; i++) {
+ total_cycles += GetCounter(info[i]);
+ }
+ TFMLOGD("Total %lld cycles.", total_cycles);
+ for (i = 0; i < n_nodes; i++) {
+ counter = GetCounter(info[i]);
+ cum_cycles += counter;
+ TFMLOGD("node,0x%x,%s,%s,executions,%d,cycles,%lld,%f %%,"
+ "cum_cycles,%lld,%f %%\n",
+ info[i].node_id,
+ ConvertGraphInfoIdToName(info[i].node_id),
+ ConvertGraphInfoIdToOpName(info[i].node_id),
+ info[i].executions,
+ counter,
+ 100*((double)counter)/total_cycles,
+ cum_cycles,
+ 100*((double)cum_cycles)/total_cycles);
+ }
+#ifdef ENABLE_HVX_FULL_DEBUG
+ DumpAllPerf(nn_id);
+#endif
+}
+
+void hexagon_controller_DumpNodeName(uint32_t nn_id) {
+ TFMLOGD("Show node name");
+ const uint32_t id = nn_id;
+ hexagon_nn_perfinfo info[MAX_NODES];
+ unsigned long long int total_cycles = 0;
+ unsigned long long int cum_cycles = 0;
+ unsigned long long int counter = 0;
+ int node_count;
+ int i;
+ TFMLOGD("Perf dump follows:");
+ if (hexagon_nn_get_perfinfo(id, info, MAX_NODES, &node_count) != 0) {
+ TFMLOGD("perf info failure");
+ return;
+ }
+ TFMLOGD("Total %d nodes.",node_count);
+ qsort(info, node_count, sizeof(info[0]), CompareCycle);
+ for (i = 0; i < node_count; i++) {
+ total_cycles += GetCounter(info[i]);
+ }
+ TFMLOGD("Total %lld cycles.", total_cycles);
+ for (i = 0; i < node_count; i++) {
+ counter = GetCounter(info[i]);
+ cum_cycles += counter;
+ TFMLOGD("node,0x%x,%s,%s,executions,%d,cycles,%lld,%f %%,"
+ "cum_cycles,%lld,%f %%",
+ info[i].node_id,
+ ConvertGraphInfoIdToName(info[i].node_id),
+ ConvertGraphInfoIdToOpName(info[i].node_id),
+ info[i].executions,
+ counter,
+ 100*((double)counter)/total_cycles,
+ cum_cycles,
+ 100*((double)cum_cycles)/total_cycles);
+ }
+}
+
+void hexagon_controller_Teardown(uint32_t nn_id) {
+ hexagon_nn_teardown(nn_id);
+}
diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c
new file mode 100644
index 0000000000..fe329e2f59
--- /dev/null
+++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c
@@ -0,0 +1,374 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Use ION memory to demonstrate the performance difference between ION and
+// HLOS memory when sharing buffers with the aDSP.
+#define USE_ION_MEMORY
+
+#include "hexagon_controller.h"
+
+#include <malloc.h>
+#include <stdio.h>
+
+#include "adspmsgd.h"
+#include "dspCV.h"
+#include "rpcmem.h" // helper API's for shared buffer allocation
+#include "soc_interface.h"
+#include "tfm_log.h"
+
+// If false, use int data as input. This is only for acceleration purposes.
+static const bool USE_FLOAT_DATA = true;
+
+// if true, show id for each node
+static const bool DBG_SHOW_ID = false;
+
+static const uint32_t OUTPUT_PARAM_MAX_LINE_SIZE = 1000;
+
+// extern pre-generated inception dummy data
+extern uint8_t inception_dummy_int_data_224x224[];
+extern uint8_t inception_dummy_int_data_299x299[];
+extern float inception_dummy_float_data_299x299_299x299[];
+
+#define GEMM_WRAPPER_VERSION 1
+
+// Allocate the 2 MB print buffer in advance.
+#define PRINT_BUFSIZE (2 * 1024 * 1024)
+
+static unsigned char s_print_buf[PRINT_BUFSIZE];
+
+// Input node data buffer size:
+// 1024 * 1024 * 2 > 299 * 299 * 3 * 4 (the largest float input) > 1024 * 1024,
+// so 2 MB is sufficient.
+static const int INPUT_NODE_DATA_BUFFER_SIZE = 1024 * 1024 * 2;
+// Output node data buffer size
+// (1008 floats would be enough for the Inception classifier output).
+static const int OUTPUT_NODE_DATA_BUFFER_SIZE = 300 * 300 * 3 * 4;
+
+static struct NodeDataFloat s_input_node_data_float_buffer;
+static float* s_output_node_data_float_buffer;
+static int s_output_node_data_float_buffer_byte_size;
+static int s_output_node_data_float_array_size;
+static uint32_t s_target_graph_id;
+
+static bool s_dbg_use_inception_dummy_data = false;
+
+void hexagon_controller_InitInputNodeDataToInceptionDummyData(int version) {
+ if (version == 1) {
+ if (USE_FLOAT_DATA) {
+ TFMLOGE("ERROR!!!! Do not use float data for v1");
+ return;
+ }
+ hexagon_controller_CopyByteNodeData(
+ INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V1,
+ INCEPTION_PARAM_WIDTH_V1, INCEPTION_PARAM_DEPTH,
+ 1, inception_dummy_int_data_224x224);
+ } else if (version == 3) {
+ if (USE_FLOAT_DATA) {
+ hexagon_controller_CopyByteNodeData(
+ INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3,
+ INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH,
+ sizeof(float), (uint8_t*)inception_dummy_float_data_299x299_299x299);
+ } else {
+ hexagon_controller_CopyByteNodeData(
+ INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3,
+ INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH,
+ 1, inception_dummy_int_data_299x299);
+ }
+ }
+}
+
+bool hexagon_controller_ExecuteGraphWithBuffer(
+ uint32_t nn_id, bool show_ranking) {
+ uint32_t out_batches, out_height, out_width, out_depth;
+ uint32_t out_data_size;
+ int x = s_input_node_data_float_buffer.x;
+ int y = s_input_node_data_float_buffer.y;
+ int z = s_input_node_data_float_buffer.z;
+ int d = s_input_node_data_float_buffer.d;
+ uint8_t *byte_data = s_input_node_data_float_buffer.byte_array_data;
+ int array_size = s_input_node_data_float_buffer.array_size;
+ const bool success = hexagon_controller_ExecuteGraph(
+ nn_id, x, y, z, d, byte_data, array_size,
+ &out_batches, &out_height, &out_width, &out_depth,
+ (uint8_t *)s_output_node_data_float_buffer,
+ s_output_node_data_float_buffer_byte_size,
+ &out_data_size);
+ if (!success) {
+ TFMLOGE("Execution failed");
+ return false;
+ }
+ // Only read the output dimensions once execution has succeeded; they are
+ // not initialized on failure.
+ s_output_node_data_float_array_size =
+ out_batches * out_height * out_width * out_depth;
+ if (!show_ranking) {
+ return true;
+ }
+
+ static const int OUT_RANKING_SIZE = 5;
+ int out_ranking[OUT_RANKING_SIZE];
+ hexagon_controller_PrintMaxNIdx(
+ s_output_node_data_float_buffer,
+ out_batches * out_height * out_width * out_depth,
+ OUT_RANKING_SIZE, out_ranking);
+ TFMLOGD("%d x %d x %d x %d, byte size = %d\n",
+ out_batches,
+ out_height,
+ out_width,
+ out_depth,
+ out_data_size);
+ if (s_dbg_use_inception_dummy_data) {
+ // Check the Inception result computed from the dummy input data. This
+ // check only runs when show_ranking is true, so it adds no unnecessary
+ // computation cost otherwise.
+ if (out_ranking[0] == 169 && out_ranking[1] == 7) {
+ TFMLOGD("Result is correct! %d, %d", out_ranking[0], out_ranking[1]);
+ return true;
+ } else {
+ TFMLOGD("Result is wrong! %d, %d", out_ranking[0], out_ranking[1]);
+ return false;
+ }
+ }
+ return true;
+}
+
+uint32_t hexagon_controller_GetTargetGraphId() {
+ return s_target_graph_id;
+}
+
+void hexagon_controller_SetTargetGraphId(uint32_t graph_id) {
+ s_target_graph_id = graph_id;
+}
+
+void hexagon_controller_PrintGraph(uint32_t id) {
+ int retval = hexagon_nn_snpprint(id, s_print_buf, PRINT_BUFSIZE);
+ TFMLOGD("PrintGraph %s\n", s_print_buf);
+ if (retval) {
+ TFMLOGE("Error on print graph\n");
+ }
+}
+
+int hexagon_controller_GetWrapperVersion() {
+ return GEMM_WRAPPER_VERSION;
+}
+
+int hexagon_controller_GetHexagonBinaryVersion() {
+ int retval = 0;
+ hexagon_nn_GetHexagonBinaryVersion(&retval);
+ return retval;
+}
+
+bool hexagon_controller_AllocateNodeDataBuffers(
+ int input_size, int output_size) {
+ TFMLOGD("Allocate memory for input / output node data float");
+ if (s_input_node_data_float_buffer.buf_size != 0) {
+ TFMLOGE("ERROR! input buffer is already allocated!!");
+ return false;
+ } else {
+ int byte_array_data_size = USE_FLOAT_DATA ?
+ input_size * sizeof(float) : input_size * sizeof(uint8_t);
+ s_input_node_data_float_buffer.buf_size = input_size;
+ // TODO(satok): array_data appears to be unused; consider removing it.
+ s_input_node_data_float_buffer.array_data =
+ malloc(input_size * sizeof(float));
+ s_input_node_data_float_buffer.byte_array_data =
+ malloc(byte_array_data_size);
+
+ s_output_node_data_float_buffer = malloc(output_size * sizeof(float));
+ s_output_node_data_float_buffer_byte_size = output_size * sizeof(float);
+ s_output_node_data_float_array_size = 0;
+ TFMLOGD("allocate node data buffers");
+ }
+ return true;
+}
+
+bool hexagon_controller_ReleaseNodeDataBuffers() {
+ if (s_input_node_data_float_buffer.buf_size == 0) {
+ TFMLOGE("ERROR! input buffer has not been allocated yet!!");
+ return false;
+ } else {
+ s_input_node_data_float_buffer.buf_size = 0;
+ free(s_input_node_data_float_buffer.array_data);
+ // byte_array_data belongs to the input buffer, so release it here as well.
+ free(s_input_node_data_float_buffer.byte_array_data);
+ }
+ if (s_output_node_data_float_buffer_byte_size == 0) {
+ TFMLOGE("ERROR! output buffer has not been allocated yet!!");
+ return false;
+ } else {
+ s_output_node_data_float_buffer_byte_size = 0;
+ free(s_output_node_data_float_buffer);
+ }
+ return true;
+}
+
+bool hexagon_controller_CopyByteNodeData(
+ int x, int y, int z, int d, int type_byte_size, uint8_t* array_data) {
+ int array_byte_size = x * y * z * d * type_byte_size;
+ TFMLOGD("--- %d, %d, %d, %d, %d, %d",x,y,z,d,type_byte_size,array_byte_size);
+ if (s_input_node_data_float_buffer.buf_size < array_byte_size) {
+ TFMLOGE("ERROR! input buffer size is too small! %d < %d",
+ s_input_node_data_float_buffer.buf_size, array_byte_size);
+ return false;
+ }
+ memcpy(s_input_node_data_float_buffer.byte_array_data,
+ array_data, array_byte_size);
+ s_input_node_data_float_buffer.array_size = array_byte_size;
+ s_input_node_data_float_buffer.x = x;
+ s_input_node_data_float_buffer.y = y;
+ s_input_node_data_float_buffer.z = z;
+ s_input_node_data_float_buffer.d = d;
+ return true;
+}
+
+int hexagon_controller_InitHexagonWithMaxAttributes(
+ int enable_dcvs, int bus_usage, int version) {
+ TFMLOGI("Init hexagon with max attributes");
+ const int MCPS = 1000;
+ const int MBPS = 12000;
+
+ adspmsgd_start(0, RPCMEM_HEAP_DEFAULT, 4096);
+
+ dspCV_Attribute attrib[] = {
+ // The values below request the maximum aDSP performance at Turbo voltage:
+ // slightly more MCPS than are available on current targets.
+ {DSP_TOTAL_MCPS, MCPS},
+ // Drive the clock to the maximum on known targets.
+ {DSP_MCPS_PER_THREAD, MCPS / 2},
+ // 12 GB/sec is slightly higher than the realistic maximum bandwidth
+ // on existing targets.
+ {PEAK_BUS_BANDWIDTH_MBPS, MBPS},
+ // This app is non-real time, and constantly reading/writing memory
+ {BUS_USAGE_PERCENT, bus_usage},
+ };
+ int retval = 0;
+ if (!enable_dcvs) {
+ retval = hexagon_nn_disableDcvs();
+ if (retval) {
+ TFMLOGE("Failed to disable DSP DCVS: %x\n", retval);
+ }
+ }
+
+ retval =
+ dspCV_initQ6_with_attributes(attrib, sizeof(attrib) / sizeof(attrib[0]));
+ TFMLOGD("Return value from dspCV_initQ6() : %d\n", retval);
+
+ hexagon_controller_AllocateNodeDataBuffers(
+ INPUT_NODE_DATA_BUFFER_SIZE, OUTPUT_NODE_DATA_BUFFER_SIZE);
+
+ if (s_dbg_use_inception_dummy_data) {
+ hexagon_controller_InitInputNodeDataToInceptionDummyData(version);
+ }
+ s_target_graph_id = 0;
+
+ return retval;
+}
+
+int hexagon_controller_DeInitHexagon() {
+ adspmsgd_stop();
+ TFMLOGI("Finalize hexagon");
+ const int retval = dspCV_deinitQ6();
+ TFMLOGD("return value from dspCV_deinitQ6(): %d \n", retval);
+
+ hexagon_controller_ReleaseNodeDataBuffers();
+
+ return retval;
+}
+
+void hexagon_controller_GrowMemorySize() {
+ hexagon_nn_config();
+}
+
+struct NodeDataFloat* hexagon_controller_GetInputNodeDataFloatBuffer() {
+ return &s_input_node_data_float_buffer;
+}
+
+float* hexagon_controller_GetOutputNodeDataFloatBuffer(
+ const char *const node_name, int* out_array_size) {
+ *out_array_size = s_output_node_data_float_array_size;
+ return s_output_node_data_float_buffer;
+}
+
+// Append const node to the graph
+int hexagon_controller_AppendConstNode(
+ const char* const name, int graph_id, int node_id,
+ int batch, int height, int width, int depth,
+ const uint8_t* const data, int data_length) {
+ if (DBG_SHOW_ID) {
+ TFMLOGV("---(CONST) %s, %d, %d, %d, %d, %d, %d",
+ name, node_id, batch, height, width, depth, data_length);
+ } else {
+ TFMLOGV("---(CONST) %s, %d, %d, %d, %d, %d",
+ name, batch, height, width, depth, data_length);
+ }
+ const int retval = hexagon_nn_append_const_node(
+ graph_id, node_id, batch, height, width, depth, data, data_length);
+ if (retval != 0) {
+ TFMLOGE("Failed to append const node %d", node_id);
+ return retval;
+ }
+ return retval;
+}
+
+// Append node to the graph
+int hexagon_controller_AppendNode(
+ const char* const name, int graph_id, int node_id, int ops_id,
+ int padding_id, const hexagon_nn_input* const inputs,
+ int inputs_count, const hexagon_nn_output* const outputs,
+ int outputs_count) {
+ char input_param_buf[OUTPUT_PARAM_MAX_LINE_SIZE];
+ memset(input_param_buf, 0, OUTPUT_PARAM_MAX_LINE_SIZE);
+ int pos = 0;
+ // Bound every write by the space remaining in the buffer instead of a
+ // fixed 500 bytes, so long input lists cannot overflow it.
+ pos += snprintf(&input_param_buf[pos], OUTPUT_PARAM_MAX_LINE_SIZE - pos,
+ "in: ");
+ for (int i = 0; i < inputs_count && pos < OUTPUT_PARAM_MAX_LINE_SIZE; ++i) {
+ if (DBG_SHOW_ID) {
+ pos += snprintf(&input_param_buf[pos], OUTPUT_PARAM_MAX_LINE_SIZE - pos,
+ "(%d, %d), ", inputs[i].src_id, inputs[i].output_idx);
+ } else {
+ pos += snprintf(&input_param_buf[pos], OUTPUT_PARAM_MAX_LINE_SIZE - pos,
+ "(%d), ", inputs[i].output_idx);
+ }
+ }
+
+ char output_param_buf[OUTPUT_PARAM_MAX_LINE_SIZE];
+ memset(output_param_buf, 0, OUTPUT_PARAM_MAX_LINE_SIZE);
+ pos = 0;
+ pos += snprintf(&output_param_buf[pos], OUTPUT_PARAM_MAX_LINE_SIZE - pos,
+ "out: ");
+ for (int i = 0; i < outputs_count && pos < OUTPUT_PARAM_MAX_LINE_SIZE; ++i) {
+ pos += snprintf(&output_param_buf[pos], OUTPUT_PARAM_MAX_LINE_SIZE - pos,
+ "(%d), ", outputs[i].max_size);
+ }
+
+ if (DBG_SHOW_ID) {
+ TFMLOGV("---(OP) %s, %d, %d, %d, %d, %d, %s, %s", name, node_id,
+ ops_id, padding_id, inputs_count, outputs_count, input_param_buf,
+ output_param_buf);
+ } else {
+ TFMLOGV("---(OP) %s, %d, %d, %d, %d, %s, %s", name,
+ ops_id, padding_id, inputs_count, outputs_count, input_param_buf,
+ output_param_buf);
+ }
+ const int retval = hexagon_nn_append_node(
+ graph_id, node_id, ops_id, padding_id,
+ inputs, inputs_count,
+ outputs, outputs_count);
+ if (retval != 0) {
+ TFMLOGE("Failed to append const node %d", node_id);
+ return retval;
+ }
+ return retval;
+}
+
+void hexagon_controller_EnableDbgUseInceptionDummyData(bool enable) {
+ s_dbg_use_inception_dummy_data = enable;
+}
+
+bool hexagon_controller_IsDbgUseInceptionDummyDataEnabled() {
+ return s_dbg_use_inception_dummy_data;
+}
diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_impl/include/hexagon_controller.h b/tensorflow/contrib/hvx/hexagon_controller/src_impl/include/hexagon_controller.h
new file mode 100644
index 0000000000..eaf4a58751
--- /dev/null
+++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/include/hexagon_controller.h
@@ -0,0 +1,124 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef GEMM_WRAPPER_H
+#define GEMM_WRAPPER_H
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "hexagon_nn.h"
+#include "node_data_float.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+#define INCEPTION_PARAM_BATCHES 1
+#define INCEPTION_PARAM_HEIGHT_V1 224
+#define INCEPTION_PARAM_WIDTH_V1 224
+#define INCEPTION_PARAM_HEIGHT_V3 299
+#define INCEPTION_PARAM_WIDTH_V3 299
+#define INCEPTION_PARAM_DEPTH 3
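+
+// For reference (illustrative arithmetic only): a single v3 input tensor is
+// 1 * 299 * 299 * 3 = 268203 elements, i.e. 268203 bytes as uint8 data or
+// 1072812 bytes as float data.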
+
+// General functions
+void hexagon_controller_PrintGraph(uint32_t nn_id);
+
+int hexagon_controller_GetWrapperVersion();
+
+int hexagon_controller_GetHexagonBinaryVersion();
+
+// Hexagon perf functions
+int hexagon_controller_InitHexagonWithMaxAttributes(int enable_dcvs,
+ int bus_usage, int version);
+
+bool hexagon_controller_AllocateNodeDataBuffers(int input_size,
+ int output_size);
+
+bool hexagon_controller_ReleaseNodeDataBuffers();
+
+bool hexagon_controller_CopyByteNodeData(int x, int y, int z, int d,
+ int type_byte_size,
+ uint8_t* array_data);
+
+int hexagon_controller_DeInitHexagon();
+
+uint32_t hexagon_controller_GetTargetGraphId();
+
+void hexagon_controller_SetTargetGraphId(uint32_t graph_id);
+
+// Hexagon config functions
+void hexagon_controller_GrowMemorySize();
+
+// Graph data transfer functions
+struct NodeDataFloat* hexagon_controller_GetInputNodeDataFloatBuffer();
+
+float* hexagon_controller_GetOutputNodeDataFloatBuffer(
+ const char* const node_name, int* out_array_size);
+
+// Graph functions
+uint32_t hexagon_controller_InstantiateGraph();
+
+void hexagon_controller_InitGraph(int version, uint32_t nn_id);
+
+bool hexagon_controller_ConstructGraph(uint32_t nn_id);
+
+uint32_t hexagon_controller_SetupGraph(int version);
+
+bool hexagon_controller_ExecuteInceptionDummyData(uint32_t nn_id);
+
+bool hexagon_controller_ExecuteGraph(
+ const uint32_t nn_id, const uint32_t batches, const uint32_t height,
+ const uint32_t width, const uint32_t depth, uint8_t* int_data,
+ const uint32_t int_data_size, uint32_t* out_batches, uint32_t* out_height,
+ uint32_t* out_width, uint32_t* out_depth, uint8_t* out_vals,
+ const uint32_t output_val_byte_size, uint32_t* out_data_byte_size);
+
+bool hexagon_controller_ExecuteGraphWithBuffer(uint32_t nn_id,
+ bool show_ranking);
+
+void hexagon_controller_DumpPerf(uint32_t nn_id);
+
+void hexagon_controller_DumpNodeName(uint32_t nn_id);
+
+void hexagon_controller_Teardown(uint32_t nn_id);
+
+void hexagon_controller_PrintMaxNIdx(const float* data, const uint32_t entries,
+ const int n, int* out_ranking);
+
+void hexagon_controller_InitInputNodeDataToInceptionDummyData(int version);
+
+int hexagon_controller_AppendNode(const char* const name, int graph_id,
+ int node_id, int op_id, int padding_id,
+ const hexagon_nn_input* const inputs,
+ int inputs_count,
+ const hexagon_nn_output* const outputs,
+ int outputs_count);
+
+int hexagon_controller_AppendConstNode(const char* const name, int graph_id,
+ int node_id, int batch, int height,
+ int width, int depth,
+ const uint8_t* const data,
+ int data_length);
+
+void hexagon_controller_EnableDbgUseInceptionDummyData(bool enable);
+
+bool hexagon_controller_IsDbgUseInceptionDummyDataEnabled();
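+
+// An illustrative call sequence, mirroring how soc_interface.c drives this
+// API with the built-in dummy data; not the only valid ordering:
+//
+//   hexagon_controller_EnableDbgUseInceptionDummyData(true);
+//   hexagon_controller_InitHexagonWithMaxAttributes(0, 100, 3 /* version */);
+//   const uint32_t nn_id = hexagon_controller_SetupGraph(3 /* version */);
+//   hexagon_controller_SetTargetGraphId(nn_id);
+//   hexagon_controller_InitInputNodeDataToInceptionDummyData(3 /* version */);
+//   hexagon_controller_ExecuteGraphWithBuffer(nn_id, true /* show_ranking */);
+//   hexagon_controller_Teardown(nn_id);
+//   hexagon_controller_DeInitHexagon();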
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+#endif // GEMM_WRAPPER_H
diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_log/include/tfm_log.h b/tensorflow/contrib/hvx/hexagon_controller/src_log/include/tfm_log.h
new file mode 100644
index 0000000000..e8615fd4ec
--- /dev/null
+++ b/tensorflow/contrib/hvx/hexagon_controller/src_log/include/tfm_log.h
@@ -0,0 +1,74 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef GEMM_WRAPPER_LOG_H
+#define GEMM_WRAPPER_LOG_H
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+#define TFM_LOG_LEVEL_VERBOSE -2
+#define TFM_LOG_LEVEL_DEBUG -1
+#define TFM_LOG_LEVEL_INFO 0
+#define TFM_LOG_LEVEL_WARNING 1
+#define TFM_LOG_LEVEL_ERROR 2
+#define TFM_LOG_LEVEL_FATAL 3
+
+static int s_log_level = TFM_LOG_LEVEL_INFO;
+
+static inline bool IsLogOn(int log_level) { return log_level >= s_log_level; }
+
+static inline void SetLogLevel(int log_level) { s_log_level = log_level; }
+
+#define TFMLOGV(fmt, ...) \
+ do { \
+ if (!IsLogOn(TFM_LOG_LEVEL_VERBOSE)) break; \
+ printf(fmt "\n", ##__VA_ARGS__); \
+ } while (0)
+
+#define TFMLOGD(fmt, ...) \
+ do { \
+ if (!IsLogOn(TFM_LOG_LEVEL_DEBUG)) break; \
+ printf(fmt "\n", ##__VA_ARGS__); \
+ } while (0)
+
+#define TFMLOGI(fmt, ...) \
+ do { \
+ if (!IsLogOn(TFM_LOG_LEVEL_INFO)) break; \
+ printf(fmt "\n", ##__VA_ARGS__); \
+ } while (0)
+
+#define TFMLOGE(fmt, ...) \
+ do { \
+ if (!IsLogOn(TFM_LOG_LEVEL_ERROR)) break; \
+ printf(fmt "\n", ##__VA_ARGS__); \
+ } while (0)
+
+static inline void PrintLogHexagon(const char* fmt, va_list ap) {
+ char buffer[200];
+ // vsnprintf (not snprintf) must be used with a va_list; it also
+ // null-terminates within the buffer bounds.
+ const int count = vsnprintf(buffer, sizeof(buffer), fmt, ap);
+ if (count < 0) {
+ return;
+ }
+ TFMLOGI("%s", buffer);
+}
+
+static inline void LogDHexagon(const char* fmt, ...) {
+ va_list ap;
+ va_start(ap, fmt);
+ PrintLogHexagon(fmt, ap);
+ va_end(ap);
+}
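+
+// Minimal usage sketch (illustrative only; the controller sources use the
+// same pattern):
+//
+//   SetLogLevel(TFM_LOG_LEVEL_DEBUG);
+//   TFMLOGD("graph id = %d", 1);
+//   LogDHexagon("Execute graph!");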
+
+#endif
diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/node_data_float.h b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/node_data_float.h
new file mode 100644
index 0000000000..a9c3296e9f
--- /dev/null
+++ b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/node_data_float.h
@@ -0,0 +1,41 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef NODE_DATA_FLOAT_H
+#define NODE_DATA_FLOAT_H
+
+// uint8_t is needed in both C and C++ translation units.
+#include <inttypes.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#define NODE_DATA_FLOAT_NODE_NAME_BUF_SIZE 100
+
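+// Describes one node's input buffer. As used by
+// hexagon_controller_ExecuteGraphWithBuffer(), x/y/z/d correspond to
+// batches/height/width/depth, byte_array_data holds the raw input bytes, and
+// array_size is its length in bytes.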
+struct NodeDataFloat {
+ int x;
+ int y;
+ int z;
+ int d;
+ int buf_size;
+ int array_size;
+ float* array_data;
+ uint8_t* byte_array_data;
+ char node_name[NODE_DATA_FLOAT_NODE_NAME_BUF_SIZE];
+};
+#ifdef __cplusplus
+}
+#endif
+
+#endif // NODE_DATA_FLOAT_H
diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c
index ebcbb963e8..7db8d4870c 100755
--- a/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c
+++ b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c
@@ -15,110 +15,230 @@ limitations under the License.
#include "soc_interface.h"
+#include <inttypes.h>
+
+#include "hexagon_controller.h"
+#include "hexagon_nn.h"
+#include "node_data_float.h"
+#include "tfm_log.h"
+
+const int64_t FLAG_ENABLE_INCEPTION_DUMMY_BINARY_INPUT = 0x01;
+
+static const int INCEPTION_VERSION = 3;
+
+static hexagon_nn_input* s_node_inputs_array;
+static int s_node_inputs_array_index;
+static int s_node_inputs_array_max_count;
+
+static hexagon_nn_output* s_node_outputs_array;
+static int s_node_outputs_array_index;
+static int s_node_outputs_array_max_count;
+
int soc_interface_GetWrapperVersion() {
- // TODO(satok): implement
- return -1;
+ TFMLOGD("GetWrapperVersion");
+ return hexagon_controller_GetWrapperVersion();
}
int soc_interface_GetSocControllerVersion() {
- // TODO(satok): implement
- return -1;
+ TFMLOGD("GetSocControllerVersion");
+ return hexagon_controller_GetHexagonBinaryVersion();
}
bool soc_interface_Init() {
- // TODO(satok): implement
- return false;
+ TFMLOGD("Init");
+ hexagon_controller_InitHexagonWithMaxAttributes(
+ 0, 100, INCEPTION_VERSION /* version */);
+ hexagon_controller_GrowMemorySize();
+ return true;
}
bool soc_interface_Finalize() {
- // TODO(satok): implement
- return false;
+ TFMLOGD("Finalize");
+ hexagon_controller_DeInitHexagon();
+ return true;
}
bool soc_interface_ExecuteGraph() {
- // TODO(satok): implement
- return false;
+ TFMLOGD("ExecuteGraph");
+ if (hexagon_controller_IsDbgUseInceptionDummyDataEnabled()) {
+ hexagon_controller_InitInputNodeDataToInceptionDummyData(
+ INCEPTION_VERSION /* version */);
+ }
+ const uint32_t graph_id = hexagon_controller_GetTargetGraphId();
+ if (graph_id == 0) {
+ TFMLOGE("Graph id has not been set yet.");
+ return false;
+ }
+ hexagon_controller_ExecuteGraphWithBuffer(graph_id, true);
+ return true;
}
bool soc_interface_TeardownGraph() {
- // TODO(satok): implement
- return false;
+ TFMLOGD("TeardownGraph");
+ const uint32_t graph_id = hexagon_controller_GetTargetGraphId();
+ if (graph_id == 0) {
+ TFMLOGE("Graph id has not been set yet.");
+ return false;
+ }
+ hexagon_controller_Teardown(graph_id);
+ return true;
}
bool soc_interface_FillInputNodeFloat(
- int x, int y, int z, int d, const uint8_t* const buf, uint64_t buf_size) {
- // TODO(satok): implement
- return false;
+ int x, int y, int z, int d, const uint8_t* const buf,
+ uint64_t buf_size) {
+ TFMLOGD("FillInputNodeFloat");
+ struct NodeDataFloat* node_data_float =
+ hexagon_controller_GetInputNodeDataFloatBuffer();
+ const int array_size = x * y * z * d;
+ if (array_size > node_data_float->buf_size) {
+ TFMLOGE("Array size exceeds buf size %d > %d",
+ array_size, node_data_float->buf_size);
+ return false;
+ }
+ if (buf_size != array_size * sizeof(float)) {
+ TFMLOGE("Invalid buf size!");
+ return false;
+ }
+ memcpy(node_data_float->byte_array_data, buf, buf_size);
+ node_data_float->x = x;
+ node_data_float->y = y;
+ node_data_float->z = z;
+ node_data_float->d = d;
+ node_data_float->array_size = buf_size;
+ return true;
}
// TODO(satok): Remove and use runtime version
bool soc_interface_ReadOutputNodeFloat(
const char* const node_name, uint8_t** buf, uint64_t *buf_size) {
- // TODO(satok): implement
- return false;
+ TFMLOGD("ReadOutputNodeFloat");
+ int array_size = -1;
+ float* output_node_data_float =
+ hexagon_controller_GetOutputNodeDataFloatBuffer(node_name, &array_size);
+ if (array_size < 0) {
+ TFMLOGE("Failed to read data.");
+ return false;
+ }
+ *buf = (uint8_t*)output_node_data_float;
+ *buf_size = array_size * sizeof(float);
+ return true;
}
bool soc_interface_SetupGraphDummy(int version) {
- // TODO(satok): implement
- return false;
+ TFMLOGD("SetupGraphDummy");
+ const uint32_t graph_id = hexagon_controller_SetupGraph(version);
+ if (graph_id == 0) {
+ TFMLOGE("Failed to setup graph");
+ return false;
+ }
+ hexagon_controller_SetTargetGraphId(graph_id);
+ return true;
}
bool soc_interface_AllocateNodeInputAndNodeOutputArray(
int total_input_count, int total_output_count) {
- // TODO(satok): implement
- return false;
+ TFMLOGD("Allocate node inputs and node outputs array %d, %d",
+ total_input_count, total_output_count);
+ s_node_inputs_array = malloc(total_input_count * sizeof(hexagon_nn_input));
+ s_node_outputs_array = malloc(total_output_count * sizeof(hexagon_nn_output));
+ s_node_inputs_array_index = 0;
+ s_node_outputs_array_index = 0;
+ s_node_inputs_array_max_count = total_input_count;
+ s_node_outputs_array_max_count = total_output_count;
+ return true;
}
bool soc_interface_ReleaseNodeInputAndNodeOutputArray() {
- // TODO(satok): implement
- return false;
+ TFMLOGD("Release node inputs and node outputs array");
+ free(s_node_inputs_array);
+ free(s_node_outputs_array);
+ return true;
}
void* soc_interface_SetOneNodeInputs(
int input_count, const int* const node_id, const int* const port) {
- // TODO(satok): implement
- return 0;
+ if (s_node_inputs_array_index + input_count > s_node_inputs_array_max_count) {
+ TFMLOGE("input count exceeds limit");
+ return 0;
+ }
+ for (int i = 0; i < input_count; ++i) {
+ const int index = s_node_inputs_array_index + i;
+ s_node_inputs_array[index].src_id = node_id[i];
+ s_node_inputs_array[index].output_idx = port[i];
+ }
+ void* retval = (void*)(&s_node_inputs_array[s_node_inputs_array_index]);
+ s_node_inputs_array_index += input_count;
+ return retval;
}
void* soc_interface_SetOneNodeOutputs(int output_count, int* max_size) {
- // TODO(satok): implement
- return 0;
+ if (s_node_outputs_array_index + output_count >
+ s_node_outputs_array_max_count) {
+ TFMLOGE("output count exceeds limit");
+ return 0;
+ }
+ for (int i = 0; i < output_count; ++i) {
+ const int index = s_node_outputs_array_index + i;
+ s_node_outputs_array[index].max_size = max_size[i];
+ }
+ void* retval = (void*)(&s_node_outputs_array[s_node_outputs_array_index]);
+ s_node_outputs_array_index += output_count;
+ return retval;
}
// Append const node to the graph
bool soc_interface_AppendConstNode(
- const char* const name, int node_id, int batch, int height, int width,
- int depth, const uint8_t* const data, int data_length) {
- // TODO(satok): implement
- return false;
+ const char* const name, int node_id, int batch, int height, int width, int depth,
+ const uint8_t* const data, int data_length) {
+ const uint32_t graph_id = hexagon_controller_GetTargetGraphId();
+ const int retval = hexagon_controller_AppendConstNode(
+ name, graph_id, node_id, batch, height, width, depth, data, data_length);
+ if (retval != 0) {
+ TFMLOGE("Failed to append const node %d", node_id);
+ return false;
+ }
+ return true;
}
// Append node to the graph
bool soc_interface_AppendNode(
- const char* const name, int node_id, int ops_id, int padding_id,
- const void* const inputs, int inputs_count, const void* const outputs,
- int outputs_count) {
- // TODO(satok): implement
- return false;
+ const char* const name, int node_id, int ops_id, int padding_id, const void* const inputs,
+ int inputs_count, const void* const outputs, int outputs_count) {
+ const uint32_t graph_id = hexagon_controller_GetTargetGraphId();
+ const int retval = hexagon_controller_AppendNode(
+ name, graph_id, node_id, ops_id, padding_id,
+ (hexagon_nn_input*) inputs, inputs_count,
+ (hexagon_nn_output*) outputs, outputs_count);
+ if (retval != 0) {
+ TFMLOGE("Failed to append const node %d", node_id);
+ return false;
+ }
+ return true;
}
// Instantiate graph
bool soc_interface_InstantiateGraph() {
- // TODO(satok): implement
- return false;
+ const uint32_t nn_id = hexagon_controller_InstantiateGraph();
+ hexagon_controller_SetTargetGraphId(nn_id);
+ return true;
}
// Construct graph
bool soc_interface_ConstructGraph() {
- // TODO(satok): implement
- return false;
+ const uint32_t graph_id = hexagon_controller_GetTargetGraphId();
+ return hexagon_controller_ConstructGraph(graph_id);
}
void soc_interface_SetLogLevel(int log_level) {
- // TODO(satok): implement
+ SetLogLevel(log_level);
}
void soc_interface_SetDebugFlag(uint64_t flag) {
- // TODO(satok): implement
+ TFMLOGI("Set debug flag 0x%" PRIx64, flag);
+ if ((flag & FLAG_ENABLE_INCEPTION_DUMMY_BINARY_INPUT) != 0) {
+ TFMLOGI("Enable always use panda data");
+ hexagon_controller_EnableDbgUseInceptionDummyData(true);
+ }
}
diff --git a/tensorflow/contrib/hvx/hexagon_controller/target/make/android.min b/tensorflow/contrib/hvx/hexagon_controller/target/make/android.min
new file mode 100644
index 0000000000..4770d31c56
--- /dev/null
+++ b/tensorflow/contrib/hvx/hexagon_controller/target/make/android.min
@@ -0,0 +1,70 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+$(info ------------------------------------------)
+$(info --- V = $(V))
+$(info --- GLUE_DIR = $(GLUE_DIR))
+$(info --- HEXAGON_SDK_ROOT = $(HEXAGON_SDK_ROOT))
+$(info ------------------------------------------)
+
+INCDIRS += ../../../libs/common/adspmsgd/ship/android_Release
+
+INCDIRS += src_impl/include
+INCDIRS += src_log/include
+INCDIRS += src_soc_interface/include
+
+LIBDIRS += ../../../libs/common/adspmsgd/ship/android_Release
+
+BUILD_DLLS=libhexagon_controller
+
+hexagon_controller_lib_QAICIDLS += \
+interface/hexagon_nn \
+$(MAKE_D_DSPCV_INCDIR)/dspCV
+
+# hexagon controller library
+hexagon_controller_lib_C_SRCS += \
+src_impl/hexagon_controller \
+src_impl/graph_functions_wrapper \
+src_soc_interface/soc_interface
+
+# dummy data
+hexagon_controller_lib_C_SRCS += \
+src_dummy_data/inception_v1_graph_init \
+src_dummy_data/inception_v3_dummy_float_data \
+src_dummy_data/inception_v3_dummy_int_data \
+src_dummy_data/inception_v3_graph_init
+
+# hexagon interface
+hexagon_controller_lib_C_SRCS += \
+$V/hexagon_nn_stub \
+$V/dspCV_stub
+
+hexagon_controller_lib_DLLS += libadsprpc
+hexagon_controller_lib_LIBS += rpcmem adspmsgd
+hexagon_controller_lib_LD_FLAGS += -llog
+hexagon_controller_lib_DEFINES += VERIFY_PRINT_ERROR
+
+libhexagon_controller_QAICIDLS += $(hexagon_controller_lib_QAICIDLS)
+libhexagon_controller_C_SRCS += $(hexagon_controller_lib_C_SRCS)
+libhexagon_controller_DLLS += $(hexagon_controller_lib_DLLS)
+libhexagon_controller_LIBS += $(hexagon_controller_lib_LIBS)
+libhexagon_controller_LD_FLAGS += $(hexagon_controller_lib_LD_FLAGS)
+libhexagon_controller_DEFINES += $(hexagon_controller_lib_DEFINES)
+
+BUILD_COPIES = \
+ $(DLLS) \
+ $(EXES) \
+ $(LIBS) \
+ $(SHIP_DIR)/ ;
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index 2673495b90..e47342f966 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -1385,7 +1385,8 @@ def fully_connected(inputs,
if not isinstance(num_outputs, six.integer_types):
raise ValueError('num_outputs should be int or long, got %s.', num_outputs)
- layer_variable_getter = _build_variable_getter({'bias': 'biases'})
+ layer_variable_getter = _build_variable_getter({'bias': 'biases',
+ 'kernel': 'weights'})
with variable_scope.variable_scope(
scope, 'fully_connected', [inputs],
@@ -1395,9 +1396,9 @@ def fully_connected(inputs,
units=num_outputs,
activation=None,
use_bias=not normalizer_fn and biases_initializer,
- weights_initializer=weights_initializer,
+ kernel_initializer=weights_initializer,
bias_initializer=biases_initializer,
- weights_regularizer=weights_regularizer,
+ kernel_regularizer=weights_regularizer,
bias_regularizer=biases_regularizer,
activity_regularizer=None,
trainable=trainable,
@@ -1408,7 +1409,7 @@ def fully_connected(inputs,
outputs = layer.apply(inputs)
# Add variables to collections.
- _add_variable_to_collections(layer.w, variables_collections, 'weights')
+ _add_variable_to_collections(layer.kernel, variables_collections, 'weights')
if layer.bias is not None:
_add_variable_to_collections(layer.bias, variables_collections, 'biases')
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index d1b35e33c2..6043d4dc0e 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -1563,7 +1563,7 @@ class FCTest(test.TestCase):
_layers.fully_connected(inputs, 32, weights_regularizer=weight_decay)
wd = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)[0]
self.assertEqual(wd.op.name,
- 'fully_connected/weights/Regularizer/l2_regularizer')
+ 'fully_connected/kernel/Regularizer/l2_regularizer')
sess.run(variables_lib.global_variables_initializer())
self.assertLess(sess.run(wd), 0.4)
diff --git a/tensorflow/contrib/layers/python/layers/optimizers.py b/tensorflow/contrib/layers/python/layers/optimizers.py
index 0b50d93b72..bab59d0048 100644
--- a/tensorflow/contrib/layers/python/layers/optimizers.py
+++ b/tensorflow/contrib/layers/python/layers/optimizers.py
@@ -176,6 +176,11 @@ def optimize_loss(loss,
str(type(learning_rate))))
if summaries is None:
summaries = ["loss", "learning_rate"]
+ else:
+ for summ in summaries:
+ if summ not in OPTIMIZER_SUMMARIES:
+ raise ValueError("Summaries should be one of [%s], you provided %s." %
+ (", ".join(OPTIMIZER_SUMMARIES), summ))
if learning_rate is not None and learning_rate_decay_fn is not None:
if global_step is None:
raise ValueError("global_step is required for learning_rate_decay_fn.")
diff --git a/tensorflow/contrib/layers/python/layers/optimizers_test.py b/tensorflow/contrib/layers/python/layers/optimizers_test.py
index b7b984b1e8..9dc612e58e 100644
--- a/tensorflow/contrib/layers/python/layers/optimizers_test.py
+++ b/tensorflow/contrib/layers/python/layers/optimizers_test.py
@@ -108,6 +108,14 @@ class OptimizersTest(test.TestCase):
optimizers_lib.optimize_loss(
loss, global_step, learning_rate=0.1, optimizer=optimizer)
+ def testBadSummaries(self):
+ with ops.Graph().as_default() as g, self.test_session(graph=g):
+ _, _, loss, global_step = _setup_model()
+ with self.assertRaises(ValueError):
+ optimizers_lib.optimize_loss(
+ loss, global_step, learning_rate=0.1, optimizer="SGD",
+ summaries=["loss", "bad_summary"])
+
def testInvalidLoss(self):
with ops.Graph().as_default() as g, self.test_session(graph=g):
_, _, _, global_step = _setup_model()
diff --git a/tensorflow/contrib/learn/python/learn/__init__.py b/tensorflow/contrib/learn/python/learn/__init__.py
index d7b9aaffd4..6a6ff10d44 100644
--- a/tensorflow/contrib/learn/python/learn/__init__.py
+++ b/tensorflow/contrib/learn/python/learn/__init__.py
@@ -46,4 +46,5 @@ from tensorflow.contrib.learn.python.learn.learn_io import *
from tensorflow.contrib.learn.python.learn.metric_spec import MetricSpec
from tensorflow.contrib.learn.python.learn.monitors import NanLossDuringTrainingError
from tensorflow.contrib.learn.python.learn.trainable import Trainable
+from tensorflow.contrib.learn.python.learn.utils import *
# pylint: enable=wildcard-import
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
index 1d36389722..becdf61709 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
@@ -36,7 +36,6 @@ from tensorflow.contrib.framework import deprecated_arg_values
from tensorflow.contrib.framework import deprecated_args
from tensorflow.contrib.framework import list_variables
from tensorflow.contrib.framework import load_variable
-from tensorflow.contrib.framework.python.framework import experimental
from tensorflow.contrib.framework.python.ops import variables as contrib_variables
from tensorflow.contrib.learn.python.learn import evaluable
from tensorflow.contrib.learn.python.learn import metric_spec
@@ -68,7 +67,6 @@ from tensorflow.python.training import basic_session_run_hooks
from tensorflow.python.training import device_setter
from tensorflow.python.training import monitored_session
from tensorflow.python.training import saver
-from tensorflow.python.training import session_run_hook
from tensorflow.python.training import summary_io
from tensorflow.python.util import compat
@@ -815,9 +813,10 @@ class BaseEstimator(
update_op, eval_dict = self._extract_metric_update_ops(eval_dict)
- hooks = hooks or []
+ # We need to copy the hook array as we modify it, thus [:].
+ hooks = hooks[:] if hooks else []
if feed_fn:
- hooks.append(_FeedFnHook(feed_fn))
+ hooks.append(basic_session_run_hooks.FeedFnHook(feed_fn))
if steps:
hooks.append(
evaluation.StopAfterNEvalsHook(
@@ -1216,22 +1215,20 @@ class Estimator(BaseEstimator):
self._labels_info)
return self._call_model_fn(features, labels, model_fn_lib.ModeKeys.INFER)
- @experimental
def export_savedmodel(
- self, export_dir_base, input_fn,
+ self, export_dir_base, serving_input_fn,
default_output_alternative_key=None,
assets_extra=None,
- as_text=False,
- exports_to_keep=None):
+ as_text=False):
"""Exports inference graph as a SavedModel into given dir.
Args:
export_dir_base: A string containing a directory to write the exported
graph and checkpoints.
- input_fn: A function that takes no argument and
+ serving_input_fn: A function that takes no argument and
returns an `InputFnOps`.
default_output_alternative_key: the name of the head to serve when none is
- specified.
+ specified. Not needed for single-headed models.
assets_extra: A dict specifying how to populate the assets.extra directory
within the exported SavedModel. Each key should give the destination
path (including the filename) relative to the assets.extra directory.
@@ -1240,7 +1237,6 @@ class Estimator(BaseEstimator):
renaming it is specified as
`{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
as_text: whether to write the SavedModel proto in text format.
- exports_to_keep: Number of exports to keep.
Returns:
The string path to the exported directory.
@@ -1248,14 +1244,14 @@ class Estimator(BaseEstimator):
Raises:
ValueError: if an unrecognized export_type is requested.
"""
- if input_fn is None:
- raise ValueError('input_fn must be defined.')
+ if serving_input_fn is None:
+ raise ValueError('serving_input_fn must be defined.')
with ops.Graph().as_default() as g:
contrib_variables.create_global_step(g)
- # Call the input_fn and collect the input alternatives.
- input_ops = input_fn()
+ # Call the serving_input_fn and collect the input alternatives.
+ input_ops = serving_input_fn()
input_alternatives, features = (
saved_model_export_utils.get_input_alternatives(input_ops))
@@ -1266,7 +1262,7 @@ class Estimator(BaseEstimator):
saved_model_export_utils.get_output_alternatives(
model_fn_ops, default_output_alternative_key))
- # Build the SignatureDefs from all pairs of input and output signatures
+ # Build the SignatureDefs from all pairs of input and output alternatives
signature_def_map = saved_model_export_utils.build_all_signature_defs(
input_alternatives, output_alternatives,
actual_default_output_alternative_key)
@@ -1317,17 +1313,6 @@ class Estimator(BaseEstimator):
return export_dir
-class _FeedFnHook(session_run_hook.SessionRunHook):
- """Runs feed_fn and sets the feed_dict accordingly."""
-
- def __init__(self, feed_fn):
- self.feed_fn = feed_fn
-
- def before_run(self, run_context): # pylint: disable=unused-argument
- return session_run_hook.SessionRunArgs(
- fetches=None, feed_dict=self.feed_fn())
-
-
# For time of deprecation x,y from Estimator allow direct access.
# pylint: disable=protected-access
class SKCompat(sklearn.BaseEstimator):
@@ -1343,7 +1328,7 @@ class SKCompat(sklearn.BaseEstimator):
epochs=None)
all_monitors = []
if feed_fn:
- all_monitors = [_FeedFnHook(feed_fn)]
+ all_monitors = [basic_session_run_hooks.FeedFnHook(feed_fn)]
if monitors:
all_monitors.extend(monitors)
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
index 0b4897d4b2..ffa2e17aec 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
@@ -211,12 +211,12 @@ def _build_estimator_for_export_tests(tmpdir):
feature_spec = feature_column_lib.create_feature_spec_for_parsing(
feature_columns)
- export_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)
+ serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)
# hack in an op that uses an asset, in order to test asset export.
# this is not actually valid, of course.
- def export_input_fn_with_asset():
- features, labels, inputs = export_input_fn()
+ def serving_input_fn_with_asset():
+ features, labels, inputs = serving_input_fn()
vocab_file_name = os.path.join(tmpdir, 'my_vocab_file')
vocab_file = gfile.GFile(vocab_file_name, mode='w')
@@ -229,7 +229,7 @@ def _build_estimator_for_export_tests(tmpdir):
return input_fn_utils.InputFnOps(features, labels, inputs)
- return est, export_input_fn_with_asset
+ return est, serving_input_fn_with_asset
class CheckCallsMonitor(monitors_lib.BaseMonitor):
@@ -620,6 +620,16 @@ class EstimatorTest(test.TestCase):
predictions = list(est.predict(x=iris.data))
self.assertEqual(len(predictions), iris.target.shape[0])
+ def testHooksNotChanged(self):
+ est = estimator.Estimator(model_fn=logistic_model_no_mode_fn)
+ # We pass an empty array and expect it to remain empty after calling
+ # fit and evaluate. This requires the estimator to copy the array
+ # internally if it adds any hooks.
+ my_array = []
+ est.fit(input_fn=iris_input_fn, steps=100, monitors=my_array)
+ _ = est.evaluate(input_fn=iris_input_fn, steps=1, hooks=my_array)
+ self.assertEqual(my_array, [])
+
def testIrisInputFnLabelsDict(self):
iris = base.load_iris()
est = estimator.Estimator(model_fn=logistic_model_no_mode_fn)
@@ -811,7 +821,7 @@ class EstimatorTest(test.TestCase):
def test_export_savedmodel(self):
tmpdir = tempfile.mkdtemp()
- est, export_input_fn = _build_estimator_for_export_tests(tmpdir)
+ est, serving_input_fn = _build_estimator_for_export_tests(tmpdir)
extra_file_name = os.path.join(
compat.as_bytes(tmpdir), compat.as_bytes('my_extra_file'))
@@ -823,7 +833,7 @@ class EstimatorTest(test.TestCase):
export_dir_base = os.path.join(
compat.as_bytes(tmpdir), compat.as_bytes('export'))
export_dir = est.export_savedmodel(
- export_dir_base, export_input_fn, assets_extra=assets_extra)
+ export_dir_base, serving_input_fn, assets_extra=assets_extra)
self.assertTrue(gfile.Exists(export_dir_base))
self.assertTrue(gfile.Exists(export_dir))
diff --git a/tensorflow/contrib/learn/python/learn/estimators/svm.py b/tensorflow/contrib/learn/python/learn/estimators/svm.py
index e7805d9a90..c898a4865b 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/svm.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/svm.py
@@ -18,14 +18,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-import inspect
-import re
-
from tensorflow.contrib import layers
from tensorflow.contrib.framework import deprecated_arg_values
-from tensorflow.contrib.framework.python.framework import experimental
-from tensorflow.contrib.learn.python.learn import evaluable
-from tensorflow.contrib.learn.python.learn import trainable
from tensorflow.contrib.learn.python.learn.estimators import estimator
from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
from tensorflow.contrib.learn.python.learn.estimators import linear
@@ -38,15 +32,7 @@ def _as_iterable(preds, output):
yield pred[output]
-def _get_metric_args(metric):
- if hasattr(metric, "__code__"):
- return inspect.getargspec(metric).args
- elif hasattr(metric, "func") and hasattr(metric, "keywords"):
- return [arg for arg in inspect.getargspec(metric.func).args
- if arg not in metric.keywords.keys()]
-
-
-class SVM(trainable.Trainable, evaluable.Evaluable):
+class SVM(estimator.Estimator):
"""Support Vector Machine (SVM) model for binary classification.
Currently, only linear SVMs are supported. For the underlying optimization
@@ -106,7 +92,7 @@ class SVM(trainable.Trainable, evaluable.Evaluable):
kernels=None,
config=None,
feature_engineering_fn=None):
- """Constructs a `SVM~ estimator object.
+ """Constructs an `SVM` estimator object.
Args:
example_id_column: A string defining the feature column name representing
@@ -139,15 +125,15 @@ class SVM(trainable.Trainable, evaluable.Evaluable):
"""
if kernels is not None:
raise ValueError("Kernel SVMs are not currently supported.")
- self._optimizer = sdca_optimizer.SDCAOptimizer(
+ optimizer = sdca_optimizer.SDCAOptimizer(
example_id_column=example_id_column,
num_loss_partitions=num_loss_partitions,
symmetric_l1_regularization=l1_regularization,
symmetric_l2_regularization=l2_regularization)
self._feature_columns = feature_columns
- self._chief_hook = linear._SdcaUpdateWeightsHook() # pylint: disable=protected-access
- self._estimator = estimator.Estimator(
+ chief_hook = linear._SdcaUpdateWeightsHook() # pylint: disable=protected-access
+ super(SVM, self).__init__(
model_fn=linear.sdca_model_fn,
model_dir=model_dir,
config=config,
@@ -156,62 +142,20 @@ class SVM(trainable.Trainable, evaluable.Evaluable):
weight_column_name=weight_column_name,
enable_centered_bias=False),
"feature_columns": feature_columns,
- "optimizer": self._optimizer,
+ "optimizer": optimizer,
"weight_column_name": weight_column_name,
- "update_weights_hook": self._chief_hook,
+ "update_weights_hook": chief_hook,
},
feature_engineering_fn=feature_engineering_fn)
- if not self._estimator.config.is_chief:
- self._chief_hook = None
-
- @property
- def model_dir(self):
- """See trainable.Evaluable."""
- return self._estimator.model_dir
-
- def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None,
- monitors=None, max_steps=None):
- """See trainable.Trainable."""
- if monitors is None:
- monitors = []
- if self._chief_hook:
- monitors.append(self._chief_hook)
- return self._estimator.fit(x=x, y=y, input_fn=input_fn, steps=steps,
- batch_size=batch_size, monitors=monitors,
- max_steps=max_steps)
-
- # pylint: disable=protected-access
- def evaluate(self,
- x=None,
- y=None,
- input_fn=None,
- feed_fn=None,
- batch_size=None,
- steps=None,
- metrics=None,
- name=None,
- checkpoint_path=None,
- hooks=None):
- """See evaluable.Evaluable."""
- return self._estimator.evaluate(
- x=x,
- y=y,
- input_fn=input_fn,
- feed_fn=feed_fn,
- batch_size=batch_size,
- steps=steps,
- metrics=metrics,
- name=name,
- checkpoint_path=checkpoint_path,
- hooks=hooks)
@deprecated_arg_values(
estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
as_iterable=False)
- def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=True):
+ def predict_classes(self, x=None, input_fn=None, batch_size=None,
+ as_iterable=True):
"""Runs inference to determine the predicted class."""
key = prediction_key.PredictionKey.CLASSES
- preds = self._estimator.predict(
+ preds = super(SVM, self).predict(
x=x,
input_fn=input_fn,
batch_size=batch_size,
@@ -228,7 +172,7 @@ class SVM(trainable.Trainable, evaluable.Evaluable):
as_iterable=True):
"""Runs inference to determine the class probability predictions."""
key = prediction_key.PredictionKey.PROBABILITIES
- preds = self._estimator.predict(
+ preds = super(SVM, self).predict(
x=x,
input_fn=input_fn,
batch_size=batch_size,
@@ -239,51 +183,30 @@ class SVM(trainable.Trainable, evaluable.Evaluable):
return preds[key]
# pylint: enable=protected-access
- def get_variable_names(self):
- return self._estimator.get_variable_names()
-
def export(self, export_dir, signature_fn=None,
input_fn=None, default_batch_size=1,
exports_to_keep=None):
"""See BaseEstimator.export."""
+ return self.export_with_defaults(
+ export_dir=export_dir,
+ signature_fn=signature_fn,
+ input_fn=input_fn,
+ default_batch_size=default_batch_size,
+ exports_to_keep=exports_to_keep)
+
+ def export_with_defaults(
+ self,
+ export_dir,
+ signature_fn=None,
+ input_fn=None,
+ default_batch_size=1,
+ exports_to_keep=None):
+ """Same as BaseEstimator.export, but uses some defaults."""
def default_input_fn(unused_estimator, examples):
return layers.parse_feature_columns_from_examples(
examples, self._feature_columns)
- return self._estimator.export(export_dir=export_dir,
- signature_fn=signature_fn,
- input_fn=input_fn or default_input_fn,
- default_batch_size=default_batch_size,
- exports_to_keep=exports_to_keep)
-
- @experimental
- def export_savedmodel(self,
- export_dir_base,
- input_fn,
- default_output_alternative_key=None,
- assets_extra=None,
- as_text=False,
- exports_to_keep=None):
- return self._estimator.export_savedmodel(
- export_dir_base,
- input_fn,
- default_output_alternative_key=default_output_alternative_key,
- assets_extra=assets_extra,
- as_text=as_text,
- exports_to_keep=exports_to_keep)
-
- @property
- def weights_(self):
- values = {}
- optimizer_regex = r".*/"+self._optimizer.get_name() + r"(_\d)?$"
- for name in self.get_variable_names():
- if (name.startswith("linear/") and
- name != "linear/bias_weight" and
- not re.match(optimizer_regex, name)):
- values[name] = self.get_variable_value(name)
- if len(values) == 1:
- return values[list(values.keys())[0]]
- return values
-
- @property
- def bias_(self):
- return self.get_variable_value("linear/bias_weight")
+ return super(SVM, self).export(export_dir=export_dir,
+ signature_fn=signature_fn,
+ input_fn=input_fn or default_input_fn,
+ default_batch_size=default_batch_size,
+ exports_to_keep=exports_to_keep)
diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py
index 3bc5013540..ed0e546442 100644
--- a/tensorflow/contrib/learn/python/learn/experiment.py
+++ b/tensorflow/contrib/learn/python/learn/experiment.py
@@ -139,8 +139,8 @@ class Experiment(object):
self._continuous_eval_throttle_secs = continuous_eval_throttle_secs
self._min_eval_frequency = min_eval_frequency
self._delay_workers_by_global_step = delay_workers_by_global_step
+ self._train_monitors = train_monitors or []
# Mutable fields, using the setters.
- self.train_monitors = train_monitors
self.eval_hooks = eval_hooks
self.export_strategies = export_strategies
self.continuous_eval_predicate_fn = continuous_eval_predicate_fn
@@ -170,12 +170,9 @@ class Experiment(object):
return self._eval_steps
@property
- def train_monitors(self):
- return self._train_monitors
-
- @train_monitors.setter
- def train_monitors(self, value):
- self._train_monitors = value or []
+ def train_hooks(self):
+ """Returns a shallow copy of train hooks for inspecting."""
+ return [m for m in self._train_monitors]
@property
def eval_hooks(self):
@@ -232,6 +229,10 @@ class Experiment(object):
raise ValueError("`export_strategies` must be an ExportStrategy, "
"a list of ExportStrategies, or None.")
+ def extend_train_hooks(self, additional_hooks):
+ """Extends the hooks for training."""
+ self._train_monitors.extend(additional_hooks)
+
def train(self, delay_secs=None):
"""Fit the estimator using the training data.
@@ -378,7 +379,8 @@ class Experiment(object):
steps=self._eval_steps,
metrics=self._eval_metrics,
name=name,
- checkpoint_path=latest_path)
+ checkpoint_path=latest_path,
+ hooks=self._eval_hooks)
# Ensure eval result is not None for next round of evaluation.
if not eval_result:
eval_result = {}
@@ -454,14 +456,15 @@ class Experiment(object):
self._train_monitors += [monitors.ValidationMonitor(
input_fn=self._eval_input_fn, eval_steps=self._eval_steps,
metrics=self._eval_metrics, every_n_steps=self._min_eval_frequency,
- name=eval_dir_suffix,
+ name=eval_dir_suffix, hooks=self._eval_hooks
)]
self.train(delay_secs=0)
eval_result = self._estimator.evaluate(input_fn=self._eval_input_fn,
steps=self._eval_steps,
metrics=self._eval_metrics,
- name=eval_dir_suffix)
+ name=eval_dir_suffix,
+ hooks=self._eval_hooks)
export_results = self._maybe_export(eval_result)
return eval_result, export_results
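The Experiment change replaces the mutable train_monitors setter with a read-only train_hooks property that hands back a shallow copy, plus an explicit extend_train_hooks() mutator, so hooks can only be added through one audited path. A stand-alone sketch of the pattern (the class name is illustrative):

class HookContainer(object):

  def __init__(self, train_hooks=None):
    self._train_hooks = list(train_hooks or [])

  @property
  def train_hooks(self):
    # Shallow copy: callers can inspect the hooks but cannot mutate the
    # internal list through the returned value.
    return list(self._train_hooks)

  def extend_train_hooks(self, additional_hooks):
    # The one supported way to add hooks after construction.
    self._train_hooks.extend(additional_hooks)


c = HookContainer(train_hooks=["h1"])
c.train_hooks.append("h2")     # mutates only the copy
assert c.train_hooks == ["h1"]
c.extend_train_hooks(["h2"])   # mutates the real list
assert c.train_hooks == ["h1", "h2"]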
diff --git a/tensorflow/contrib/learn/python/learn/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py
index 8b43973bb8..096d334e8c 100644
--- a/tensorflow/contrib/learn/python/learn/experiment_test.py
+++ b/tensorflow/contrib/learn/python/learn/experiment_test.py
@@ -42,6 +42,7 @@ from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging
from tensorflow.python.training import saver
from tensorflow.python.training import server_lib
+from tensorflow.python.training import session_run_hook
from tensorflow.python.util import compat
from tensorflow.python.util.all_util import reveal_undocumented
@@ -74,6 +75,7 @@ class TestEstimator(evaluable.Evaluable, trainable.Trainable):
self._max_evals = max_evals
self.export_count = 0
self.monitors = []
+ self.eval_hooks = []
self._config = config or run_config.RunConfig()
self._model_dir = tempfile.mkdtemp()
@@ -87,6 +89,8 @@ class TestEstimator(evaluable.Evaluable, trainable.Trainable):
def evaluate(self, **kwargs):
tf_logging.info('evaluate called with args: %s' % kwargs)
+ if 'hooks' in kwargs:
+ self.eval_hooks = kwargs['hooks']
self.eval_count += 1
if self.eval_count > self._max_evals:
tf_logging.info('Ran %d evals. Done.' % self.eval_count)
@@ -109,14 +113,18 @@ class TestEstimator(evaluable.Evaluable, trainable.Trainable):
self.monitors = kwargs['monitors']
return [(key, kwargs[key]) for key in sorted(kwargs.keys())]
- def export_savedmodel(self, export_dir_base, export_input_fn, **kwargs):
+ def export_savedmodel(self, export_dir_base, serving_input_fn, **kwargs):
tf_logging.info('export_savedmodel called with args: %s, %s, %s' %
- (export_dir_base, export_input_fn, kwargs))
+ (export_dir_base, serving_input_fn, kwargs))
self.export_count += 1
return os.path.join(
compat.as_bytes(export_dir_base), compat.as_bytes('bogus_timestamp'))
+class _NoopHook(session_run_hook.SessionRunHook):
+ pass
+
+
class ExperimentTest(test.TestCase):
def _cluster_spec(self):
@@ -253,52 +261,63 @@ class ExperimentTest(test.TestCase):
def test_evaluate(self):
est = TestEstimator()
est.fake_checkpoint()
+ noop_hook = _NoopHook()
ex = experiment.Experiment(
est,
train_input_fn='train_input',
eval_input_fn='eval_input',
eval_metrics='eval_metrics',
+ eval_hooks=[noop_hook],
eval_steps='steps',
eval_delay_secs=0)
ex.evaluate()
- self.assertEquals(1, est.eval_count)
self.assertEquals(0, est.fit_count)
+ self.assertEquals(1, est.eval_count)
+ self.assertEquals([noop_hook], est.eval_hooks)
def test_evaluate_delay(self):
est = TestEstimator()
est.fake_checkpoint()
+ noop_hook = _NoopHook()
ex = experiment.Experiment(
- est, train_input_fn='train_input', eval_input_fn='eval_input')
+ est, train_input_fn='train_input', eval_input_fn='eval_input',
+ eval_hooks=[noop_hook])
for delay in [0, 1, 3]:
with test.mock.patch('time.sleep', SheepCounter()) as sheep:
ex.evaluate(delay_secs=delay)
self.assertAlmostEqual(delay, sheep.total_time, delta=0.1)
+ self.assertEquals([noop_hook], est.eval_hooks)
def test_continuous_eval(self):
est = TestEstimator()
est.fake_checkpoint()
+ noop_hook = _NoopHook()
ex = experiment.Experiment(
est,
train_input_fn='train_input',
eval_input_fn='eval_input',
eval_metrics='eval_metrics',
+ eval_hooks=[noop_hook],
eval_delay_secs=0,
continuous_eval_throttle_secs=0)
self.assertRaises(
StopIteration, ex.continuous_eval, evaluate_checkpoint_only_once=False)
- self.assertEquals(6, est.eval_count)
self.assertEquals(0, est.fit_count)
+ self.assertEquals(6, est.eval_count)
+ self.assertEquals([noop_hook], est.eval_hooks)
def test_continuous_eval_throttle_delay(self):
for delay in [0, 1, 2]:
est = TestEstimator()
est.fake_checkpoint()
+ noop_hook = _NoopHook()
ex = experiment.Experiment(
est,
train_input_fn='train_input',
eval_input_fn='eval_input',
eval_metrics='eval_metrics',
+ eval_hooks=[noop_hook],
continuous_eval_throttle_secs=delay,
eval_delay_secs=0)
with test.mock.patch('time.sleep', SheepCounter()) as sheep:
@@ -311,6 +330,7 @@ class ExperimentTest(test.TestCase):
def test_continuous_eval_predicate_fn(self):
est = TestEstimator()
est.fake_checkpoint()
+ noop_hook = _NoopHook()
def _predicate_fn(unused_eval_result):
return est.eval_count < 3
@@ -320,20 +340,24 @@ class ExperimentTest(test.TestCase):
train_input_fn='train_input',
eval_input_fn='eval_input',
eval_metrics='eval_metrics',
+ eval_hooks=[noop_hook],
eval_delay_secs=0,
continuous_eval_throttle_secs=0,
continuous_eval_predicate_fn=_predicate_fn)
ex.continuous_eval(evaluate_checkpoint_only_once=False)
- self.assertEquals(3, est.eval_count)
self.assertEquals(0, est.fit_count)
+ self.assertEquals(3, est.eval_count)
+ self.assertEquals([noop_hook], est.eval_hooks)
def test_run_local(self):
est = TestEstimator()
+ noop_hook = _NoopHook()
ex = experiment.Experiment(
est,
train_input_fn='train_input',
eval_input_fn='eval_input',
eval_metrics='eval_metrics',
+ eval_hooks=[noop_hook],
train_steps=100,
eval_steps=100,
local_eval_frequency=10)
@@ -341,17 +365,42 @@ class ExperimentTest(test.TestCase):
self.assertEquals(1, est.fit_count)
self.assertEquals(1, est.eval_count)
self.assertEquals(1, len(est.monitors))
+ self.assertEquals([noop_hook], est.eval_hooks)
self.assertTrue(isinstance(est.monitors[0], monitors.ValidationMonitor))
+ def test_train_monitors_returns_shallow_copy(self):
+ noop_hook = _NoopHook()
+ ex = experiment.Experiment(
+ TestEstimator(),
+ train_input_fn='train_input',
+ eval_input_fn='eval_input',
+ eval_metrics='eval_metrics',
+ train_monitors=[noop_hook],
+ train_steps=100,
+ eval_steps=100,
+ local_eval_frequency=10)
+ self.assertAllEqual([noop_hook], ex.train_hooks)
+
+ another_noop_hook = _NoopHook()
+ # Assert that the property getter returns a shallow copy.
+ ex.train_hooks.extend([another_noop_hook])
+ self.assertAllEqual([noop_hook], ex.train_hooks)
+
+ # Assert that the extend API mutates the monitors.
+ ex.extend_train_hooks([another_noop_hook])
+ self.assertAllEqual([noop_hook, another_noop_hook], ex.train_hooks)
+
def test_train_and_evaluate(self):
est = TestEstimator()
+ noop_hook = _NoopHook()
export_strategy = saved_model_export_utils.make_export_strategy(
- est, 'export_input')
+ est, 'export_input', exports_to_keep=None)
ex = experiment.Experiment(
est,
train_input_fn='train_input',
eval_input_fn='eval_input',
eval_metrics='eval_metrics',
+ eval_hooks=[noop_hook],
train_steps=100,
eval_steps=100,
export_strategies=export_strategy)
@@ -360,6 +409,7 @@ class ExperimentTest(test.TestCase):
self.assertEquals(1, est.eval_count)
self.assertEquals(1, est.export_count)
self.assertEquals(1, len(est.monitors))
+ self.assertEquals([noop_hook], est.eval_hooks)
self.assertTrue(isinstance(est.monitors[0], monitors.ValidationMonitor))
@test.mock.patch.object(server_lib, 'Server')
diff --git a/tensorflow/contrib/learn/python/learn/monitors.py b/tensorflow/contrib/learn/python/learn/monitors.py
index d8fe2315da..ab6ea0fb02 100644
--- a/tensorflow/contrib/learn/python/learn/monitors.py
+++ b/tensorflow/contrib/learn/python/learn/monitors.py
@@ -618,7 +618,8 @@ class ValidationMonitor(EveryN):
def __init__(self, x=None, y=None, input_fn=None, batch_size=None,
eval_steps=None,
- every_n_steps=100, metrics=None, early_stopping_rounds=None,
+ every_n_steps=100, metrics=None, hooks=None,
+ early_stopping_rounds=None,
early_stopping_metric="loss",
early_stopping_metric_minimize=True, name=None):
"""Initializes a ValidationMonitor.
@@ -632,6 +633,8 @@ class ValidationMonitor(EveryN):
every_n_steps: Check for new checkpoints to evaluate every N steps. If a
new checkpoint is found, it is evaluated. See `EveryN`.
metrics: See `BaseEstimator.evaluate`.
+ hooks: A list of `SessionRunHook` hooks to pass to the
+ `Estimator`'s `evaluate` function.
early_stopping_rounds: `int`. If the metric indicated by
`early_stopping_metric` does not change according to
`early_stopping_metric_minimize` for this many steps, then training
@@ -660,6 +663,7 @@ class ValidationMonitor(EveryN):
self.batch_size = batch_size
self.eval_steps = eval_steps
self.metrics = metrics
+ self.hooks = hooks
self.early_stopping_rounds = early_stopping_rounds
self.early_stopping_metric = early_stopping_metric
self.early_stopping_metric_minimize = early_stopping_metric_minimize
@@ -709,7 +713,8 @@ class ValidationMonitor(EveryN):
# Run evaluation and log it.
validation_outputs = self._estimator.evaluate(
x=self.x, y=self.y, input_fn=self.input_fn, batch_size=self.batch_size,
- steps=self.eval_steps, metrics=self.metrics, name=self.name)
+ steps=self.eval_steps, metrics=self.metrics, hooks=self.hooks,
+ name=self.name)
stats = []
for name in validation_outputs:
stats.append("%s = %s" % (name, str(validation_outputs[name])))
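With the hooks argument plumbed through, a SessionRunHook can now ride along with every evaluation that ValidationMonitor (or Experiment's eval_hooks) triggers. A hedged TF 1.x-era sketch of a minimal hook; only the tf.train.SessionRunHook base class and its callback names are assumed here:

import tensorflow as tf

class EvalStepCounterHook(tf.train.SessionRunHook):
  """Counts how many session.run calls an evaluation makes."""

  def begin(self):
    self._steps = 0

  def after_run(self, run_context, run_values):
    self._steps += 1

  def end(self, session):
    print("evaluation ran %d steps" % self._steps)

# ValidationMonitor(..., hooks=[EvalStepCounterHook()]) now forwards the hook
# to estimator.evaluate(), exactly like Experiment's eval_hooks argument.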
diff --git a/tensorflow/contrib/learn/python/learn/utils/__init__.py b/tensorflow/contrib/learn/python/learn/utils/__init__.py
index f313699c14..74236da979 100644
--- a/tensorflow/contrib/learn/python/learn/utils/__init__.py
+++ b/tensorflow/contrib/learn/python/learn/utils/__init__.py
@@ -20,3 +20,7 @@ from __future__ import division
from __future__ import print_function
from tensorflow.contrib.learn.python.learn.utils.export import export_estimator
+from tensorflow.contrib.learn.python.learn.utils.input_fn_utils import build_default_serving_input_fn
+from tensorflow.contrib.learn.python.learn.utils.input_fn_utils import build_parsing_serving_input_fn
+from tensorflow.contrib.learn.python.learn.utils.saved_model_export_utils import make_export_strategy
+
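These re-exports make the serving-input helpers and make_export_strategy importable from the utils package directly. Assuming a TF 1.x contrib.learn build that includes this commit, typical wiring looks like:

import tensorflow as tf
from tensorflow.contrib.learn.python.learn import utils as learn_utils

# Parse serialized tf.Examples with a variable batch size (the new default).
feature_spec = {"x": tf.FixedLenFeature(shape=[1], dtype=tf.float32)}
serving_input_fn = learn_utils.build_parsing_serving_input_fn(feature_spec)

# An ExportStrategy that Experiment(export_strategies=...) can consume.
export_strategy = learn_utils.make_export_strategy(serving_input_fn)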
diff --git a/tensorflow/contrib/learn/python/learn/utils/export_test.py b/tensorflow/contrib/learn/python/learn/utils/export_test.py
index caae60029a..ce1d73256a 100644
--- a/tensorflow/contrib/learn/python/learn/utils/export_test.py
+++ b/tensorflow/contrib/learn/python/learn/utils/export_test.py
@@ -112,7 +112,7 @@ class ExportTest(test.TestCase):
def testExportMonitorInputFeatureKeyMissing(self):
random.seed(42)
- def _export_input_fn():
+ def _serving_input_fn():
return {
_X_KEY:
random_ops.random_uniform(
@@ -123,7 +123,7 @@ class ExportTest(test.TestCase):
monitor = learn.monitors.ExportMonitor(
every_n_steps=1,
export_dir=tempfile.mkdtemp() + 'export/',
- input_fn=_export_input_fn,
+ input_fn=_serving_input_fn,
input_feature_key=input_feature_key,
exports_to_keep=2,
signature_fn=export.generic_signature_fn)
@@ -135,13 +135,13 @@ class ExportTest(test.TestCase):
random.seed(42)
input_feature_key = 'my_example_key'
- def _export_input_fn():
+ def _serving_input_fn():
return {input_feature_key: None}, None
monitor = learn.monitors.ExportMonitor(
every_n_steps=1,
export_dir=tempfile.mkdtemp() + 'export/',
- input_fn=_export_input_fn,
+ input_fn=_serving_input_fn,
input_feature_key=input_feature_key,
exports_to_keep=2,
signature_fn=export.generic_signature_fn)
@@ -154,7 +154,7 @@ class ExportTest(test.TestCase):
random.seed(42)
input_feature_key = 'my_example_key'
- def _export_input_fn():
+ def _serving_input_fn():
return {
input_feature_key:
None,
@@ -166,7 +166,7 @@ class ExportTest(test.TestCase):
monitor = learn.monitors.ExportMonitor(
every_n_steps=1,
export_dir=tempfile.mkdtemp() + 'export/',
- input_fn=_export_input_fn,
+ input_fn=_serving_input_fn,
input_feature_key=input_feature_key,
exports_to_keep=2,
signature_fn=export.generic_signature_fn)
@@ -178,7 +178,7 @@ class ExportTest(test.TestCase):
random.seed(42)
input_feature_key = 'my_example_key'
- def _export_input_fn():
+ def _serving_input_fn():
return {
input_feature_key:
array_ops.placeholder(
@@ -188,7 +188,7 @@ class ExportTest(test.TestCase):
monitor = learn.monitors.ExportMonitor(
every_n_steps=1,
export_dir=tempfile.mkdtemp() + 'export/',
- input_fn=_export_input_fn,
+ input_fn=_serving_input_fn,
input_feature_key=input_feature_key,
exports_to_keep=2,
signature_fn=export.generic_signature_fn)
@@ -200,7 +200,7 @@ class ExportTest(test.TestCase):
random.seed(42)
input_feature_key = 'my_example_key'
- def _export_input_fn():
+ def _serving_input_fn():
return {
input_feature_key:
array_ops.placeholder(
@@ -214,7 +214,7 @@ class ExportTest(test.TestCase):
monitor = learn.monitors.ExportMonitor(
every_n_steps=1,
export_dir=export_dir,
- input_fn=_export_input_fn,
+ input_fn=_serving_input_fn,
input_feature_key=input_feature_key,
exports_to_keep=2,
signature_fn=export.generic_signature_fn)
diff --git a/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py b/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py
index 18bfdc61c6..1a51971619 100644
--- a/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py
+++ b/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py
@@ -41,7 +41,7 @@ InputFnOps = collections.namedtuple('InputFnOps',
'default_inputs'])
-def build_parsing_serving_input_fn(feature_spec, default_batch_size=1):
+def build_parsing_serving_input_fn(feature_spec, default_batch_size=None):
"""Build an input_fn appropriate for serving, expecting fed tf.Examples.
Creates an input_fn that expects a serialized tf.Example fed into a string
@@ -52,6 +52,7 @@ def build_parsing_serving_input_fn(feature_spec, default_batch_size=1):
Args:
feature_spec: a dict of string to `VarLenFeature`/`FixedLenFeature`.
default_batch_size: the number of query examples expected per batch.
+ Leave unset for variable batch size (recommended).
Returns:
An input_fn suitable for use in serving.
@@ -68,7 +69,7 @@ def build_parsing_serving_input_fn(feature_spec, default_batch_size=1):
return input_fn
-def build_default_serving_input_fn(features, default_batch_size=1):
+def build_default_serving_input_fn(features, default_batch_size=None):
"""Build an input_fn appropriate for serving, expecting feature Tensors.
Creates an input_fn that expects all features to be fed directly.
@@ -78,6 +79,7 @@ def build_default_serving_input_fn(features, default_batch_size=1):
Args:
features: a dict of string to `Tensor`.
default_batch_size: the number of query examples expected per batch.
+ Leave unset for variable batch size (recommended).
Returns:
An input_fn suitable for use in serving.
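The motivation for default_batch_size=None: the serving input_fn builds a string placeholder for serialized tf.Examples, and leaving its batch dimension as None lets the exported signature accept any number of examples per request instead of the baked-in size. A TF 1.x-style sketch of the difference (illustrative placeholders only, not the helper's actual code):

import tensorflow as tf

# default_batch_size=1 (old default): signature accepts exactly one example.
fixed = tf.placeholder(dtype=tf.string, shape=[1], name="examples_fixed")

# default_batch_size=None (new default): batch dimension left variable.
variable = tf.placeholder(dtype=tf.string, shape=[None], name="examples_var")

print(fixed.shape, variable.shape)  # (1,) vs. (?,)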
diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py
index 9e452d0905..8d53b01511 100644
--- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py
+++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py
@@ -19,7 +19,6 @@ from __future__ import division
from __future__ import print_function
import os
-import re
import time
from tensorflow.contrib.learn.python.learn import export_strategy
@@ -208,7 +207,7 @@ def get_timestamped_export_dir(export_dir_base):
Each export is written into a new subdirectory named using the
current time. This guarantees monotonically increasing version
numbers even across multiple runs of the pipeline.
- The timestamp used is the number of milliseconds since epoch UTC.
+ The timestamp used is the number of seconds since epoch UTC.
Args:
export_dir_base: A string containing a directory to write the exported
@@ -216,7 +215,7 @@ def get_timestamped_export_dir(export_dir_base):
Returns:
The full path of the new subdirectory (which is not actually created yet).
"""
- export_timestamp = int(time.time() * 1e3)
+ export_timestamp = int(time.time())
export_dir = os.path.join(
compat.as_bytes(export_dir_base),
@@ -241,37 +240,63 @@ def garbage_collect_exports(export_dir_base, exports_to_keep):
keep_filter = gc.largest_export_versions(exports_to_keep)
delete_filter = gc.negation(keep_filter)
- # Export dir must not end with / or it will break the re match below.
- if export_dir_base.endswith('/'):
- export_dir_base = export_dir_base[:-1]
-
# create a simple parser that pulls the export_version from the directory.
def parser(path):
- match = re.match('^' + export_dir_base + '/(\\d{13})$', path.path)
- if not match:
+ filename = os.path.basename(path.path)
+ if not (len(filename) == 10 and filename.isdigit()):
return None
- return path._replace(export_version=int(match.group(1)))
+ return path._replace(export_version=int(filename))
for p in delete_filter(gc.get_paths(export_dir_base, parser=parser)):
gfile.DeleteRecursively(p.path)
-def make_export_strategy(export_input_fn,
+def make_export_strategy(serving_input_fn,
default_output_alternative_key='default',
assets_extra=None,
as_text=False,
- exports_to_keep=None):
- """Create an ExportStrategy for use with Experiment."""
+ exports_to_keep=5):
+ """Create an ExportStrategy for use with Experiment.
+
+ Args:
+ serving_input_fn: A function that takes no arguments and returns an
+ `InputFnOps`.
+ default_output_alternative_key: the name of the head to serve when an
+ incoming serving request does not explicitly request a specific head.
+ Not needed for single-headed models.
+ assets_extra: A dict specifying how to populate the assets.extra directory
+ within the exported SavedModel. Each key should give the destination
+ path (including the filename) relative to the assets.extra directory.
+ The corresponding value gives the full path of the source file to be
+ copied. For example, the simple case of copying a single file without
+ renaming it is specified as
+ `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
+ as_text: whether to write the SavedModel proto in text format.
+ exports_to_keep: Number of exports to keep. Older exports will be
+ garbage-collected. Defaults to 5. Set to None to disable garbage
+ collection.
+
+ Returns:
+ an ExportStrategy that can be passed to the Experiment constructor.
+ """
def export_fn(estimator, export_dir_base):
- """Exports the given Estimator as a SavedModel."""
+ """Exports the given Estimator as a SavedModel.
+
+ Args:
+ estimator: the Estimator to export.
+ export_dir_base: A string containing a directory to write the exported
+ graph and checkpoints.
+
+ Returns:
+ The string path to the exported directory.
+ """
export_result = estimator.export_savedmodel(
export_dir_base,
- export_input_fn,
+ serving_input_fn,
default_output_alternative_key=default_output_alternative_key,
assets_extra=assets_extra,
- as_text=as_text,
- exports_to_keep=exports_to_keep)
+ as_text=as_text)
garbage_collect_exports(export_dir_base, exports_to_keep)
return export_result
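Two behavioral notes on this hunk: export directories are now named with a 10-digit seconds-since-epoch timestamp, so the fragile full-path regex (which broke when export_dir_base ended with '/') is replaced by a basename check, and exports_to_keep now defaults to 5 with garbage collection running after every export. A stand-alone sketch of the new parser logic:

import os

def parse_export_version(path):
  # Timestamped export dirs are named with seconds since epoch: 10 digits.
  filename = os.path.basename(path)
  if len(filename) == 10 and filename.isdigit():
    return int(filename)
  return None

assert parse_export_version("/tmp/exports/1487651731") == 1487651731
assert parse_export_version("/tmp/exports/checkpoint") is None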
diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py
index 955e14ae44..e22f11943b 100644
--- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py
+++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py
@@ -240,21 +240,21 @@ class SavedModelExportUtilsTest(test.TestCase):
export_dir_base = tempfile.mkdtemp() + "export/"
export_dir_1 = saved_model_export_utils.get_timestamped_export_dir(
export_dir_base)
- time.sleep(0.001)
+ time.sleep(1)
export_dir_2 = saved_model_export_utils.get_timestamped_export_dir(
export_dir_base)
- time.sleep(0.001)
+ time.sleep(1)
export_dir_3 = saved_model_export_utils.get_timestamped_export_dir(
export_dir_base)
- # Export directories should be named using a timestamp that is milliseconds
- # since epoch. Such a timestamp is 13 digits long.
+ # Export directories should be named using a timestamp that is seconds
+ # since epoch. Such a timestamp is 10 digits long.
time_1 = os.path.basename(export_dir_1)
- self.assertEqual(13, len(time_1))
+ self.assertEqual(10, len(time_1))
time_2 = os.path.basename(export_dir_2)
- self.assertEqual(13, len(time_2))
+ self.assertEqual(10, len(time_2))
time_3 = os.path.basename(export_dir_3)
- self.assertEqual(13, len(time_3))
+ self.assertEqual(10, len(time_3))
self.assertTrue(int(time_1) < int(time_2))
self.assertTrue(int(time_2) < int(time_3))
@@ -283,10 +283,10 @@ class SavedModelExportUtilsTest(test.TestCase):
def test_make_export_strategy(self):
"""Only tests that an ExportStrategy instance is created."""
- def _export_input_fn():
+ def _serving_input_fn():
return array_ops.constant([1]), None
export_strategy = saved_model_export_utils.make_export_strategy(
- export_input_fn=_export_input_fn,
+ serving_input_fn=_serving_input_fn,
default_output_alternative_key="default",
assets_extra={"from/path": "to/path"},
as_text=False,
@@ -299,7 +299,7 @@ def _create_test_export_dir(export_dir_base):
export_dir = saved_model_export_utils.get_timestamped_export_dir(
export_dir_base)
gfile.MkDir(export_dir)
- time.sleep(0.001)
+ time.sleep(1)
return export_dir
diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_composition_test.py b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_composition_test.py
index 2f60554104..6309d36258 100644
--- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_composition_test.py
+++ b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_composition_test.py
@@ -200,16 +200,16 @@ class NonSquareLinearOperatorCompositionTest(
operator = linalg.LinearOperatorComposition(operators)
self.assertAllEqual((2, 3, 5), operator.shape)
- def test_dynamic_shapes_when_statically_available(self):
+ def test_shape_tensors_when_statically_available(self):
operators = [
linalg.LinearOperatorMatrix(rng.rand(2, 3, 4)),
linalg.LinearOperatorMatrix(rng.rand(2, 4, 5))
]
operator = linalg.LinearOperatorComposition(operators)
with self.test_session():
- self.assertAllEqual((2, 3, 5), operator.shape_dynamic().eval())
+ self.assertAllEqual((2, 3, 5), operator.shape_tensor().eval())
- def test_dynamic_shapes_when_only_dynamically_available(self):
+ def test_shape_tensors_when_only_dynamically_available(self):
mat_1 = rng.rand(1, 2, 3, 4)
mat_2 = rng.rand(1, 2, 4, 5)
mat_ph_1 = array_ops.placeholder(dtypes.float64)
@@ -223,7 +223,7 @@ class NonSquareLinearOperatorCompositionTest(
operator = linalg.LinearOperatorComposition(operators)
with self.test_session():
self.assertAllEqual(
- (1, 2, 3, 5), operator.shape_dynamic().eval(feed_dict=feed_dict))
+ (1, 2, 3, 5), operator.shape_tensor().eval(feed_dict=feed_dict))
if __name__ == "__main__":
diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_test.py b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_test.py
index 8f77c5e6e3..c099194eed 100644
--- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_test.py
+++ b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_test.py
@@ -31,7 +31,7 @@ rng = np.random.RandomState(123)
class LinearOperatorShape(linalg.LinearOperator):
- """LinearOperator that implements the methods ._shape and _shape_dynamic."""
+ """LinearOperator that implements the methods ._shape and _shape_tensor."""
def __init__(self,
shape,
@@ -49,7 +49,7 @@ class LinearOperatorShape(linalg.LinearOperator):
def _shape(self):
return tensor_shape.TensorShape(self._stored_shape)
- def _shape_dynamic(self):
+ def _shape_tensor(self):
return constant_op.constant(self._stored_shape, dtype=dtypes.int32)
@@ -71,7 +71,7 @@ class LinearOperatorApplyOnly(linalg.LinearOperator):
def _shape(self):
return self._matrix.get_shape()
- def _shape_dynamic(self):
+ def _shape_tensor(self):
return array_ops.shape(self._matrix)
def _apply(self, x, adjoint=False):
@@ -96,11 +96,11 @@ class LinearOperatorTest(test.TestCase):
shape = (1, 2, 3, 4)
operator = LinearOperatorShape(shape)
- self.assertAllEqual(shape, operator.shape_dynamic().eval())
- self.assertAllEqual(4, operator.tensor_rank_dynamic().eval())
- self.assertAllEqual((1, 2), operator.batch_shape_dynamic().eval())
- self.assertAllEqual(4, operator.domain_dimension_dynamic().eval())
- self.assertAllEqual(3, operator.range_dimension_dynamic().eval())
+ self.assertAllEqual(shape, operator.shape_tensor().eval())
+ self.assertAllEqual(4, operator.tensor_rank_tensor().eval())
+ self.assertAllEqual((1, 2), operator.batch_shape_tensor().eval())
+ self.assertAllEqual(4, operator.domain_dimension_tensor().eval())
+ self.assertAllEqual(3, operator.range_dimension_tensor().eval())
def test_is_x_properties(self):
operator = LinearOperatorShape(
@@ -120,7 +120,7 @@ class LinearOperatorTest(test.TestCase):
self.assertAllEqual((2, 3, 4), operator_dense.get_shape())
self.assertAllClose(matrix, operator_dense.eval())
- def test_generic_to_dense_method_non_square_matrix_dynamic(self):
+ def test_generic_to_dense_method_non_square_matrix_tensor(self):
matrix = rng.randn(2, 3, 4)
matrix_ph = array_ops.placeholder(dtypes.float64)
operator = LinearOperatorApplyOnly(matrix_ph)
diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_util_test.py b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_util_test.py
index 4eac01092f..bf6f8f8302 100644
--- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_util_test.py
+++ b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_util_test.py
@@ -96,7 +96,7 @@ class DomainDimensionStubOperator(object):
def __init__(self, domain_dimension):
self._domain_dimension = ops.convert_to_tensor(domain_dimension)
- def domain_dimension_dynamic(self):
+ def domain_dimension_tensor(self):
return self._domain_dimension
diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator.py b/tensorflow/contrib/linalg/python/ops/linear_operator.py
index e229820edc..2467603605 100644
--- a/tensorflow/contrib/linalg/python/ops/linear_operator.py
+++ b/tensorflow/contrib/linalg/python/ops/linear_operator.py
@@ -180,13 +180,15 @@ class LinearOperator(object):
self._is_positive_definite = is_positive_definite
self._name = name or type(self).__name__
- # We will cache some values to avoid repeatedly adding shape
- # manipulation ops to the graph. Cleaner.
- self._cached_shape_dynamic = None
- self._cached_batch_shape_dynamic = None
- self._cached_domain_dimension_dynamic = None
- self._cached_range_dimension_dynamic = None
- self._cached_tensor_rank_dynamic = None
+ # We will cache some tensors to avoid repeatedly adding shape
+ # manipulation ops to the graph.
+ # Naming convention:
+ # self._cached_X_tensor is the cached version of self._X_tensor.
+ self._cached_shape_tensor = None
+ self._cached_batch_shape_tensor = None
+ self._cached_domain_dimension_tensor = None
+ self._cached_range_dimension_tensor = None
+ self._cached_tensor_rank_tensor = None
@contextlib.contextmanager
def _name_scope(self, name=None, values=None):
@@ -240,10 +242,10 @@ class LinearOperator(object):
"""
return self._shape()
- def _shape_dynamic(self):
- raise NotImplementedError("_shape_dynamic is not implemented.")
+ def _shape_tensor(self):
+ raise NotImplementedError("_shape_tensor is not implemented.")
- def shape_dynamic(self, name="shape_dynamic"):
+ def shape_tensor(self, name="shape_tensor"):
"""Shape of this `LinearOperator`, determined at runtime.
If this operator acts like the batch matrix `A` with
@@ -258,14 +260,14 @@ class LinearOperator(object):
"""
with self._name_scope(name):
# Be clean by avoiding adding shape Ops to the graph too many times.
- if self._cached_shape_dynamic is None:
+ if self._cached_shape_tensor is None:
# Prefer to use statically defined shape if available.
if self.shape.is_fully_defined():
- self._cached_shape_dynamic = linear_operator_util.shape_tensor(
+ self._cached_shape_tensor = linear_operator_util.shape_tensor(
self.shape.as_list())
else:
- self._cached_shape_dynamic = self._shape_dynamic()
- return self._cached_shape_dynamic
+ self._cached_shape_tensor = self._shape_tensor()
+ return self._cached_shape_tensor
@property
def batch_shape(self):
@@ -281,7 +283,7 @@ class LinearOperator(object):
# Derived classes get this "for free" once .shape is implemented.
return self.shape[:-2]
- def batch_shape_dynamic(self, name="batch_shape_dynamic"):
+ def batch_shape_tensor(self, name="batch_shape_tensor"):
"""Shape of batch dimensions of this operator, determined at runtime.
If this operator acts like the batch matrix `A` with
@@ -296,14 +298,14 @@ class LinearOperator(object):
"""
# Derived classes get this "for free" once .shape() is implemented.
with self._name_scope(name):
- if self._cached_batch_shape_dynamic is None:
+ if self._cached_batch_shape_tensor is None:
# Prefer to use statically defined shape if available.
if self.batch_shape.is_fully_defined():
- self._cached_batch_shape_dynamic = linear_operator_util.shape_tensor(
+ self._cached_batch_shape_tensor = linear_operator_util.shape_tensor(
self.batch_shape.as_list(), name="batch_shape")
else:
- self._cached_batch_shape_dynamic = self.shape_dynamic()[:-2]
- return self._cached_batch_shape_dynamic
+ self._cached_batch_shape_tensor = self.shape_tensor()[:-2]
+ return self._cached_batch_shape_tensor
@property
def tensor_rank(self, name="tensor_rank"):
@@ -322,7 +324,7 @@ class LinearOperator(object):
with self._name_scope(name):
return self.shape.ndims
- def tensor_rank_dynamic(self, name="tensor_rank_dynamic"):
+ def tensor_rank_tensor(self, name="tensor_rank_tensor"):
"""Rank (in the sense of tensors) of matrix corresponding to this operator.
If this operator acts like the batch matrix `A` with
@@ -336,15 +338,15 @@ class LinearOperator(object):
"""
# Derived classes get this "for free" once .shape() is implemented.
with self._name_scope(name):
- if self._cached_tensor_rank_dynamic is None:
+ if self._cached_tensor_rank_tensor is None:
# Prefer to use statically defined shape if available.
if self.tensor_rank is not None:
- self._cached_tensor_rank_dynamic = ops.convert_to_tensor(
+ self._cached_tensor_rank_tensor = ops.convert_to_tensor(
self.tensor_rank)
else:
- self._cached_tensor_rank_dynamic = array_ops.size(
- self.shape_dynamic())
- return self._cached_tensor_rank_dynamic
+ self._cached_tensor_rank_tensor = array_ops.size(
+ self.shape_tensor())
+ return self._cached_tensor_rank_tensor
@property
def domain_dimension(self):
@@ -359,7 +361,7 @@ class LinearOperator(object):
# Derived classes get this "for free" once .shape is implemented.
return self.shape[-1]
- def domain_dimension_dynamic(self, name="domain_dimension_dynamic"):
+ def domain_dimension_tensor(self, name="domain_dimension_tensor"):
"""Dimension (in the sense of vector spaces) of the domain of this operator.
Determined at runtime.
@@ -375,14 +377,14 @@ class LinearOperator(object):
"""
# Derived classes get this "for free" once .shape() is implemented.
with self._name_scope(name):
- if self._cached_domain_dimension_dynamic is None:
+ if self._cached_domain_dimension_tensor is None:
# Prefer to use statically defined shape if available.
if self.domain_dimension.value is not None:
- self._cached_domain_dimension_dynamic = ops.convert_to_tensor(
+ self._cached_domain_dimension_tensor = ops.convert_to_tensor(
self.domain_dimension.value)
else:
- self._cached_domain_dimension_dynamic = self.shape_dynamic()[-1]
- return self._cached_domain_dimension_dynamic
+ self._cached_domain_dimension_tensor = self.shape_tensor()[-1]
+ return self._cached_domain_dimension_tensor
@property
def range_dimension(self):
@@ -397,7 +399,7 @@ class LinearOperator(object):
# Derived classes get this "for free" once .shape is implemented.
return self.shape[-2]
- def range_dimension_dynamic(self, name="range_dimension_dynamic"):
+ def range_dimension_tensor(self, name="range_dimension_tensor"):
"""Dimension (in the sense of vector spaces) of the range of this operator.
Determined at runtime.
@@ -413,14 +415,14 @@ class LinearOperator(object):
"""
# Derived classes get this "for free" once .shape() is implemented.
with self._name_scope(name):
- if self._cached_range_dimension_dynamic is None:
+ if self._cached_range_dimension_tensor is None:
# Prefer to use statically defined shape if available.
if self.range_dimension.value is not None:
- self._cached_range_dimension_dynamic = ops.convert_to_tensor(
+ self._cached_range_dimension_tensor = ops.convert_to_tensor(
self.range_dimension.value)
else:
- self._cached_range_dimension_dynamic = self.shape_dynamic()[-2]
- return self._cached_range_dimension_dynamic
+ self._cached_range_dimension_tensor = self.shape_tensor()[-2]
+ return self._cached_range_dimension_tensor
def _assert_non_singular(self):
raise NotImplementedError("assert_non_singular is not implemented.")
@@ -574,12 +576,12 @@ class LinearOperator(object):
if self.batch_shape.is_fully_defined():
batch_shape = self.batch_shape
else:
- batch_shape = self.batch_shape_dynamic()
+ batch_shape = self.batch_shape_tensor()
if self.domain_dimension.value is not None:
n = self.domain_dimension.value
else:
- n = self.domain_dimension_dynamic()
+ n = self.domain_dimension_tensor()
eye = linalg_ops.eye(num_rows=n, batch_shape=batch_shape, dtype=self.dtype)
return self.apply(eye)
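Every *_tensor method above follows the same memoization pattern: prefer the statically known value when the shape is fully defined, otherwise build the runtime op once and cache it so repeated calls do not keep adding shape ops to the graph. A simplified, framework-free sketch of that pattern:

class Operator(object):

  def __init__(self, static_shape=None):
    self._static_shape = static_shape   # fully known tuple, or None
    self._cached_shape_tensor = None

  def _shape_tensor(self):
    # Stand-in for the graph op a subclass would build at runtime.
    print("building shape op")
    return (2, 3, 4)

  def shape_tensor(self):
    if self._cached_shape_tensor is None:
      if self._static_shape is not None:
        # Prefer the statically defined shape when available.
        self._cached_shape_tensor = self._static_shape
      else:
        self._cached_shape_tensor = self._shape_tensor()
    return self._cached_shape_tensor


op = Operator()
op.shape_tensor()  # builds the op once
op.shape_tensor()  # served from the cache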
diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_composition.py b/tensorflow/contrib/linalg/python/ops/linear_operator_composition.py
index 3e118ebbd4..81e7735841 100644
--- a/tensorflow/contrib/linalg/python/ops/linear_operator_composition.py
+++ b/tensorflow/contrib/linalg/python/ops/linear_operator_composition.py
@@ -202,7 +202,7 @@ class LinearOperatorComposition(linear_operator.LinearOperator):
return batch_shape.concatenate(matrix_shape)
- def _shape_dynamic(self):
+ def _shape_tensor(self):
# Avoid messy broadcasting if possible.
if self.shape.is_fully_defined():
return ops.convert_to_tensor(
@@ -212,14 +212,14 @@ class LinearOperatorComposition(linear_operator.LinearOperator):
# the graph. Things will fail at runtime naturally if shapes are
# incompatible.
matrix_shape = array_ops.stack([
- self.operators[0].range_dimension_dynamic(),
- self.operators[-1].domain_dimension_dynamic()
+ self.operators[0].range_dimension_tensor(),
+ self.operators[-1].domain_dimension_tensor()
])
# Dummy Tensor of zeros. Will never be materialized.
- zeros = array_ops.zeros(shape=self.operators[0].batch_shape_dynamic())
+ zeros = array_ops.zeros(shape=self.operators[0].batch_shape_tensor())
for operator in self.operators[1:]:
- zeros += array_ops.zeros(shape=operator.batch_shape_dynamic())
+ zeros += array_ops.zeros(shape=operator.batch_shape_tensor())
batch_shape = array_ops.shape(zeros)
return array_ops.concat((batch_shape, matrix_shape), 0)
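The batch-shape branch of _shape_tensor relies on a broadcasting trick: rather than combining batch shapes symbolically, it adds never-materialized zero tensors of each operator's batch shape and reads the shape of the sum, letting broadcasting rules resolve the result. The same idea in a numpy sketch:

import numpy as np

batch_shapes = [(1, 2), (2,), (1, 1)]
zeros = np.zeros(batch_shapes[0])
for shape in batch_shapes[1:]:
  zeros = zeros + np.zeros(shape)  # broadcasting combines the batch shapes
print(zeros.shape)                 # (1, 2)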
diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_diag.py b/tensorflow/contrib/linalg/python/ops/linear_operator_diag.py
index d59e8be767..4700e65518 100644
--- a/tensorflow/contrib/linalg/python/ops/linear_operator_diag.py
+++ b/tensorflow/contrib/linalg/python/ops/linear_operator_diag.py
@@ -166,7 +166,7 @@ class LinearOperatorDiag(linear_operator.LinearOperator):
d_shape = self._diag.get_shape()
return d_shape.concatenate(d_shape[-1:])
- def _shape_dynamic(self):
+ def _shape_tensor(self):
d_shape = array_ops.shape(self._diag)
k = d_shape[-1]
return array_ops.concat((d_shape, [k]), 0)
diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_identity.py b/tensorflow/contrib/linalg/python/ops/linear_operator_identity.py
index 3304698ec6..6559f8b116 100644
--- a/tensorflow/contrib/linalg/python/ops/linear_operator_identity.py
+++ b/tensorflow/contrib/linalg/python/ops/linear_operator_identity.py
@@ -261,7 +261,7 @@ class LinearOperatorIdentity(BaseLinearOperatorIdentity):
batch_shape = tensor_shape.TensorShape(self._batch_shape_static)
return batch_shape.concatenate(matrix_shape)
- def _shape_dynamic(self):
+ def _shape_tensor(self):
matrix_shape = array_ops.stack(
(self._num_rows, self._num_rows), axis=0)
if self._batch_shape_arg is None:
@@ -307,7 +307,7 @@ class LinearOperatorIdentity(BaseLinearOperatorIdentity):
# Dynamic broadcast:
# Always add to an array of zeros, rather than using a "cond", since a
# cond would require copying data from GPU --> CPU.
- special_shape = array_ops.concat((self.batch_shape_dynamic(), [1, 1]), 0)
+ special_shape = array_ops.concat((self.batch_shape_tensor(), [1, 1]), 0)
zeros = array_ops.zeros(shape=special_shape, dtype=self.dtype)
return x + zeros
@@ -320,10 +320,10 @@ class LinearOperatorIdentity(BaseLinearOperatorIdentity):
return self._possibly_broadcast_batch_shape(x)
def _determinant(self):
- return array_ops.ones(shape=self.batch_shape_dynamic(), dtype=self.dtype)
+ return array_ops.ones(shape=self.batch_shape_tensor(), dtype=self.dtype)
def _log_abs_determinant(self):
- return array_ops.zeros(shape=self.batch_shape_dynamic(), dtype=self.dtype)
+ return array_ops.zeros(shape=self.batch_shape_tensor(), dtype=self.dtype)
def _solve(self, rhs, adjoint=False):
return self._apply(rhs)
@@ -566,7 +566,7 @@ class LinearOperatorScaledIdentity(BaseLinearOperatorIdentity):
batch_shape = self.multiplier.get_shape()
return batch_shape.concatenate(matrix_shape)
- def _shape_dynamic(self):
+ def _shape_tensor(self):
matrix_shape = array_ops.stack(
(self._num_rows, self._num_rows), axis=0)
diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_matrix.py b/tensorflow/contrib/linalg/python/ops/linear_operator_matrix.py
index 7ca18450d1..3b5dc7c481 100644
--- a/tensorflow/contrib/linalg/python/ops/linear_operator_matrix.py
+++ b/tensorflow/contrib/linalg/python/ops/linear_operator_matrix.py
@@ -157,7 +157,7 @@ class LinearOperatorMatrix(linear_operator.LinearOperator):
def _shape(self):
return self._matrix.get_shape()
- def _shape_dynamic(self):
+ def _shape_tensor(self):
return array_ops.shape(self._matrix)
def _apply(self, x, adjoint=False):
diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_test_util.py b/tensorflow/contrib/linalg/python/ops/linear_operator_test_util.py
index 5de9bb5d77..85cd7fcd9a 100644
--- a/tensorflow/contrib/linalg/python/ops/linear_operator_test_util.py
+++ b/tensorflow/contrib/linalg/python/ops/linear_operator_test_util.py
@@ -174,6 +174,29 @@ class LinearOperatorDerivedClassTest(test.TestCase):
feed_dict=feed_dict)
self.assertAC(op_det_v, mat_det_v)
+ def test_log_abs_det(self):
+ self._maybe_skip("log_abs_det")
+ for use_placeholder in False, True:
+ for shape in self._shapes_to_test:
+ for dtype in self._dtypes_to_test:
+ if dtype.is_complex:
+ self.skipTest(
+ "tf.matrix_determinant does not work with complex, so this "
+ "test is being skipped.")
+ with self.test_session(graph=ops.Graph()) as sess:
+ sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
+ operator, mat, feed_dict = self._operator_and_mat_and_feed_dict(
+ shape, dtype, use_placeholder=use_placeholder)
+ op_log_abs_det = operator.log_abs_determinant()
+ mat_log_abs_det = math_ops.log(
+ math_ops.abs(linalg_ops.matrix_determinant(mat)))
+ if not use_placeholder:
+ self.assertAllEqual(shape[:-2], op_log_abs_det.get_shape())
+ op_log_abs_det_v, mat_log_abs_det_v = sess.run(
+ [op_log_abs_det, mat_log_abs_det],
+ feed_dict=feed_dict)
+ self.assertAC(op_log_abs_det_v, mat_log_abs_det_v)
+
def test_apply(self):
self._maybe_skip("apply")
for use_placeholder in False, True:
@@ -262,8 +285,8 @@ class SquareLinearOperatorDerivedClassTest(LinearOperatorDerivedClassTest):
n = operator.domain_dimension.value
x_shape = batch_shape + [n, r]
else:
- batch_shape = operator.batch_shape_dynamic()
- n = operator.domain_dimension_dynamic()
+ batch_shape = operator.batch_shape_tensor()
+ n = operator.domain_dimension_tensor()
x_shape = array_ops.concat((batch_shape, [n, r]), 0)
return random_normal(x_shape, dtype=operator.dtype)
@@ -291,7 +314,7 @@ class NonSquareLinearOperatorDerivedClassTest(LinearOperatorDerivedClassTest):
@property
def _tests_to_skip(self):
"""List of test names to skip."""
- return ["solve", "det"]
+ return ["solve", "det", "log_abs_det"]
@property
def _shapes_to_test(self):
@@ -316,11 +339,11 @@ class NonSquareLinearOperatorDerivedClassTest(LinearOperatorDerivedClassTest):
n = operator.domain_dimension.value
x_shape = batch_shape + [n, r]
else:
- batch_shape = operator.batch_shape_dynamic()
+ batch_shape = operator.batch_shape_tensor()
if adjoint:
- n = operator.range_dimension_dynamic()
+ n = operator.range_dimension_tensor()
else:
- n = operator.domain_dimension_dynamic()
+ n = operator.domain_dimension_tensor()
x_shape = array_ops.concat((batch_shape, [n, r]), 0)
return random_normal(x_shape, dtype=operator.dtype)
diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_tril.py b/tensorflow/contrib/linalg/python/ops/linear_operator_tril.py
index 7c5b9b6b54..2b1fb4c04c 100644
--- a/tensorflow/contrib/linalg/python/ops/linear_operator_tril.py
+++ b/tensorflow/contrib/linalg/python/ops/linear_operator_tril.py
@@ -157,7 +157,7 @@ class LinearOperatorTriL(linear_operator.LinearOperator):
def _shape(self):
return self._tril.get_shape()
- def _shape_dynamic(self):
+ def _shape_tensor(self):
return array_ops.shape(self._tril)
def _assert_non_singular(self):
diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_util.py b/tensorflow/contrib/linalg/python/ops/linear_operator_util.py
index 44092f0c06..6e56fac2e3 100644
--- a/tensorflow/contrib/linalg/python/ops/linear_operator_util.py
+++ b/tensorflow/contrib/linalg/python/ops/linear_operator_util.py
@@ -83,10 +83,10 @@ def assert_compatible_matrix_dimensions(operator, x):
Returns:
`Assert` `Op`.
"""
- # Static checks are done in the base class. Only dynamic asserts here.
+ # Static checks are done in the base class. Only tensor asserts here.
assert_same_dd = check_ops.assert_equal(
array_ops.shape(x)[-2],
- operator.domain_dimension_dynamic(),
+ operator.domain_dimension_tensor(),
message=(
"Incompatible matrix dimensions. "
"shape[-2] of argument to be the same as this operator"))
diff --git a/tensorflow/contrib/makefile/sub_makefiles/hexagon_graph_execution/Makefile.in b/tensorflow/contrib/makefile/sub_makefiles/hexagon_graph_execution/Makefile.in
index 3bad4c42a9..986150cb3f 100644
--- a/tensorflow/contrib/makefile/sub_makefiles/hexagon_graph_execution/Makefile.in
+++ b/tensorflow/contrib/makefile/sub_makefiles/hexagon_graph_execution/Makefile.in
@@ -44,6 +44,7 @@ CXXFLAGS += -DTENSORFLOW_DISABLE_META
CXXFLAGS += -D__ANDROID_TYPES_FULL__
GRAPH_EXECUTION_SRCS := \
+tensorflow/core/kernels/hexagon/graph_transfer_utils.cc \
tensorflow/core/kernels/hexagon/graph_transferer.cc \
tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc \
tensorflow/core/kernels/hexagon/hexagon_ops_definitions.cc \
diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index c3f59dd84c..96acead47f 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -28,6 +28,7 @@ tensorflow/core/kernels/split_op.cc
tensorflow/core/kernels/split_v_op.cc
tensorflow/core/kernels/split_lib_cpu.cc
tensorflow/core/kernels/sparse_to_dense_op.cc
+tensorflow/core/kernels/sparse_matmul_op.cc
tensorflow/core/kernels/softsign_op.cc
tensorflow/core/kernels/softplus_op.cc
tensorflow/core/kernels/softmax_op.cc
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py
index 6ceeacbc72..7ac337732a 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py
@@ -167,10 +167,10 @@ def streaming_true_positives(predictions, labels, weights=None,
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
Args:
- predictions: The predicted values, a `bool` `Tensor` of arbitrary
- dimensions.
- labels: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
+ predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
+ labels: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
weights: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions
must be either `1`, or the same as the corresponding `labels`
@@ -206,10 +206,10 @@ def streaming_true_negatives(predictions, labels, weights=None,
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
Args:
- predictions: The predicted values, a `bool` `Tensor` of arbitrary
- dimensions.
- labels: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
+ predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
+ labels: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
weights: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions
must be either `1`, or the same as the corresponding `labels`
@@ -233,11 +233,11 @@ def streaming_true_negatives(predictions, labels, weights=None,
with variable_scope.variable_scope(
name, 'true_negatives', (predictions, labels, weights)):
- predictions = ops.convert_to_tensor(predictions)
- labels = ops.convert_to_tensor(labels)
+ predictions = math_ops.cast(predictions, dtype=dtypes.bool)
+ labels = math_ops.cast(labels, dtype=dtypes.bool)
predictions.get_shape().assert_is_compatible_with(labels.get_shape())
- is_true_negative = math_ops.logical_and(math_ops.equal(labels, 0),
- math_ops.equal(predictions, 0))
+ is_true_negative = math_ops.logical_and(math_ops.equal(labels, False),
+ math_ops.equal(predictions, False))
return _count_condition(is_true_negative, weights, metrics_collections,
updates_collections)
@@ -251,10 +251,10 @@ def streaming_false_positives(predictions, labels, weights=None,
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
Args:
- predictions: The predicted values, a `bool` `Tensor` of arbitrary
- dimensions.
- labels: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
+ predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
+ labels: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
weights: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions
must be either `1`, or the same as the corresponding `labels`
@@ -290,10 +290,10 @@ def streaming_false_negatives(predictions, labels, weights=None,
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
Args:
- predictions: The predicted values, a `bool` `Tensor` of arbitrary
- dimensions.
- labels: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
+ predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
+ labels: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
weights: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions
must be either `1`, or the same as the corresponding `labels`
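After this change the streaming true/false positive/negative metrics no longer require bool inputs; predictions and labels are cast to bool internally, so 0/1 integer or float tensors work unchanged. A hedged TF 1.x-style sketch of the new true-negative counting logic:

import tensorflow as tf

predictions = tf.constant([1, 0, 1, 0])      # int32 is accepted after this change
labels = tf.constant([0.0, 0.0, 1.0, 0.0])   # so are floats

pred_bool = tf.cast(predictions, tf.bool)
label_bool = tf.cast(labels, tf.bool)
is_true_negative = tf.logical_and(tf.equal(label_bool, False),
                                  tf.equal(pred_bool, False))

with tf.Session() as sess:
  print(sess.run(tf.reduce_sum(tf.cast(is_true_negative, tf.float32))))  # 2.0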
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
index 3e2e408e6f..4fb244e3d4 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
@@ -663,35 +663,41 @@ class StreamingTruePositivesTest(test.TestCase):
_assert_local_variables(self, ('true_positives/count:0',))
def testUnweighted(self):
- predictions = constant_op.constant(((1, 0, 1, 0),
- (0, 1, 1, 1),
- (0, 0, 0, 0)))
- labels = constant_op.constant(((0, 1, 1, 0),
- (1, 0, 0, 0),
- (0, 0, 0, 0)))
- tp, tp_update_op = metrics.streaming_true_positives(predictions, labels)
+ for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
+ predictions = math_ops.cast(constant_op.constant(
+ ((1, 0, 1, 0),
+ (0, 1, 1, 1),
+ (0, 0, 0, 0))), dtype=dtype)
+ labels = math_ops.cast(constant_op.constant(
+ ((0, 1, 1, 0),
+ (1, 0, 0, 0),
+ (0, 0, 0, 0))), dtype=dtype)
+ tp, tp_update_op = metrics.streaming_true_positives(predictions, labels)
- with self.test_session() as sess:
- sess.run(variables.local_variables_initializer())
- self.assertEqual(0, tp.eval())
- self.assertEqual(1, tp_update_op.eval())
- self.assertEqual(1, tp.eval())
+ with self.test_session() as sess:
+ sess.run(variables.local_variables_initializer())
+ self.assertEqual(0, tp.eval())
+ self.assertEqual(1, tp_update_op.eval())
+ self.assertEqual(1, tp.eval())
def testWeighted(self):
- predictions = constant_op.constant(((1, 0, 1, 0),
- (0, 1, 1, 1),
- (0, 0, 0, 0)))
- labels = constant_op.constant(((0, 1, 1, 0),
- (1, 0, 0, 0),
- (0, 0, 0, 0)))
- tp, tp_update_op = metrics.streaming_true_positives(
- predictions, labels, weights=37.0)
+ for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
+ predictions = math_ops.cast(constant_op.constant(
+ ((1, 0, 1, 0),
+ (0, 1, 1, 1),
+ (0, 0, 0, 0))), dtype=dtype)
+ labels = math_ops.cast(constant_op.constant(
+ ((0, 1, 1, 0),
+ (1, 0, 0, 0),
+ (0, 0, 0, 0))), dtype=dtype)
+ tp, tp_update_op = metrics.streaming_true_positives(
+ predictions, labels, weights=37.0)
- with self.test_session() as sess:
- sess.run(variables.local_variables_initializer())
- self.assertEqual(0, tp.eval())
- self.assertEqual(37.0, tp_update_op.eval())
- self.assertEqual(37.0, tp.eval())
+ with self.test_session() as sess:
+ sess.run(variables.local_variables_initializer())
+ self.assertEqual(0, tp.eval())
+ self.assertEqual(37.0, tp_update_op.eval())
+ self.assertEqual(37.0, tp.eval())
class StreamingFalseNegativesTest(test.TestCase):
@@ -706,35 +712,41 @@ class StreamingFalseNegativesTest(test.TestCase):
_assert_local_variables(self, ('false_negatives/count:0',))
def testUnweighted(self):
- predictions = constant_op.constant(((1, 0, 1, 0),
- (0, 1, 1, 1),
- (0, 0, 0, 0)))
- labels = constant_op.constant(((0, 1, 1, 0),
- (1, 0, 0, 0),
- (0, 0, 0, 0)))
- fn, fn_update_op = metrics.streaming_false_negatives(predictions, labels)
+ for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
+ predictions = math_ops.cast(constant_op.constant(
+ ((1, 0, 1, 0),
+ (0, 1, 1, 1),
+ (0, 0, 0, 0))), dtype=dtype)
+ labels = math_ops.cast(constant_op.constant(
+ ((0, 1, 1, 0),
+ (1, 0, 0, 0),
+ (0, 0, 0, 0))), dtype=dtype)
+ fn, fn_update_op = metrics.streaming_false_negatives(predictions, labels)
- with self.test_session() as sess:
- sess.run(variables.local_variables_initializer())
- self.assertEqual(0, fn.eval())
- self.assertEqual(2, fn_update_op.eval())
- self.assertEqual(2, fn.eval())
+ with self.test_session() as sess:
+ sess.run(variables.local_variables_initializer())
+ self.assertEqual(0, fn.eval())
+ self.assertEqual(2, fn_update_op.eval())
+ self.assertEqual(2, fn.eval())
def testWeighted(self):
- predictions = constant_op.constant(((1, 0, 1, 0),
- (0, 1, 1, 1),
- (0, 0, 0, 0)))
- labels = constant_op.constant(((0, 1, 1, 0),
- (1, 0, 0, 0),
- (0, 0, 0, 0)))
- fn, fn_update_op = metrics.streaming_false_negatives(
- predictions, labels, weights=((3.0,), (5.0,), (7.0,)))
+ for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
+ predictions = math_ops.cast(constant_op.constant(
+ ((1, 0, 1, 0),
+ (0, 1, 1, 1),
+ (0, 0, 0, 0))), dtype=dtype)
+ labels = math_ops.cast(constant_op.constant(
+ ((0, 1, 1, 0),
+ (1, 0, 0, 0),
+ (0, 0, 0, 0))), dtype=dtype)
+ fn, fn_update_op = metrics.streaming_false_negatives(
+ predictions, labels, weights=((3.0,), (5.0,), (7.0,)))
- with self.test_session() as sess:
- sess.run(variables.local_variables_initializer())
- self.assertEqual(0, fn.eval())
- self.assertEqual(8.0, fn_update_op.eval())
- self.assertEqual(8.0, fn.eval())
+ with self.test_session() as sess:
+ sess.run(variables.local_variables_initializer())
+ self.assertEqual(0, fn.eval())
+ self.assertEqual(8.0, fn_update_op.eval())
+ self.assertEqual(8.0, fn.eval())
class StreamingFalsePositivesTest(test.TestCase):
@@ -749,39 +761,45 @@ class StreamingFalsePositivesTest(test.TestCase):
_assert_local_variables(self, ('false_positives/count:0',))
def testUnweighted(self):
- predictions = constant_op.constant(((1, 0, 1, 0),
- (0, 1, 1, 1),
- (0, 0, 0, 0)))
- labels = constant_op.constant(((0, 1, 1, 0),
- (1, 0, 0, 0),
- (0, 0, 0, 0)))
- fp, fp_update_op = metrics.streaming_false_positives(predictions, labels)
+ for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
+ predictions = math_ops.cast(constant_op.constant(
+ ((1, 0, 1, 0),
+ (0, 1, 1, 1),
+ (0, 0, 0, 0))), dtype=dtype)
+ labels = math_ops.cast(constant_op.constant(
+ ((0, 1, 1, 0),
+ (1, 0, 0, 0),
+ (0, 0, 0, 0))), dtype=dtype)
+ fp, fp_update_op = metrics.streaming_false_positives(predictions, labels)
- with self.test_session() as sess:
- sess.run(variables.local_variables_initializer())
- self.assertEqual(0, fp.eval())
- self.assertEqual(4, fp_update_op.eval())
- self.assertEqual(4, fp.eval())
+ with self.test_session() as sess:
+ sess.run(variables.local_variables_initializer())
+ self.assertEqual(0, fp.eval())
+ self.assertEqual(4, fp_update_op.eval())
+ self.assertEqual(4, fp.eval())
def testWeighted(self):
- predictions = constant_op.constant(((1, 0, 1, 0),
- (0, 1, 1, 1),
- (0, 0, 0, 0)))
- labels = constant_op.constant(((0, 1, 1, 0),
- (1, 0, 0, 0),
- (0, 0, 0, 0)))
- fp, fp_update_op = metrics.streaming_false_positives(
- predictions,
- labels,
- weights=((1.0, 2.0, 3.0, 5.0),
- (7.0, 11.0, 13.0, 17.0),
- (19.0, 23.0, 29.0, 31.0)))
+ for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
+ predictions = math_ops.cast(constant_op.constant(
+ ((1, 0, 1, 0),
+ (0, 1, 1, 1),
+ (0, 0, 0, 0))), dtype=dtype)
+ labels = math_ops.cast(constant_op.constant(
+ ((0, 1, 1, 0),
+ (1, 0, 0, 0),
+ (0, 0, 0, 0))), dtype=dtype)
+ fp, fp_update_op = metrics.streaming_false_positives(
+ predictions,
+ labels,
+ weights=((1.0, 2.0, 3.0, 5.0),
+ (7.0, 11.0, 13.0, 17.0),
+ (19.0, 23.0, 29.0, 31.0)))
- with self.test_session() as sess:
- sess.run(variables.local_variables_initializer())
- self.assertEqual(0, fp.eval())
- self.assertEqual(42.0, fp_update_op.eval())
- self.assertEqual(42.0, fp.eval())
+ with self.test_session() as sess:
+ sess.run(variables.local_variables_initializer())
+ self.assertEqual(0, fp.eval())
+ self.assertEqual(42.0, fp_update_op.eval())
+ self.assertEqual(42.0, fp.eval())
class StreamingTrueNegativesTest(test.TestCase):
@@ -796,35 +814,41 @@ class StreamingTrueNegativesTest(test.TestCase):
_assert_local_variables(self, ('true_negatives/count:0',))
def testUnweighted(self):
- predictions = constant_op.constant(((1, 0, 1, 0),
- (0, 1, 1, 1),
- (0, 0, 0, 0)))
- labels = constant_op.constant(((0, 1, 1, 0),
- (1, 0, 0, 0),
- (0, 0, 0, 0)))
- tn, tn_update_op = metrics.streaming_true_negatives(predictions, labels)
+ for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
+ predictions = math_ops.cast(constant_op.constant(
+ ((1, 0, 1, 0),
+ (0, 1, 1, 1),
+ (0, 0, 0, 0))), dtype=dtype)
+ labels = math_ops.cast(constant_op.constant(
+ ((0, 1, 1, 0),
+ (1, 0, 0, 0),
+ (0, 0, 0, 0))), dtype=dtype)
+ tn, tn_update_op = metrics.streaming_true_negatives(predictions, labels)
- with self.test_session() as sess:
- sess.run(variables.local_variables_initializer())
- self.assertEqual(0, tn.eval())
- self.assertEqual(5, tn_update_op.eval())
- self.assertEqual(5, tn.eval())
+ with self.test_session() as sess:
+ sess.run(variables.local_variables_initializer())
+ self.assertEqual(0, tn.eval())
+ self.assertEqual(5, tn_update_op.eval())
+ self.assertEqual(5, tn.eval())
def testWeighted(self):
- predictions = constant_op.constant(((1, 0, 1, 0),
- (0, 1, 1, 1),
- (0, 0, 0, 0)))
- labels = constant_op.constant(((0, 1, 1, 0),
- (1, 0, 0, 0),
- (0, 0, 0, 0)))
- tn, tn_update_op = metrics.streaming_true_negatives(
- predictions, labels, weights=((0.0, 2.0, 3.0, 5.0),))
+ for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
+ predictions = math_ops.cast(constant_op.constant(
+ ((1, 0, 1, 0),
+ (0, 1, 1, 1),
+ (0, 0, 0, 0))), dtype=dtype)
+ labels = math_ops.cast(constant_op.constant(
+ ((0, 1, 1, 0),
+ (1, 0, 0, 0),
+ (0, 0, 0, 0))), dtype=dtype)
+ tn, tn_update_op = metrics.streaming_true_negatives(
+ predictions, labels, weights=((0.0, 2.0, 3.0, 5.0),))
- with self.test_session() as sess:
- sess.run(variables.local_variables_initializer())
- self.assertEqual(0, tn.eval())
- self.assertEqual(15.0, tn_update_op.eval())
- self.assertEqual(15.0, tn.eval())
+ with self.test_session() as sess:
+ sess.run(variables.local_variables_initializer())
+ self.assertEqual(0, tn.eval())
+ self.assertEqual(15.0, tn_update_op.eval())
+ self.assertEqual(15.0, tn.eval())
class StreamingTruePositivesAtThresholdsTest(test.TestCase):
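The dtype sweeps above only change how predictions and labels are represented; the counted quantities are unchanged. As a minimal NumPy sketch (not the metrics implementation): a false positive is a position where the prediction is non-zero and the label is zero, a true negative is a position where both are zero, and weights simply scale each position.

import numpy as np

predictions = np.array([[1, 0, 1, 0],
                        [0, 1, 1, 1],
                        [0, 0, 0, 0]], dtype=bool)
labels = np.array([[0, 1, 1, 0],
                   [1, 0, 0, 0],
                   [0, 0, 0, 0]], dtype=bool)
weights = np.array([[1.0, 2.0, 3.0, 5.0],
                    [7.0, 11.0, 13.0, 17.0],
                    [19.0, 23.0, 29.0, 31.0]])

fp_mask = predictions & ~labels          # predicted 1, labeled 0
print(fp_mask.sum())                     # 4, the unweighted expectation
print((fp_mask * weights).sum())         # 42.0, the weighted expectation

tn_mask = ~predictions & ~labels         # predicted 0, labeled 0
print(tn_mask.sum())                     # 5, as in the true-negatives test
print((tn_mask * np.array([[0.0, 2.0, 3.0, 5.0]])).sum())  # 15.0 with the (1, 4) weights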
diff --git a/tensorflow/contrib/seq2seq/BUILD b/tensorflow/contrib/seq2seq/BUILD
index 3c314e2f28..a739487ae3 100644
--- a/tensorflow/contrib/seq2seq/BUILD
+++ b/tensorflow/contrib/seq2seq/BUILD
@@ -72,6 +72,46 @@ cuda_py_test(
],
)
+cuda_py_test(
+ name = "sampling_decoder_test",
+ size = "medium",
+ srcs = ["python/kernel_tests/sampling_decoder_test.py"],
+ additional_deps = [
+ ":seq2seq_py",
+ "//tensorflow/contrib/layers:layers_py",
+ "//tensorflow/contrib/rnn:rnn_py",
+ "//tensorflow/python:array_ops",
+ "//tensorflow/python:client_testlib",
+ "//tensorflow/python:framework_for_generated_wrappers",
+ "//tensorflow/python:framework_test_lib",
+ "//tensorflow/python:init_ops",
+ "//tensorflow/python:platform_test",
+ "//tensorflow/python:rnn",
+ "//tensorflow/python:variable_scope",
+ "//tensorflow/python:variables",
+ ],
+)
+
+cuda_py_test(
+ name = "decoder_test",
+ size = "medium",
+ srcs = ["python/kernel_tests/decoder_test.py"],
+ additional_deps = [
+ ":seq2seq_py",
+ "//tensorflow/contrib/layers:layers_py",
+ "//tensorflow/contrib/rnn:rnn_py",
+ "//tensorflow/python:array_ops",
+ "//tensorflow/python:client_testlib",
+ "//tensorflow/python:framework_for_generated_wrappers",
+ "//tensorflow/python:framework_test_lib",
+ "//tensorflow/python:init_ops",
+ "//tensorflow/python:platform_test",
+ "//tensorflow/python:rnn",
+ "//tensorflow/python:variable_scope",
+ "//tensorflow/python:variables",
+ ],
+)
+
filegroup(
name = "all_files",
srcs = glob(
diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py
new file mode 100644
index 0000000000..b3c6c593c5
--- /dev/null
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py
@@ -0,0 +1,156 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for contrib.seq2seq.python.seq2seq.decoder."""
+# pylint: disable=unused-import,g-bad-import-order
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# pylint: enable=unused-import
+
+import sys
+
+# TODO(jart): #6568 Remove this hack that makes dlopen() not crash.
+if hasattr(sys, "getdlopenflags") and hasattr(sys, "setdlopenflags"):
+ import ctypes # pylint: disable=g-import-not-at-top
+ sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL)
+
+# pylint: disable=g-import-not-at-top
+import numpy as np
+
+from tensorflow.contrib.rnn import core_rnn_cell
+from tensorflow.contrib.seq2seq.python.ops import decoder
+from tensorflow.contrib.seq2seq.python.ops import sampling_decoder
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import rnn
+from tensorflow.python.ops import variables
+from tensorflow.python.ops import variable_scope as vs
+from tensorflow.python.platform import test
+# pylint: enable=g-import-not-at-top
+
+
+class DynamicDecodeRNNTest(test.TestCase):
+
+ def _testDynamicDecodeRNN(self, time_major):
+
+ sequence_length = [3, 4, 3, 1, 0]
+ batch_size = 5
+ max_time = 8
+ input_depth = 7
+ cell_depth = 10
+ max_out = max(sequence_length)
+
+ with self.test_session() as sess:
+ if time_major:
+ inputs = np.random.randn(max_time, batch_size,
+ input_depth).astype(np.float32)
+ else:
+ inputs = np.random.randn(batch_size, max_time,
+ input_depth).astype(np.float32)
+ cell = core_rnn_cell.LSTMCell(cell_depth)
+ sampler = sampling_decoder.BasicTrainingSampler(
+ inputs, sequence_length, time_major=time_major)
+ my_decoder = sampling_decoder.BasicSamplingDecoder(
+ cell=cell,
+ sampler=sampler,
+ initial_state=cell.zero_state(
+ dtype=dtypes.float32, batch_size=batch_size))
+
+ final_outputs, final_state = decoder.dynamic_decode_rnn(
+ my_decoder, output_time_major=time_major)
+
+ def _t(shape):
+ if time_major:
+ return (shape[1], shape[0]) + shape[2:]
+ return shape
+
+ self.assertTrue(
+ isinstance(final_outputs, sampling_decoder.SamplingDecoderOutput))
+ self.assertTrue(isinstance(final_state, core_rnn_cell.LSTMStateTuple))
+
+ self.assertEqual(
+ _t((batch_size, None, cell_depth)),
+ tuple(final_outputs.rnn_output.get_shape().as_list()))
+ self.assertEqual(
+ _t((batch_size, None)),
+ tuple(final_outputs.sample_id.get_shape().as_list()))
+
+ sess.run(variables.global_variables_initializer())
+ sess_results = sess.run({
+ "final_outputs": final_outputs,
+ "final_state": final_state
+ })
+
+ self.assertEqual(
+ _t((batch_size, max_out, cell_depth)),
+ sess_results["final_outputs"].rnn_output.shape)
+ self.assertEqual(
+ _t((batch_size, max_out)),
+ sess_results["final_outputs"].sample_id.shape)
+
+ def testDynamicDecodeRNNBatchMajor(self):
+ self._testDynamicDecodeRNN(time_major=False)
+
+ def testDynamicDecodeRNNTimeMajor(self):
+ self._testDynamicDecodeRNN(time_major=True)
+
+ def testDynamicDecodeRNNWithBasicTrainingSamplerMatchesDynamicRNN(self):
+ sequence_length = [3, 4, 3, 1, 0]
+ batch_size = 5
+ max_time = 8
+ input_depth = 7
+ cell_depth = 10
+ max_out = max(sequence_length)
+
+ with self.test_session() as sess:
+ inputs = np.random.randn(batch_size, max_time,
+ input_depth).astype(np.float32)
+
+ cell = core_rnn_cell.LSTMCell(cell_depth)
+ zero_state = cell.zero_state(dtype=dtypes.float32, batch_size=batch_size)
+ sampler = sampling_decoder.BasicTrainingSampler(inputs, sequence_length)
+ my_decoder = sampling_decoder.BasicSamplingDecoder(
+ cell=cell, sampler=sampler, initial_state=zero_state)
+
+ # Match the variable scope of dynamic_rnn below so we end up
+ # using the same variables
+ with vs.variable_scope("rnn"):
+ final_decoder_outputs, final_decoder_state = decoder.dynamic_decode_rnn(
+ my_decoder)
+
+ with vs.variable_scope(vs.get_variable_scope(), reuse=True):
+ final_rnn_outputs, final_rnn_state = rnn.dynamic_rnn(
+ cell,
+ inputs,
+ sequence_length=sequence_length,
+ initial_state=zero_state)
+
+ sess.run(variables.global_variables_initializer())
+ sess_results = sess.run({
+ "final_decoder_outputs": final_decoder_outputs,
+ "final_decoder_state": final_decoder_state,
+ "final_rnn_outputs": final_rnn_outputs,
+ "final_rnn_state": final_rnn_state
+ })
+
+ # Decoder only runs out to max_out; ensure values are identical
+ # to dynamic_rnn, which also zeros out outputs and passes along state.
+ self.assertAllClose(sess_results["final_decoder_outputs"].rnn_output,
+ sess_results["final_rnn_outputs"][:, 0:max_out, :])
+ self.assertAllClose(sess_results["final_decoder_state"],
+ sess_results["final_rnn_state"])
+
+
+if __name__ == "__main__":
+ test.main()
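The _t helper in the test above only swaps the first two entries of an expected shape when running in time-major mode; the same convention applies to the arrays themselves. A small NumPy sketch of that bookkeeping (an illustration, not part of the test):

import numpy as np

def t(shape, time_major):
    return (shape[1], shape[0]) + tuple(shape[2:]) if time_major else tuple(shape)

batch_size, max_out, cell_depth = 5, 4, 10
print(t((batch_size, max_out, cell_depth), time_major=False))  # (5, 4, 10)
print(t((batch_size, max_out, cell_depth), time_major=True))   # (4, 5, 10)

# Transposing the first two axes of a batch-major array gives the
# corresponding time-major layout.
batch_major = np.zeros((batch_size, max_out, cell_depth), dtype=np.float32)
print(np.transpose(batch_major, (1, 0, 2)).shape)               # (4, 5, 10)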
diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/sampling_decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/sampling_decoder_test.py
new file mode 100644
index 0000000000..ba945a0ecb
--- /dev/null
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/sampling_decoder_test.py
@@ -0,0 +1,109 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for contrib.seq2seq.python.seq2seq.sampling_decoder."""
+# pylint: disable=unused-import,g-bad-import-order
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# pylint: enable=unused-import
+
+import sys
+
+# TODO(jart): #6568 Remove this hack that makes dlopen() not crash.
+if hasattr(sys, "getdlopenflags") and hasattr(sys, "setdlopenflags"):
+ import ctypes # pylint: disable=g-import-not-at-top
+ sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL)
+
+# pylint: disable=g-import-not-at-top
+import numpy as np
+
+from tensorflow.contrib.rnn import core_rnn_cell
+from tensorflow.contrib.seq2seq.python.ops import sampling_decoder
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+# pylint: enable=g-import-not-at-top
+
+
+class BasicSamplingDecoderTest(test.TestCase):
+
+ def testStepWithBasicTrainingSampler(self):
+ sequence_length = [3, 4, 3, 1, 0]
+ batch_size = 5
+ max_time = 8
+ input_depth = 7
+ cell_depth = 10
+
+ with self.test_session() as sess:
+ inputs = np.random.randn(batch_size, max_time,
+ input_depth).astype(np.float32)
+ cell = core_rnn_cell.LSTMCell(cell_depth)
+ sampler = sampling_decoder.BasicTrainingSampler(
+ inputs, sequence_length, time_major=False)
+ my_decoder = sampling_decoder.BasicSamplingDecoder(
+ cell=cell,
+ sampler=sampler,
+ initial_state=cell.zero_state(
+ dtype=dtypes.float32, batch_size=batch_size))
+ output_size = my_decoder.output_size
+ output_dtype = my_decoder.output_dtype
+ batch_size_t = my_decoder.batch_size
+ self.assertEqual(
+ sampling_decoder.SamplingDecoderOutput(cell_depth,
+ tensor_shape.TensorShape([])),
+ output_size)
+ self.assertEqual(
+ sampling_decoder.SamplingDecoderOutput(dtypes.float32, dtypes.int32),
+ output_dtype)
+
+ (first_finished, first_inputs, first_state) = my_decoder.initialize()
+ (step_outputs, step_state, step_next_inputs,
+ step_finished) = my_decoder.step(
+ constant_op.constant(0), first_inputs, first_state)
+
+ self.assertTrue(isinstance(first_state, core_rnn_cell.LSTMStateTuple))
+ self.assertTrue(isinstance(step_state, core_rnn_cell.LSTMStateTuple))
+ self.assertTrue(
+ isinstance(step_outputs, sampling_decoder.SamplingDecoderOutput))
+ self.assertEqual((batch_size, cell_depth), step_outputs[0].get_shape())
+ self.assertEqual((batch_size,), step_outputs[1].get_shape())
+ self.assertEqual((batch_size, cell_depth), first_state[0].get_shape())
+ self.assertEqual((batch_size, cell_depth), first_state[1].get_shape())
+ self.assertEqual((batch_size, cell_depth), step_state[0].get_shape())
+ self.assertEqual((batch_size, cell_depth), step_state[1].get_shape())
+
+ sess.run(variables.global_variables_initializer())
+ sess_results = sess.run({
+ "batch_size": batch_size_t,
+ "first_finished": first_finished,
+ "first_inputs": first_inputs,
+ "first_state": first_state,
+ "step_outputs": step_outputs,
+ "step_state": step_state,
+ "step_next_inputs": step_next_inputs,
+ "step_finished": step_finished
+ })
+
+ self.assertAllEqual([False, False, False, False, True],
+ sess_results["first_finished"])
+ self.assertAllEqual([False, False, False, True, True],
+ sess_results["step_finished"])
+ self.assertAllEqual([-1] * 5, sess_results["step_outputs"].sample_id)
+
+
+if __name__ == "__main__":
+ test.main()
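The finished masks asserted above follow directly from the sequence lengths. A hedged NumPy sketch of that arithmetic (the sampler itself works on tensors and TensorArrays, not NumPy):

import numpy as np

sequence_length = np.array([3, 4, 3, 1, 0])

# initialize(): a sequence of length zero is finished before any step runs.
first_finished = sequence_length == 0
print(first_finished)          # [False False False False  True]

# sample() at time 0: the next step index is 1, so any sequence of length
# <= 1 is now finished.
step_finished = (0 + 1) >= sequence_length
print(step_finished)           # [False False False  True  True]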
diff --git a/tensorflow/contrib/seq2seq/python/ops/decoder.py b/tensorflow/contrib/seq2seq/python/ops/decoder.py
new file mode 100644
index 0000000000..3ab6cb0e8c
--- /dev/null
+++ b/tensorflow/contrib/seq2seq/python/ops/decoder.py
@@ -0,0 +1,237 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Seq2seq layer operations for use in neural networks.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import abc
+
+import six
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import tensor_array_ops
+from tensorflow.python.util import nest
+
+__all__ = ["Decoder", "dynamic_decode_rnn"]
+
+
+def _transpose_batch_time(x):
+ """Transpose the batch and time dimensions of a Tensor.
+
+ Retains as much of the static shape information as possible.
+
+ Args:
+ x: A tensor of rank 2 or higher.
+
+ Returns:
+ x transposed along the first two dimensions.
+
+ Raises:
+ ValueError: if `x` is rank 1 or lower.
+ """
+ x_static_shape = x.get_shape()
+ if x_static_shape.ndims is not None and x_static_shape.ndims < 2:
+ raise ValueError(
+ "Expected input tensor %s to have rank at least 2, but saw shape: %s" %
+ (x, x_static_shape))
+ x_rank = array_ops.rank(x)
+ x_t = array_ops.transpose(
+ x, array_ops.concat_v2(
+ ([1, 0], math_ops.range(2, x_rank)), axis=0))
+ x_t.set_shape(
+ tensor_shape.TensorShape([
+ x_static_shape[1].value, x_static_shape[0].value
+ ]).concatenate(x_static_shape[2:]))
+ return x_t
+
+
+@six.add_metaclass(abc.ABCMeta)
+class Decoder(object):
+ """An RNN Decoder abstract interface object."""
+
+ @property
+ def batch_size(self):
+ """The batch size of the inputs returned by `sample`."""
+ raise NotImplementedError
+
+ @property
+ def output_size(self):
+ """A (possibly nested tuple of...) integer[s] or `TensorShape` object[s]."""
+ raise NotImplementedError
+
+ @property
+ def output_dtype(self):
+ """A (possibly nested tuple of...) dtype[s]."""
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def initialize(self, name=None):
+ """Called before any decoding iterations.
+
+ Args:
+ name: Name scope for any created operations.
+
+ Returns:
+ `(finished, first_inputs, initial_state)`.
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def step(self, time, inputs, state):
+ """Called per step of decoding (but only once for dynamic decoding).
+
+ Args:
+ time: Scalar `int32` tensor.
+ inputs: Input (possibly nested tuple of) tensor[s] for this time step.
+ state: State (possibly nested tuple of) tensor[s] from previous time step.
+
+ Returns:
+ `(outputs, next_state, next_inputs, finished)`.
+ """
+ raise NotImplementedError
+
+
+def _create_zero_outputs(size, dtype, batch_size):
+ """Create a zero outputs Tensor structure."""
+ def _t(s):
+ return (s if isinstance(s, ops.Tensor) else constant_op.constant(
+ tensor_shape.TensorShape(s).as_list(),
+ dtype=dtypes.int32,
+ name="zero_suffix_shape"))
+
+ def _create(s, d):
+ return array_ops.zeros(
+ array_ops.concat(
+ ([batch_size], _t(s)), axis=0), dtype=d)
+
+ return nest.map_structure(_create, size, dtype)
+
+
+def dynamic_decode_rnn(decoder,
+ output_time_major=False,
+ parallel_iterations=32,
+ swap_memory=False):
+ """Perform dynamic decoding with `decoder`.
+
+ Args:
+ decoder: A `Decoder` instance.
+ output_time_major: Python boolean. Default: `False` (batch major). If
+ `True`, outputs are returned as time major tensors (this mode is faster).
+ Otherwise, outputs are returned as batch major tensors (this adds extra
+ time to the computation).
+ parallel_iterations: Argument passed to `tf.while_loop`.
+ swap_memory: Argument passed to `tf.while_loop`.
+
+ Returns:
+ `(final_outputs, final_state)`.
+
+ Raises:
+ TypeError: if `decoder` is not an instance of `Decoder`.
+ """
+ if not isinstance(decoder, Decoder):
+ raise TypeError("Expected decoder to be type Decoder, but saw: %s" %
+ type(decoder))
+
+ zero_outputs = _create_zero_outputs(decoder.output_size, decoder.output_dtype,
+ decoder.batch_size)
+
+ initial_finished, initial_inputs, initial_state = decoder.initialize()
+ initial_time = constant_op.constant(0, dtype=dtypes.int32)
+
+ def _shape(batch_size, from_shape):
+ if not isinstance(from_shape, tensor_shape.TensorShape):
+ return tensor_shape.TensorShape(None)
+ else:
+ batch_size = tensor_util.constant_value(
+ ops.convert_to_tensor(
+ batch_size, name="batch_size"))
+ return tensor_shape.TensorShape([batch_size]).concatenate(from_shape)
+
+ def _create_ta(s, d):
+ return tensor_array_ops.TensorArray(
+ dtype=d, size=0, dynamic_size=True,
+ element_shape=_shape(decoder.batch_size, s))
+
+ initial_outputs_ta = nest.map_structure(
+ _create_ta, decoder.output_size, decoder.output_dtype)
+
+ def condition(unused_time, unused_outputs_ta, unused_state, unused_inputs,
+ finished):
+ return math_ops.logical_not(math_ops.reduce_all(finished))
+
+ def body(time, outputs_ta, state, inputs, finished):
+ """Internal while_loop body.
+
+ Args:
+ time: scalar int32 tensor.
+ outputs_ta: structure of TensorArray.
+ state: (structure of) state tensors and TensorArrays.
+ inputs: (structure of) input tensors.
+ finished: 1-D bool tensor.
+
+ Returns:
+ `(time + 1, outputs_ta, next_state, next_inputs, next_finished)`.
+ """
+ (next_outputs, decoder_state, next_inputs, decoder_finished) = decoder.step(
+ time, inputs, state)
+ next_finished = math_ops.logical_or(decoder_finished, finished)
+
+ nest.assert_same_structure(state, decoder_state)
+ nest.assert_same_structure(outputs_ta, next_outputs)
+ nest.assert_same_structure(inputs, next_inputs)
+
+ # Zero out output values past finish
+ emit = nest.map_structure(
+ lambda out, zero: array_ops.where(finished, zero, out), next_outputs,
+ zero_outputs)
+
+ # Copy through states past finish
+ def _maybe_copy_state(new, cur):
+ return (new if isinstance(cur, tensor_array_ops.TensorArray) else
+ array_ops.where(finished, cur, new))
+
+ next_state = nest.map_structure(_maybe_copy_state, decoder_state, state)
+ outputs_ta = nest.map_structure(lambda ta, out: ta.write(time, out),
+ outputs_ta, emit)
+ return (time + 1, outputs_ta, next_state, next_inputs, next_finished)
+
+ res = control_flow_ops.while_loop(
+ condition,
+ body,
+ loop_vars=[
+ initial_time, initial_outputs_ta, initial_state, initial_inputs,
+ initial_finished
+ ],
+ parallel_iterations=parallel_iterations,
+ swap_memory=swap_memory)
+
+ final_outputs_ta = res[1]
+ final_state = res[2]
+
+ final_outputs = nest.map_structure(lambda ta: ta.stack(), final_outputs_ta)
+ if not output_time_major:
+ final_outputs = nest.map_structure(_transpose_batch_time, final_outputs)
+
+ return final_outputs, final_state
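A minimal NumPy sketch of the masking performed inside the while_loop body above (toy shapes, no TensorArrays; an illustration, not the implementation): finished rows emit zeros and carry their previous state forward, while unfinished rows take the new cell output and state.

import numpy as np

finished = np.array([False, True, False])            # one flag per batch entry
new_output = np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]])
zero_output = np.zeros_like(new_output)
prev_state = np.array([[9.0, 9.0], [8.0, 8.0], [7.0, 7.0]])
new_state = prev_state + 1.0

emit = np.where(finished[:, None], zero_output, new_output)
next_state = np.where(finished[:, None], prev_state, new_state)
print(emit)          # the finished row (index 1) is zeroed out
print(next_state)    # the finished row keeps its previous state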
diff --git a/tensorflow/contrib/seq2seq/python/ops/sampling_decoder.py b/tensorflow/contrib/seq2seq/python/ops/sampling_decoder.py
new file mode 100644
index 0000000000..c4654e535d
--- /dev/null
+++ b/tensorflow/contrib/seq2seq/python/ops/sampling_decoder.py
@@ -0,0 +1,190 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""A class of Decoders that may sample to generate the next input.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import abc
+import collections
+
+import six
+
+from tensorflow.contrib.rnn import core_rnn_cell
+from tensorflow.contrib.seq2seq.python.ops import decoder
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import tensor_array_ops
+from tensorflow.python.util import nest
+
+__all__ = [
+ "Sampler", "SamplingDecoderOutput", "BasicSamplingDecoder",
+ "BasicTrainingSampler"
+]
+
+_transpose_batch_time = decoder._transpose_batch_time # pylint: disable=protected-access
+
+
+@six.add_metaclass(abc.ABCMeta)
+class Sampler(object):
+
+ @property
+ def batch_size(self):
+ pass
+
+ @abc.abstractmethod
+ def initialize(self):
+ pass
+
+ @abc.abstractmethod
+ def sample(self, time, outputs, state):
+ pass
+
+
+class SamplingDecoderOutput(
+ collections.namedtuple("SamplingDecoderOutput",
+ ("rnn_output", "sample_id"))):
+ pass
+
+
+class BasicSamplingDecoder(decoder.Decoder):
+ """Basic sampling decoder."""
+
+ def __init__(self, cell, sampler, initial_state):
+ """Initialize BasicSamplingDecoder.
+
+ Args:
+ cell: An `RNNCell` instance.
+ sampler: A `Sampler` instance.
+ initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
+
+ Raises:
+ TypeError: if `cell` is not an instance of `RNNCell` or `sampler`
+ is not an instance of `Sampler`.
+ """
+ if not isinstance(cell, core_rnn_cell.RNNCell):
+ raise TypeError("cell must be an RNNCell, received: %s" % type(cell))
+ if not isinstance(sampler, Sampler):
+ raise TypeError("sampler must be a Sampler, received: %s" %
+ type(sampler))
+ self._cell = cell
+ self._sampler = sampler
+ self._initial_state = initial_state
+
+ @property
+ def batch_size(self):
+ return self._sampler.batch_size
+
+ @property
+ def output_size(self):
+ # Return the cell output and the id
+ return SamplingDecoderOutput(
+ rnn_output=self._cell.output_size,
+ sample_id=tensor_shape.TensorShape([]))
+
+ @property
+ def output_dtype(self):
+ # Assume the dtype of the cell output matches the dtype of the first
+ # component of the initial state; return that structure of dtypes plus
+ # int32 for the sample ids.
+ dtype = nest.flatten(self._initial_state)[0].dtype
+ return SamplingDecoderOutput(
+ nest.map_structure(lambda _: dtype, self._cell.output_size),
+ dtypes.int32)
+
+ def initialize(self, name=None):
+ return self._sampler.initialize() + (self._initial_state,)
+
+ def step(self, time, inputs, state):
+ """Perform a decoding step.
+
+ Args:
+ time: scalar `int32` tensor.
+ inputs: A (structure of) input tensors.
+ state: A (structure of) state tensors and TensorArrays.
+
+ Returns:
+ `(outputs, next_state, next_inputs, finished)`.
+ """
+ cell_outputs, next_state = self._cell(inputs, state)
+ (sample_id, finished, next_inputs) = self._sampler.sample(
+ time=time, outputs=cell_outputs, state=next_state)
+ outputs = SamplingDecoderOutput(cell_outputs, sample_id)
+ return (outputs, next_state, next_inputs, finished)
+
+
+class BasicTrainingSampler(Sampler):
+ """A (non-)sampler for use during training. Only reads inputs."""
+
+ def __init__(self, inputs, sequence_length, time_major=False):
+ """Initializer.
+
+ Args:
+ inputs: A (structure of) input tensors.
+ sequence_length: An int32 vector tensor.
+ time_major: Python bool.
+
+ Raises:
+ ValueError: if `sequence_length` is not a 1D tensor.
+ """
+ inputs = ops.convert_to_tensor(inputs, name="inputs")
+ if not time_major:
+ inputs = nest.map_structure(_transpose_batch_time, inputs)
+
+ def _unstack_ta(inp):
+ return tensor_array_ops.TensorArray(
+ dtype=inp.dtype, size=array_ops.shape(inp)[0],
+ element_shape=inp.get_shape()[1:]).unstack(inp)
+
+ self._input_tas = nest.map_structure(_unstack_ta, inputs)
+ sequence_length = ops.convert_to_tensor(
+ sequence_length, name="sequence_length")
+ if sequence_length.get_shape().ndims != 1:
+ raise ValueError(
+ "Expected sequence_length to be a vector, but received shape: %s" %
+ sequence_length.get_shape())
+ self._sequence_length = sequence_length
+ self._zero_inputs = nest.map_structure(
+ lambda inp: array_ops.zeros_like(inp[0, :]), inputs)
+ self._batch_size = array_ops.size(sequence_length)
+
+ @property
+ def batch_size(self):
+ return self._batch_size
+
+ def initialize(self):
+ finished = math_ops.equal(0, self._sequence_length)
+ all_finished = math_ops.reduce_all(finished)
+ next_inputs = control_flow_ops.cond(
+ all_finished, lambda: self._zero_inputs,
+ lambda: nest.map_structure(lambda inp: inp.read(0), self._input_tas))
+ return (finished, next_inputs)
+
+ def sample(self, time, **unused_kwargs):
+ next_time = time + 1
+ finished = (next_time >= self._sequence_length)
+ all_finished = math_ops.reduce_all(finished)
+ sample_id = array_ops.tile([constant_op.constant(-1)], [self._batch_size])
+ next_inputs = control_flow_ops.cond(
+ all_finished, lambda: self._zero_inputs,
+ lambda: nest.map_structure(lambda inp: inp.read(next_time), self._input_tas))
+ return (sample_id, finished, next_inputs)
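A rough NumPy analogue of what BasicTrainingSampler.sample does at a given step (the real implementation reads per-timestep TensorArrays and uses a cond; the toy sizes below match the tests): ignore the cell output, emit a sample id of -1, and feed the next teacher-forced input slice.

import numpy as np

batch_size, max_time, input_depth = 5, 8, 7
sequence_length = np.array([3, 4, 3, 1, 0])
inputs = np.random.randn(batch_size, max_time, input_depth).astype(np.float32)

def sample(time):
    next_time = time + 1
    finished = next_time >= sequence_length
    sample_id = -np.ones(batch_size, dtype=np.int32)
    if finished.all():
        next_inputs = np.zeros((batch_size, input_depth), dtype=np.float32)
    else:
        next_inputs = inputs[:, next_time]   # the next ground-truth inputs
    return sample_id, finished, next_inputs

sample_id, finished, _ = sample(time=0)
print(sample_id)     # [-1 -1 -1 -1 -1]
print(finished)      # [False False False  True  True]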
diff --git a/tensorflow/contrib/tensor_forest/client/random_forest.py b/tensorflow/contrib/tensor_forest/client/random_forest.py
index 28d9b43bbd..420a6d3138 100644
--- a/tensorflow/contrib/tensor_forest/client/random_forest.py
+++ b/tensorflow/contrib/tensor_forest/client/random_forest.py
@@ -18,7 +18,6 @@ from __future__ import division
from __future__ import print_function
from tensorflow.contrib import framework as contrib_framework
-from tensorflow.contrib.framework.python.framework import experimental
from tensorflow.contrib.learn.python.learn import evaluable
from tensorflow.contrib.learn.python.learn import trainable
@@ -355,18 +354,15 @@ class TensorForestEstimator(evaluable.Evaluable, trainable.Trainable):
# pylint: enable=protected-access
return result
- @experimental
def export_savedmodel(self,
export_dir_base,
- input_fn,
+ serving_input_fn,
default_output_alternative_key=None,
assets_extra=None,
- as_text=False,
- exports_to_keep=None):
+ as_text=False):
return self._estimator.export_savedmodel(
export_dir_base,
- input_fn,
+ serving_input_fn,
default_output_alternative_key=default_output_alternative_key,
assets_extra=assets_extra,
- as_text=as_text,
- exports_to_keep=exports_to_keep)
+ as_text=as_text)
diff --git a/tensorflow/contrib/training/python/training/batch_sequences_with_states_test.py b/tensorflow/contrib/training/python/training/batch_sequences_with_states_test.py
index 9fd102d0f6..0f52c2128d 100644
--- a/tensorflow/contrib/training/python/training/batch_sequences_with_states_test.py
+++ b/tensorflow/contrib/training/python/training/batch_sequences_with_states_test.py
@@ -26,6 +26,8 @@ from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import string_ops
@@ -41,6 +43,31 @@ class BatchSequencesWithStatesTest(test.TestCase):
def setUp(self):
super(BatchSequencesWithStatesTest, self).setUp()
self.value_length = 4
+ ind1 = np.array([
+ [0, 0],
+ [1, 0], [1, 3], [1, 4],
+ [3, 2], [3, 3]])
+ val1 = np.array([0, 10, 13, 14, 32, 33])
+ shape1 = np.array([self.value_length, 6])
+ sp_tensor1 = sparse_tensor.SparseTensor(
+ array_ops.constant(ind1, dtypes.int64),
+ array_ops.constant(val1, dtypes.int64),
+ array_ops.constant(shape1, dtypes.int64))
+ ind2 = np.array([
+ [0, 0, 1],
+ [0, 1, 0],
+ [0, 1, 2],
+ [1, 0, 3],
+ [1, 1, 0],
+ [1, 1, 1],
+ [1, 1, 2],
+ [1, 2, 2]])
+ val2 = np.array([1, 10, 12, 103, 150, 149, 150, 122])
+ shape2 = np.array([self.value_length, 3, 4])
+ sp_tensor2 = sparse_tensor.SparseTensor(
+ array_ops.constant(ind2, dtypes.int64),
+ array_ops.constant(val2, dtypes.int64),
+ array_ops.constant(shape2, dtypes.int64))
self.batch_size = 2
self.key = string_ops.string_join([
"key_", string_ops.as_string(
@@ -48,8 +75,9 @@ class BatchSequencesWithStatesTest(test.TestCase):
])
self.sequences = {
"seq1": np.random.rand(self.value_length, 5),
- "seq2": np.random.rand(self.value_length, 4, 2)
- }
+ "seq2": np.random.rand(self.value_length, 4, 2),
+ "seq3": sp_tensor1,
+ "seq4": sp_tensor2}
self.context = {"context1": [3, 4]}
self.initial_states = {
"state1": np.random.rand(6, 7),
@@ -60,9 +88,12 @@ class BatchSequencesWithStatesTest(test.TestCase):
return set(
[s.decode("ascii").split(":")[0].encode("ascii") for s in key_value])
- def _testBasics(self, num_unroll, length, pad, expected_seq1_batch1,
- expected_seq2_batch1, expected_seq1_batch2,
- expected_seq2_batch2):
+ def _testBasics(self, num_unroll, length, pad,
+ expected_seq1_batch1, expected_seq2_batch1,
+ expected_seq1_batch2, expected_seq2_batch2,
+ expected_seq3_batch1, expected_seq3_batch2,
+ expected_seq4_batch1, expected_seq4_batch2):
+
with self.test_session() as sess:
next_batch = sqss.batch_sequences_with_states(
input_key=self.key,
@@ -99,12 +130,13 @@ class BatchSequencesWithStatesTest(test.TestCase):
threads = queue_runner_impl.start_queue_runners(coord=coord)
# Step 1
- (key_value, next_key_value, seq1_value, seq2_value, context1_value,
- state1_value, state2_value, length_value, _, _) = sess.run(
+ (key_value, next_key_value, seq1_value, seq2_value, seq3_value,
+ seq4_value, context1_value, state1_value, state2_value, length_value,
+ _, _) = sess.run(
(next_batch.key, next_batch.next_key, next_batch.sequences["seq1"],
- next_batch.sequences["seq2"], next_batch.context["context1"],
+ next_batch.sequences["seq2"], next_batch.sequences["seq3"],
+ next_batch.sequences["seq4"], next_batch.context["context1"],
state1, state2, next_batch.length, state1_update, state2_update))
-
expected_first_keys = set([b"00000_of_00002"])
expected_second_keys = set([b"00001_of_00002"])
expected_final_keys = set([b"STOP"])
@@ -116,6 +148,14 @@ class BatchSequencesWithStatesTest(test.TestCase):
context1_value)
self.assertAllEqual(expected_seq1_batch1, seq1_value)
self.assertAllEqual(expected_seq2_batch1, seq2_value)
+ self.assertAllEqual(expected_seq3_batch1.indices, seq3_value.indices)
+ self.assertAllEqual(expected_seq3_batch1.values, seq3_value.values)
+ self.assertAllEqual(expected_seq3_batch1.dense_shape,
+ seq3_value.dense_shape)
+ self.assertAllEqual(expected_seq4_batch1.indices, seq4_value.indices)
+ self.assertAllEqual(expected_seq4_batch1.values, seq4_value.values)
+ self.assertAllEqual(expected_seq4_batch1.dense_shape,
+ seq4_value.dense_shape)
self.assertAllEqual(
np.tile(self.initial_states["state1"], (self.batch_size, 1, 1)),
state1_value)
@@ -125,12 +165,13 @@ class BatchSequencesWithStatesTest(test.TestCase):
self.assertAllEqual(length_value, [num_unroll, num_unroll])
# Step 2
- (key_value, next_key_value, seq1_value, seq2_value, context1_value,
- state1_value, state2_value, length_value, _, _) = sess.run(
+ (key_value, next_key_value, seq1_value, seq2_value, seq3_value,
+ seq4_value, context1_value, state1_value, state2_value, length_value,
+ _, _) = sess.run(
(next_batch.key, next_batch.next_key, next_batch.sequences["seq1"],
- next_batch.sequences["seq2"], next_batch.context["context1"],
- next_batch.state("state1"), next_batch.state("state2"),
- next_batch.length, state1_update, state2_update))
+ next_batch.sequences["seq2"], next_batch.sequences["seq3"],
+ next_batch.sequences["seq4"], next_batch.context["context1"],
+ state1, state2, next_batch.length, state1_update, state2_update))
self.assertEqual(expected_second_keys, self._prefix(key_value))
self.assertEqual(expected_final_keys, self._prefix(next_key_value))
@@ -139,6 +180,14 @@ class BatchSequencesWithStatesTest(test.TestCase):
context1_value)
self.assertAllEqual(expected_seq1_batch2, seq1_value)
self.assertAllEqual(expected_seq2_batch2, seq2_value)
+ self.assertAllEqual(expected_seq3_batch2.indices, seq3_value.indices)
+ self.assertAllEqual(expected_seq3_batch2.values, seq3_value.values)
+ self.assertAllEqual(expected_seq3_batch2.dense_shape,
+ seq3_value.dense_shape)
+ self.assertAllEqual(expected_seq4_batch2.indices, seq4_value.indices)
+ self.assertAllEqual(expected_seq4_batch2.values, seq4_value.values)
+ self.assertAllEqual(expected_seq4_batch2.dense_shape,
+ seq4_value.dense_shape)
self.assertAllEqual(1 + np.tile(self.initial_states["state1"],
(self.batch_size, 1, 1)), state1_value)
self.assertAllEqual(-1 + np.tile(self.initial_states["state2"],
@@ -148,7 +197,7 @@ class BatchSequencesWithStatesTest(test.TestCase):
coord.request_stop()
coord.join(threads, stop_grace_period_secs=2)
- def testBasicPadding(self):
+ def _testBasicPadding(self, pad):
num_unroll = 2 # Divisor of value_length - so no padding necessary.
expected_seq1_batch1 = np.tile(
self.sequences["seq1"][np.newaxis, 0:num_unroll, :],
@@ -162,37 +211,74 @@ class BatchSequencesWithStatesTest(test.TestCase):
expected_seq2_batch2 = np.tile(
self.sequences["seq2"][np.newaxis, num_unroll:self.value_length, :, :],
(self.batch_size, 1, 1, 1))
+ ind1_1 = np.array([
+ # batch entry 1
+ [0, 0, 0],
+ [0, 1, 0], [0, 1, 3], [0, 1, 4],
+ # batch entry 2
+ [1, 0, 0],
+ [1, 1, 0], [1, 1, 3], [1, 1, 4]])
+ ind1_2 = np.array([
+ # batch entry 1
+ [0, 1, 2], [0, 1, 3],
+ # batch entry 2
+ [1, 1, 2], [1, 1, 3]])
+ val1_1 = np.array([0, 10, 13, 14,
+ 0, 10, 13, 14])
+ val1_2 = np.array([32, 33,
+ 32, 33])
+ shape1 = np.array([self.batch_size, num_unroll, 6])
+
+ # For sp_tensor2 all values fall into the first segment.
+ ind2_1 = np.array([
+ # batch entry 1
+ [0, 0, 0, 1],
+ [0, 0, 1, 0],
+ [0, 0, 1, 2],
+ [0, 1, 0, 3],
+ [0, 1, 1, 0],
+ [0, 1, 1, 1],
+ [0, 1, 1, 2],
+ [0, 1, 2, 2],
+ # batch entry 2
+ [1, 0, 0, 1],
+ [1, 0, 1, 0],
+ [1, 0, 1, 2],
+ [1, 1, 0, 3],
+ [1, 1, 1, 0],
+ [1, 1, 1, 1],
+ [1, 1, 1, 2],
+ [1, 1, 2, 2],
+ ])
+ val2_1 = np.array([1, 10, 12, 103, 150, 149, 150, 122,
+ 1, 10, 12, 103, 150, 149, 150, 122])
+ shape2 = np.array([self.batch_size, num_unroll, 3, 4])
+ expected_seq3_batch1 = sparse_tensor.SparseTensorValue(
+ ind1_1, val1_1, shape1)
+ expected_seq3_batch2 = sparse_tensor.SparseTensorValue(
+ ind1_2, val1_2, shape1)
+ expected_seq4_batch1 = sparse_tensor.SparseTensorValue(
+ ind2_1, val2_1, shape2)
+ expected_seq4_batch2 = sparse_tensor.SparseTensorValue(
+ np.empty(shape=[0, 4], dtype=np.int64), np.array([]), shape2)
self._testBasics(
num_unroll=num_unroll,
length=3,
- pad=True,
+ pad=pad,
expected_seq1_batch1=expected_seq1_batch1,
- expected_seq2_batch1=expected_seq2_batch1,
expected_seq1_batch2=expected_seq1_batch2,
- expected_seq2_batch2=expected_seq2_batch2)
-
- def testBasics(self):
- num_unroll = 2 # Divisor of value_length - so no padding necessary.
- expected_seq1_batch1 = np.tile(
- self.sequences["seq1"][np.newaxis, 0:num_unroll, :],
- (self.batch_size, 1, 1))
- expected_seq2_batch1 = np.tile(
- self.sequences["seq2"][np.newaxis, 0:num_unroll, :, :],
- (self.batch_size, 1, 1, 1))
- expected_seq1_batch2 = np.tile(
- self.sequences["seq1"][np.newaxis, num_unroll:self.value_length, :],
- (self.batch_size, 1, 1))
- expected_seq2_batch2 = np.tile(
- self.sequences["seq2"][np.newaxis, num_unroll:self.value_length, :, :],
- (self.batch_size, 1, 1, 1))
- self._testBasics(
- num_unroll=num_unroll,
- length=3,
- pad=False,
- expected_seq1_batch1=expected_seq1_batch1,
expected_seq2_batch1=expected_seq2_batch1,
- expected_seq1_batch2=expected_seq1_batch2,
- expected_seq2_batch2=expected_seq2_batch2)
+ expected_seq2_batch2=expected_seq2_batch2,
+ expected_seq3_batch1=expected_seq3_batch1,
+ expected_seq3_batch2=expected_seq3_batch2,
+ expected_seq4_batch1=expected_seq4_batch1,
+ expected_seq4_batch2=expected_seq4_batch2)
+
+ def testBasicPadding(self):
+ self._testBasicPadding(pad=True)
+
+ def testBasicNoPadding(self):
+ self._testBasicPadding(pad=False)
def testNotAMultiple(self):
num_unroll = 3 # Not a divisor of value_length -
@@ -254,14 +340,69 @@ class BatchSequencesWithStatesTest(test.TestCase):
expected_seq2_batch2 = np.concatenate(
[padded_seq2] * self.batch_size, axis=0)
+ ind1_1 = np.array([
+ # batch entry 1
+ [0, 0, 0],
+ [0, 1, 0], [0, 1, 3], [0, 1, 4],
+ # batch entry 2
+ [1, 0, 0],
+ [1, 1, 0], [1, 1, 3], [1, 1, 4]])
+ ind1_2 = np.array([
+ # batch entry 1
+ [0, 0, 2], [0, 0, 3],
+ # batch entry 2
+ [1, 0, 2], [1, 0, 3]])
+ val1_1 = np.array([0, 10, 13, 14,
+ 0, 10, 13, 14])
+ val1_2 = np.array([32, 33,
+ 32, 33])
+ shape1 = np.array([self.batch_size, num_unroll, 6])
+
+ # For sp_tensor2 all values fall into the first segment.
+ ind2_1 = np.array([
+ # batch entry 1
+ [0, 0, 0, 1],
+ [0, 0, 1, 0],
+ [0, 0, 1, 2],
+ [0, 1, 0, 3],
+ [0, 1, 1, 0],
+ [0, 1, 1, 1],
+ [0, 1, 1, 2],
+ [0, 1, 2, 2],
+ # batch entry 2
+ [1, 0, 0, 1],
+ [1, 0, 1, 0],
+ [1, 0, 1, 2],
+ [1, 1, 0, 3],
+ [1, 1, 1, 0],
+ [1, 1, 1, 1],
+ [1, 1, 1, 2],
+ [1, 1, 2, 2],
+ ])
+ val2_1 = np.array([1, 10, 12, 103, 150, 149, 150, 122,
+ 1, 10, 12, 103, 150, 149, 150, 122])
+ shape2 = np.array([self.batch_size, num_unroll, 3, 4])
+ expected_seq3_batch1 = sparse_tensor.SparseTensorValue(
+ ind1_1, val1_1, shape1)
+ expected_seq3_batch2 = sparse_tensor.SparseTensorValue(
+ ind1_2, val1_2, shape1)
+ expected_seq4_batch1 = sparse_tensor.SparseTensorValue(
+ ind2_1, val2_1, shape2)
+ expected_seq4_batch2 = sparse_tensor.SparseTensorValue(
+ np.empty(shape=[0, 4], dtype=np.int64), np.array([]), shape2)
+
self._testBasics(
num_unroll=num_unroll,
length=None,
pad=True,
expected_seq1_batch1=expected_seq1_batch1,
- expected_seq2_batch1=expected_seq2_batch1,
expected_seq1_batch2=expected_seq1_batch2,
- expected_seq2_batch2=expected_seq2_batch2)
+ expected_seq2_batch1=expected_seq2_batch1,
+ expected_seq2_batch2=expected_seq2_batch2,
+ expected_seq3_batch1=expected_seq3_batch1,
+ expected_seq3_batch2=expected_seq3_batch2,
+ expected_seq4_batch1=expected_seq4_batch1,
+ expected_seq4_batch2=expected_seq4_batch2)
class PaddingTest(test.TestCase):
@@ -270,8 +411,8 @@ class PaddingTest(test.TestCase):
with ops.Graph().as_default() as g, self.test_session(graph=g):
sequences = {
"key_1": constant_op.constant([1, 2, 3]), # length 3
- "key_2": constant_op.constant([1.5, 2.5])
- } # length 2
+ "key_2": constant_op.constant([1.5, 2.5]) # length 2
+ }
_, padded_seq = sqss._padding(sequences, 2)
with self.assertRaisesOpError(
@@ -300,5 +441,63 @@ class PaddingTest(test.TestCase):
math_ops.reduce_all(math_ops.equal(val, padded_seq[key])).eval())
+class SparseTensorReConstructionTest(test.TestCase):
+
+ def testAddManyTakeManyRoundTripBatched(self):
+ with self.test_session(use_gpu=False) as sess:
+ # N == 4 because shape_value == [4, 5]
+ indices_value_1 = np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64)
+ values_value_1 = np.array([b"a", b"b", b"c"])
+ shape_value_1 = np.array([4, 5], dtype=np.int64)
+ sparse_tensor_1 = sparse_tensor.SparseTensor(
+ array_ops.placeholder(dtypes.int64),
+ array_ops.placeholder(dtypes.string),
+ array_ops.placeholder(dtypes.int64))
+ dict1 = {"key": sparse_tensor_1}
+ indices_value_2 = np.array([[1, 4], [2, 3]], dtype=np.int64)
+ values_value_2 = np.array([b"d", b"e"])
+ shape_value_2 = np.array([4, 5], dtype=np.int64)
+ sparse_tensor_2 = sparse_tensor.SparseTensor(
+ array_ops.placeholder(dtypes.int64),
+ array_ops.placeholder(dtypes.string),
+ array_ops.placeholder(dtypes.int64))
+ dict2 = {"key": sparse_tensor_2}
+
+ input_seq1, keys1, tensor_list1 = sqss._deconstruct_sparse_tensor_seq(
+ dict1, shared_name="a")
+ handles_1 = input_seq1["key"]
+ input_seq2, _, _ = sqss._deconstruct_sparse_tensor_seq(
+ dict2, shared_name="a")
+ handles_2 = input_seq2["key"]
+
+ combined_handles = array_ops.stack(
+ [handles_1[1], handles_1[2], handles_1[3],
+ handles_2[1], handles_2[2], handles_2[3]])
+ batched_dict = {"key": combined_handles}
+ sqss._reconstruct_sparse_tensor_seq(
+ batched_dict,
+ keys1,
+ tensor_list1,
+ batch_size=2,
+ num_unroll=3)
+
+ roundtrip_value, = sess.run(
+ [batched_dict["key"]],
+ feed_dict={sparse_tensor_1.indices: indices_value_1,
+ sparse_tensor_1.values: values_value_1,
+ sparse_tensor_1.dense_shape: shape_value_1,
+ sparse_tensor_2.indices: indices_value_2,
+ sparse_tensor_2.values: values_value_2,
+ sparse_tensor_2.dense_shape: shape_value_2})
+
+ self.assertAllEqual(roundtrip_value.indices,
+ np.array([[0, 1, 0], [1, 0, 4], [1, 1, 3]],
+ dtype=np.int64))
+ self.assertAllEqual(roundtrip_value.values,
+ np.array([b"c", b"d", b"e"]))
+ self.assertAllEqual(roundtrip_value.dense_shape,
+ np.array([2, 3, 5], dtype=np.int64))
+
+
if __name__ == "__main__":
test.main()
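The round-trip expectation above can be reproduced with a few lines of index arithmetic: each of the six stacked handles occupies one slot in a flat [batch_size * num_unroll] range, and a slot maps back to (batch, time) by integer division and modulo. A NumPy sketch under the same toy data (an illustration, not the SparseTensorsMap machinery):

import numpy as np

batch_size, num_unroll = 2, 3

# Entries of the original SparseTensors that fall into the selected rows
# 1..3: ("c", column 0) from sparse_tensor_1 row 2, and ("d", column 4),
# ("e", column 3) from sparse_tensor_2 rows 1 and 2.
slots = np.array([2 - 1,           # sparse_tensor_1 handles occupy slots 0..2
                  3 + (1 - 1),     # sparse_tensor_2 handles occupy slots 3..5
                  3 + (2 - 1)])
cols = np.array([0, 4, 3])

indices = np.stack([slots // num_unroll, slots % num_unroll, cols], axis=1)
print(indices)   # [[0 1 0] [1 0 4] [1 1 3]], matching the asserted indices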
diff --git a/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py b/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py
index a4f753acca..19e0809be8 100644
--- a/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py
+++ b/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py
@@ -29,16 +29,23 @@ import six
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import data_flow_ops
from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import string_ops
from tensorflow.python.summary import summary
from tensorflow.python.training import queue_runner
+# pylint: disable=protected-access
+_restore_sparse = sparse_ops._take_many_sparse_from_tensors_map
+_store_sparse = sparse_ops._add_many_sparse_to_tensors_map
+# pylint: enable=protected-access
+
class _SequenceInputWrapper(object):
"""A wrapper object for storing sequence-related input.
@@ -1418,23 +1425,60 @@ def batch_sequences_with_states(input_key,
elif input_sequences:
# Assert that value_length is a multiple of num_unroll.
for key, value in input_sequences.items():
- value_length = array_ops.shape(value)[0]
- with ops.control_dependencies([
- control_flow_ops.Assert(
- math_ops.logical_and(
- math_ops.equal(value_length % num_unroll, 0),
- math_ops.not_equal(value_length, 0)),
- [
- string_ops.string_join([
- "Tensor %s first dimension should be a multiple of: " %
- key, string_ops.as_string(num_unroll),
- ", but saw value: ", string_ops.as_string(value_length),
- ". Consider setting pad=True."
- ])
- ])
- ]):
- input_sequences[key] = array_ops.identity(
- value, name="multiple_of_checked")
+ if (isinstance(value, sparse_tensor.SparseTensor) or
+ isinstance(value, sparse_tensor.SparseTensorValue)):
+ value_length = value.dense_shape[0]
+ with ops.control_dependencies([
+ control_flow_ops.Assert(
+ math_ops.logical_and(
+ math_ops.equal(value_length % num_unroll, 0),
+ math_ops.not_equal(value_length, 0)),
+ [
+ string_ops.string_join([
+ "SparseTensor %s first dimension should be a "
+ "multiple of: " % key,
+ string_ops.as_string(num_unroll),
+ ", but saw value: ",
+ string_ops.as_string(value_length),
+ ". Consider setting pad=True."])])]):
+ input_sequences[key] = sparse_tensor.SparseTensor(
+ indices=value.indices,
+ values=array_ops.identity(
+ value.values, name="multiple_of_checked"),
+ dense_shape=value.dense_shape)
+ else:
+ if not isinstance(value, ops.Tensor):
+ try:
+ value = ops.convert_to_tensor(value)
+ except TypeError:
+ raise TypeError(
+ "Unsupported input_sequences expected Tensor or SparseTensor "
+ "values, got: %s for key %s" % (str(type(value)), key))
+ value_length = array_ops.shape(value)[0]
+ with ops.control_dependencies([
+ control_flow_ops.Assert(
+ math_ops.logical_and(
+ math_ops.equal(value_length % num_unroll, 0),
+ math_ops.not_equal(value_length, 0)),
+ [
+ string_ops.string_join([
+ "Tensor %s first dimension should be a multiple "
+ "of: " % key,
+ string_ops.as_string(num_unroll),
+ ", but saw value: ",
+ string_ops.as_string(value_length),
+ ". Consider setting pad=True."
+ ])
+ ])
+ ]):
+ input_sequences[key] = array_ops.identity(
+ value, name="multiple_of_checked")
+
+ # Deconstruct SparseTensors in sequence into a dense Tensor before inputting
+ # to SQSS.
+ (transformed_input_seq,
+ sparse_tensor_keys,
+ tensor_list) = _deconstruct_sparse_tensor_seq(input_sequences)
# setup stateful queue reader
stateful_reader = SequenceQueueingStateSaver(
@@ -1442,7 +1486,7 @@ def batch_sequences_with_states(input_key,
num_unroll,
input_length=input_length,
input_key=input_key,
- input_sequences=input_sequences,
+ input_sequences=transformed_input_seq,
input_context=input_context,
initial_states=initial_states,
capacity=capacity,
@@ -1457,7 +1501,16 @@ def batch_sequences_with_states(input_key,
queue_closed_exception_types=(errors.OutOfRangeError,
errors.CancelledError))
queue_runner.add_queue_runner(q_runner)
- return stateful_reader.next_batch
+ batch = stateful_reader.next_batch
+
+ # Reconstruct SparseTensors in sequence.
+ _reconstruct_sparse_tensor_seq(
+ batch.sequences,
+ sparse_tensor_keys,
+ tensor_list,
+ batch_size,
+ num_unroll)
+ return batch
def _padding(sequences, num_unroll):
@@ -1489,38 +1542,187 @@ def _padding(sequences, num_unroll):
sequences_dict = {}
for key, value in sequences.items():
- sequences_dict[key] = ops.convert_to_tensor(value)
-
- lengths = [array_ops.shape(value)[0] for value in sequences_dict.values()]
- length = lengths[0]
- all_lengths_equal = [
- control_flow_ops.Assert(
- math_ops.equal(l, length), [
- string_ops.string_join([
- "All sequence lengths must match, but received lengths: ",
- string_ops.as_string(lengths)
- ])
- ]) for l in lengths
- ]
+ if not (isinstance(value, sparse_tensor.SparseTensor) or
+ isinstance(value, sparse_tensor.SparseTensorValue)):
+ sequences_dict[key] = ops.convert_to_tensor(value)
+ else:
+ sequences_dict[key] = value
+
+ lengths = [array_ops.shape(value)[0] for value in sequences_dict.values()
+ if isinstance(value, ops.Tensor)]
+ if lengths:
+ length = lengths[0]
+ all_lengths_equal = [
+ control_flow_ops.Assert(
+ math_ops.equal(l, length), [string_ops.string_join(
+ ["All sequence lengths must match, but received lengths: ",
+ string_ops.as_string(lengths)])])
+ for l in lengths]
+ length = control_flow_ops.with_dependencies(all_lengths_equal, length)
+ else: # Only have SparseTensors
+ sparse_lengths = [value.dense_shape[0] for value in sequences_dict.values()
+ if isinstance(value, sparse_tensor.SparseTensor)]
+ length = math_ops.reduce_max(sparse_lengths)
- length = control_flow_ops.with_dependencies(all_lengths_equal, length)
unroll = array_ops.constant(num_unroll)
padded_length = length + ((unroll - (length % unroll)) % unroll)
padded_sequences = {}
for key, value in sequences_dict.items():
- # 1. create shape of paddings
- # first dimension of value will be increased by num_paddings to
- # padded_length
- num_paddings = [padded_length - array_ops.shape(value)[0]]
- # the shape of the paddings that we concat with the original value will be
- # [num_paddings, tf.shape(value)[1], tf.shape(value)[2], ...,
- # tf.shape(value)[tf.rank(value) - 1])]
- padding_shape = array_ops.concat((num_paddings, array_ops.shape(value)[1:]),
- 0)
- # 2. fill padding shape with dummies
- dummy = array_ops.constant(
- "" if value.dtype == dtypes.string else 0, dtype=value.dtype)
- paddings = array_ops.fill(dims=padding_shape, value=dummy)
- # 3. concat values with paddings
- padded_sequences[key] = array_ops.concat([value, paddings], 0)
+ if isinstance(value, ops.Tensor):
+ # 1. create shape of paddings
+ # first dimension of value will be increased by num_paddings to
+ # padded_length
+ num_paddings = [padded_length - array_ops.shape(value)[0]]
+ # the shape of the paddings that we concat with the original value will be
+ # [num_paddings, tf.shape(value)[1], tf.shape(value)[2], ...,
+ # tf.shape(value)[tf.rank(value) - 1])]
+ padding_shape = array_ops.concat(
+ (num_paddings, array_ops.shape(value)[1:]), 0)
+ # 2. fill padding shape with dummies
+ dummy = array_ops.constant(
+ "" if value.dtype == dtypes.string else 0, dtype=value.dtype)
+ paddings = array_ops.fill(dims=padding_shape, value=dummy)
+ # 3. concat values with paddings
+ padded_sequences[key] = array_ops.concat([value, paddings], 0)
+ else:
+ padded_shape = array_ops.concat([[math_ops.to_int64(padded_length)],
+ value.dense_shape[1:]], 0)
+ padded_sequences[key] = sparse_tensor.SparseTensor(
+ indices=value.indices,
+ values=value.values,
+ dense_shape=padded_shape)
return length, padded_sequences
+
+
+def _deconstruct_sparse_tensor_seq(input_sequence, shared_name=None):
+ """Converts `SparseTensor` values into `Tensors` of IDs and meta data.
+
+ Given a dict of keys -> `Tensor` or `SparseTensor` transforms the
+ `SparseTensor` values into `Tensor` values of IDs by calling `_store_sparse`.
+ The IDs are pointers into and underlying `SparseTensorsMap` that is being
+ constructed. Additional meta data is returned in order to be able to
+ reconstruct `SparseTensor` values after batching and segmenting the IDs
+ `Tensor`.
+
+ Args:
+ input_sequence: dictionary with `Tensor` or `SparseTensor` values.
+ shared_name: The shared name for the underlying `SparseTensorsMap`
+ (optional, defaults to the name of the newly created op).
+ Returns:
+ A tuple `(sequence, sparse_tensor_keys, tensor_list)` where `sequence` is a
+ dictionary with the same keys as `input_sequence` but only `Tensor` values,
+ `sparse_tensor_keys` is a list of the keys of the `SparseTensor` values that
+ were converted, and `tensor_list` is a list of the same length containing
+ the `Operation`s that produced the stored sparse handles.
+ """
+ sparse_tensor_keys = [
+ k for k in sorted(input_sequence.keys())
+ if isinstance(input_sequence[k], sparse_tensor.SparseTensor)]
+ if not sparse_tensor_keys:
+ return input_sequence, None, []
+ sparse_tensor_list = [input_sequence[k] for k in sparse_tensor_keys]
+ tensor_list = [_store_sparse(sp_tensor, shared_name=shared_name)
+ for sp_tensor in sparse_tensor_list]
+ transformed_input_seq = dict(input_sequence)
+ tensor_op_list = []
+ for i, k in enumerate(sparse_tensor_keys):
+ transformed_input_seq[k] = tensor_list[i]
+ tensor_op_list += [tensor_list[i].op]
+ return transformed_input_seq, sparse_tensor_keys, tensor_op_list
+
+
+def _reconstruct_sparse_tensor_seq(sequence,
+ sparse_tensor_keys,
+ tensor_op_list,
+ batch_size,
+ num_unroll):
+ """Inverse of _deconstruct_sparse_tensor_seq.
+
+ Given a dict of keys -> `Tensor` reconstructs `SparseTensor` values for keys
+ in `sparse_tensor_keys`. Their `Tensor` values are assumed to be IDs into the
+ underlying `SparseTensorsMap`. The `dense_shape` of the `SparseTensor`s is
+ `[batch_size, num_unroll, d_0, d_1, ..., d_n]` when the original
+ `SparseTensor` that got deconstructed with `_deconstruct_sparse_tensor_seq`
+ has a `dense_shape` of `[None, d_0, d_1, ..., d_n]`.
+
+ Args:
+ sequence: dictionary with only `Tensor` values that is being updated.
+ sparse_tensor_keys: list of the keys present in `sequence` identifying
+ `SparseTensor` values that should be reconstructed.
+ tensor_op_list: list of the same length as `sparse_tensor_keys` containing
+ the `Operation`s that produced the stored sparse handles.
+ batch_size: int or int32 scalar `Tensor`, how large minibatches should
+ be.
+ num_unroll: Python integer, how many time steps were unrolled at a time.
+ """
+ def _flatten_tensor(tensor):
+ """Flattens `Tensor` of `shape [batch_size, num_unroll]` into 1D `Tensor`.
+
+ The main use of this function is to work around the limitation of
+ `_restore_sparse` to only accept 1D handles.
+
+ Args:
+ tensor: 2D `Tensor` of shape `[batch_size, num_unroll]`.
+ Returns:
+ 1D `Tensor`.
+ """
+ return array_ops.reshape(tensor, [-1])
+
+ def _unflatten_sparse_tensor(sp_tensor):
+ """Recreates `[batch_size, num_unroll]` dimensions in the `SparseTensor`.
+
+ Counter-part of `_flatten_tensor` which is called on the input of
+ `_restore_sparse` while this method is called on the output of it.
+ Together they work around the limitation of `_restore_sparse` to only
+ accept 1D handles.
+
+ The `indices` in `sp_tensor` form a 2D `Tensor` of shape `[N, ndims]`, where
+ `N` is the number of `values` and `ndims` is the number of dimensions of its
+ dense counterpart. The first of these `ndims` entries corresponds to the
+ flattened batch dimension `[0, num_unroll * batch_size)`, from which we need
+ to recreate the two dimensions `batch_size` and `num_unroll`.
+
+ This reconstruction works because the output of `_restore_sparse`, despite
+ being a `SparseTensor`, is actually dense with respect to that first entry.
+
+ Args:
+ sp_tensor: A SparseTensor.
+ Returns:
+ A SparseTensor with a +1 higher rank than the input.
+ """
+ idx_batch = math_ops.to_int64(math_ops.floor(sp_tensor.indices[:, 0] / num_unroll))
+ idx_time = math_ops.mod(sp_tensor.indices[:, 0], num_unroll)
+ indices = array_ops.concat_v2([array_ops.expand_dims(idx_batch, 1),
+ array_ops.expand_dims(idx_time, 1),
+ sp_tensor.indices[:, 1:]], axis=1)
+ dense_shape = array_ops.concat_v2(
+ [[math_ops.to_int64(batch_size)], [num_unroll], sp_tensor.dense_shape[1:]],
+ axis=0)
+ return sparse_tensor.SparseTensor(
+ indices=indices,
+ values=sp_tensor.values,
+ dense_shape=dense_shape)
+
+ if not sparse_tensor_keys:
+ return
+ tensor_list = [sequence[k] for k in sparse_tensor_keys]
+ sp_tensors = [
+ _restore_sparse(sparse_map_op=i,
+ # Flatten the 2D Tensor [batch_size, num_unroll] of
+ # handles to a 1D Tensor.
+ # Reconstruct the dimensions later.
+ # TODO(b/34247140): Remove this workaround.
+ sparse_handles=_flatten_tensor(s), rank=None)
+ for i, s in zip(tensor_op_list, tensor_list)]
+ num_unroll = ops.convert_to_tensor(num_unroll, dtype=dtypes.int64,
+ name="num_unroll_int64")
+
+ # Recreate the [batch_size, num_unroll] dimensions in the SparseTensors.
+ # The dense_shape will have a +1 higher rank.
+ # TODO(b/34247140): Remove this workaround.
+ sp_tensors_higher_dim = [_unflatten_sparse_tensor(s) for s in sp_tensors]
+
+ # Set values to SparseTensors for sparse_tensor_keys.
+ for i, key in enumerate(sparse_tensor_keys):
+ sequence[key] = sp_tensors_higher_dim[i]
+ return
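The padded-length arithmetic used in _padding above rounds the first dimension up to the next multiple of num_unroll (dense values get dummy padding; SparseTensors only grow their dense_shape). A small sketch of that rounding:

num_unroll = 3
for length in (4, 6, 7):
    padded = length + ((num_unroll - (length % num_unroll)) % num_unroll)
    print(length, "->", padded)   # 4 -> 6, 6 -> 6, 7 -> 9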
diff --git a/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc b/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc
index cb1e7577cf..096ca0f0cf 100644
--- a/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc
+++ b/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc
@@ -52,7 +52,7 @@ TEST(ConvertGraphdefMemmappedFormatTest, ConvertModel) {
test::FillFn<float>(&test_tensor2, [](int) -> float { return 3.0; });
auto root = Scope::NewRootScope().ExitOnError();
- ops::Output m = ops::MatMul(root, test_tensor1, test_tensor2);
+ Output m = ops::MatMul(root, test_tensor1, test_tensor2);
const string result_name = m.node()->name();
GraphDef graph_def;
@@ -103,7 +103,7 @@ TEST(ConvertGraphdefMemmappedFormatTest, NotSupportedTypesConvert) {
Tensor test_tensor2(DT_STRING, kTestTensorShape);
test::FillFn<string>(&test_tensor2, [](int) -> string { return "XYZ"; });
auto root = Scope::NewRootScope().ExitOnError();
- ops::Output m = ops::Add(root, test_tensor1, test_tensor2);
+ Output m = ops::Add(root, test_tensor1, test_tensor2);
const string result_name = m.node()->name();
GraphDef graph_def;
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index c27cc48805..72268c8824 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -258,6 +258,7 @@ cc_library(
"platform/net.h",
"platform/notification.h",
"platform/prefetch.h",
+ "platform/profile_utils/clock_cycle_profiler.h",
"platform/profile_utils/cpu_utils.h",
"platform/protobuf.h",
"platform/stacktrace.h",
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 85ce9d772a..38eb283b10 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -739,8 +739,7 @@ Status DirectSession::SendInputs(const NamedTensorList& inputs,
for (const auto& input : inputs) {
auto it = executors_and_keys->input_keys.find(input.first);
if (it == executors_and_keys->input_keys.end()) {
- return errors::InvalidArgument("'", input.first,
- "' is not a pre-defined feed!");
+ return errors::Internal("'", input.first, "' is not a pre-defined feed.");
}
const string& input_key = it->second;
@@ -775,9 +774,8 @@ Status DirectSession::RecvOutputs(const std::vector<string>& output_names,
const string& output_name = output_names[output_offset];
auto it = executors_and_keys->output_keys.find(output_name);
if (it == executors_and_keys->output_keys.end()) {
- return errors::InvalidArgument("'", output_name,
- "' was not defined as a fetch"
- " target in PRunSetup.");
+ return errors::Internal("'", output_name,
+ "' is not a pre-defined fetch.");
}
const string& output_key = it->second;
Tensor output_tensor;
diff --git a/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc b/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc
index 3aaaf87e79..b186c9d88c 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc
@@ -107,7 +107,7 @@ TEST_F(GpuStreamUtilTest, StreamOverrides) {
auto root = Scope::NewRootScope().ExitOnError();
ops::_Recv(root.WithOpName("input"), DT_FLOAT, "input", "/cpu:0", 0,
"/gpu:0");
- ops::Output n = ops::MatMul(root, {}, {});
+ Output n = ops::MatMul(root, {}, {});
ops::_Send(root.WithOpName("output"), n, "output", "/gpu:0", 0, "/cpu:0");
Graph g(OpRegistry::Global());
TF_ASSERT_OK(root.ToGraph(&g));
diff --git a/tensorflow/core/common_runtime/graph_optimizer.cc b/tensorflow/core/common_runtime/graph_optimizer.cc
index cd4bf579c9..e2be3a6086 100644
--- a/tensorflow/core/common_runtime/graph_optimizer.cc
+++ b/tensorflow/core/common_runtime/graph_optimizer.cc
@@ -18,131 +18,10 @@ limitations under the License.
#include "tensorflow/core/common_runtime/constant_folding.h"
#include "tensorflow/core/common_runtime/function.h"
#include "tensorflow/core/graph/algorithm.h"
+#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/graph/optimizer_cse.h"
namespace tensorflow {
-namespace {
-
-// Replaces occurrences of parallel_concat with the implementation based on
-// unsafe ops. Sets removed_any to true if any parallel_concats were removed;
-// leaves it untouched otherwise.
-// TODO(apassos) Use NodeBuilder.
-Status RemoveParallelConcat(bool* removed_any, Graph* g) {
- gtl::InlinedVector<Node*, 2> matches;
- for (Node* n : g->nodes()) {
- if (n->type_string() == "ParallelConcat") {
- matches.push_back(n);
- }
- }
- for (Node* n : matches) {
- AttrSlice n_attrs(n->def());
- auto make_node = [n, g, &n_attrs](string op) {
- NodeDef node;
- node.set_op(op);
- node.set_name(g->NewName(n->name()));
- node.set_device(n->def().device());
- string colo;
- if (GetNodeAttr(n_attrs, "_class", &colo).ok()) {
- AddNodeAttr("_class", colo, &node);
- }
- return node;
- };
- DataType dtype;
- TF_RETURN_IF_ERROR(GetNodeAttr(n_attrs, "T", &dtype));
- TensorShapeProto shape;
- TF_RETURN_IF_ERROR(GetNodeAttr(n_attrs, "shape", &shape));
- // Add the constant shape input to the start node.
- NodeDef shape_node_def = make_node("Const");
- AddNodeAttr("dtype", DT_INT32, &shape_node_def);
- TensorProto shape_tensor;
- shape_tensor.set_dtype(DT_INT32);
- shape_tensor.mutable_tensor_shape()->add_dim()->set_size(shape.dim_size());
- for (int i = 0; i < shape.dim_size(); ++i) {
- shape_tensor.add_int_val(shape.dim(i).size());
- }
- AddNodeAttr("value", shape_tensor, &shape_node_def);
- Status status = Status::OK();
- Node* shape_node = g->AddNode(shape_node_def, &status);
- if (!status.ok()) return status;
-
- // Add the start node
- NodeDef start_def = make_node("_ParallelConcatStart");
- AddNodeAttr("dtype", dtype, &start_def);
- AddNodeAttr("Tshape", DT_INT32, &start_def);
- AddNodeAttr("init", false, &start_def);
- start_def.add_input(shape_node_def.name());
- Node* start = g->AddNode(start_def, &status);
- if (!status.ok()) return status;
- // TODO(apassos): make the shape an attr of _ParallelStackBegin.
- g->AddEdge(shape_node, 0, start, 0);
-
- // Add all the inplace_updates.
- std::vector<string> control_dependencies;
- std::vector<Node*> control_nodes;
- int i = 0;
- for (const Edge* input_edge : n->in_edges()) {
- if (input_edge->IsControlEdge()) {
- g->AddControlEdge(input_edge->src(), start);
- continue;
- }
- // Constant index for the update node.
- // TODO(apassos): make _ParallelStackUpdate take this as an attr.
- NodeDef update_idx_def = make_node("Const");
- AddNodeAttr("dtype", DT_INT64, &update_idx_def);
- TensorProto index_tensor;
- index_tensor.set_dtype(DT_INT64);
- index_tensor.mutable_tensor_shape()->add_dim()->set_size(1);
- index_tensor.add_int64_val(i);
- AddNodeAttr("value", index_tensor, &update_idx_def);
- Node* index = g->AddNode(update_idx_def, &status);
- if (!status.ok()) return status;
-
- NodeDef update_def = make_node("_ParallelConcatUpdate");
- control_dependencies.push_back(update_def.name());
- AddNodeAttr("T", dtype, &update_def);
- AddNodeAttr("Tshape", DT_INT64, &update_def);
- update_def.add_input(start_def.name());
- update_def.add_input(update_idx_def.name());
- update_def.add_input(strings::StrCat(input_edge->src()->name(), ":",
- input_edge->src_output()));
- Node* update = g->AddNode(update_def, &status);
- if (!status.ok()) return status;
- g->AddEdge(start, 0, update, 0);
- g->AddEdge(index, 0, update, 1);
- g->AddEdge(input_edge->src(), input_edge->src_output(), update, 2);
- control_nodes.push_back(update);
-
- ++i;
- }
-
- // Add the final identity.
- NodeDef identity_def = make_node("Identity");
- AddNodeAttr("T", dtype, &identity_def);
- identity_def.add_input(start_def.name());
- for (const string& s : control_dependencies) {
- identity_def.add_input(strings::StrCat("^", s));
- }
- Node* identity_node = g->AddNode(identity_def, &status);
- if (!status.ok()) return status;
- g->AddEdge(start, 0, identity_node, 0);
- for (Node* inp : control_nodes) {
- g->AddControlEdge(inp, identity_node);
- }
-
- // Remove the node and redirect edges.
- for (auto* e : n->out_edges()) {
- if (e->IsControlEdge()) {
- g->AddControlEdge(identity_node, e->dst());
- } else {
- g->AddEdge(identity_node, 0, e->dst(), e->dst_input());
- }
- }
- g->RemoveNode(n);
- *removed_any = true;
- }
- return Status::OK();
-}
-}
GraphOptimizer::GraphOptimizer(const OptimizerOptions& opts) : opts_(opts) {
if (opts_.opt_level() >= OptimizerOptions::L1) {
@@ -166,11 +45,6 @@ void GraphOptimizer::Optimize(FunctionLibraryRuntime* runtime, Env* env,
DumpGraph("RemoveListArrayConverter", g);
changed = true;
}
- auto s = RemoveParallelConcat(&changed, g);
- if (!s.ok()) {
- // TODO(apassos): figure out how to halt here.
- LOG(WARNING) << s;
- }
if (opts_.do_function_inlining() && RemoveDeadNodes(g)) {
DumpGraph("RemoveDeadNodes", g);
changed = true;
diff --git a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc
new file mode 100644
index 0000000000..ffbfbc74f1
--- /dev/null
+++ b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc
@@ -0,0 +1,126 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/common_runtime/graph_optimizer.h"
+
+#include "tensorflow/core/common_runtime/constant_folding.h"
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/common_runtime/optimization_registry.h"
+#include "tensorflow/core/graph/algorithm.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/graph/optimizer_cse.h"
+
+namespace tensorflow {
+namespace {
+
+// Rewrites each ParallelConcat node into the equivalent sequence of unsafe
+// in-place ops: a _ParallelConcatStart, one _ParallelConcatUpdate per input,
+// and a final Identity that carries the result and the control dependencies.
+class ParallelConcatRemovePass : public GraphOptimizationPass {
+ public:
+ Status Run(const GraphOptimizationPassOptions& options) override {
+ if (options.graph == nullptr) {
+ // TODO(apassos) returning OK feels weird here as we can't do anything
+ // without a graph, but some tests require this.
+ return Status::OK();
+ }
+ Graph* g = options.graph->get();
+ if (g == nullptr) {
+ return errors::Internal(
+ "Parallel concat removal should happen before partitioning and a "
+ "graph should be available.");
+ }
+ gtl::InlinedVector<Node*, 2> matches;
+ for (Node* n : g->nodes()) {
+ if (n->type_string() == "ParallelConcat") {
+ matches.push_back(n);
+ }
+ }
+ for (Node* n : matches) {
+ AttrSlice n_attrs(n->def());
+ auto base_make_node = [n, g, &n_attrs](const string& op,
+ const string& name) {
+ NodeBuilder node_builder(name, op);
+ node_builder.Device(n->def().device());
+ string colo;
+ if (GetNodeAttr(n_attrs, "_class", &colo).ok()) {
+ node_builder.Attr("_class", colo);
+ }
+ return node_builder;
+ };
+ auto make_node = [n, g, &n_attrs, &base_make_node](string op) {
+ return base_make_node(
+ op, g->NewName(strings::StrCat(n->name(), "/Internal")));
+ };
+ DataType dtype;
+ TF_RETURN_IF_ERROR(GetNodeAttr(n_attrs, "T", &dtype));
+ TensorShapeProto shape;
+ TF_RETURN_IF_ERROR(GetNodeAttr(n_attrs, "shape", &shape));
+
+ // Add the start node
+ Node* start;
+ TF_RETURN_IF_ERROR(make_node("_ParallelConcatStart")
+ .Attr("shape", shape)
+ .Attr("dtype", dtype)
+ .Finalize(g, &start));
+
+ // Add all the inplace_updates.
+ std::vector<Node*> control_nodes;
+ int64 i = 0;
+ for (const Edge* input_edge : n->in_edges()) {
+ if (input_edge->IsControlEdge()) {
+ g->AddControlEdge(input_edge->src(), start);
+ continue;
+ }
+
+ Node* update;
+ TF_RETURN_IF_ERROR(
+ make_node("_ParallelConcatUpdate")
+ .Attr("loc", i)
+ .Input(start)
+ .Input(input_edge->src(), input_edge->src_output())
+ .Finalize(g, &update));
+ control_nodes.push_back(update);
+
+ ++i;
+ }
+
+ // Add the final identity.
+ NodeBuilder identity_def = base_make_node("Identity", n->name());
+ identity_def.Input(start, 0);
+ for (Node* s : control_nodes) {
+ identity_def.ControlInput(s);
+ }
+ Node* identity_node;
+ TF_RETURN_IF_ERROR(identity_def.Finalize(g, &identity_node));
+
+ // Remove the node and redirect edges.
+ for (auto* e : n->out_edges()) {
+ if (e->IsControlEdge()) {
+ g->AddControlEdge(identity_node, e->dst());
+ } else {
+ g->AddEdge(identity_node, 0, e->dst(), e->dst_input());
+ }
+ }
+ g->RemoveNode(n);
+ }
+ return Status::OK();
+ }
+};
+REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 0,
+ ParallelConcatRemovePass);
+
+} // namespace
+} // namespace tensorflow
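For context, `ParallelConcat` is the node type emitted (as this sketch assumes) by the public `tf.parallel_stack` wrapper, so a TF1-style program like the following would exercise this PRE_PLACEMENT rewrite:

import tensorflow as tf

x = tf.constant([1, 2])
y = tf.constant([3, 4])
# parallel_stack builds its result with in-place updates, so after this pass
# the graph contains _ParallelConcatStart/_ParallelConcatUpdate nodes plus an
# Identity instead of a single ParallelConcat node.
stacked = tf.parallel_stack([x, y])  # shape [2, 2]

with tf.Session() as sess:
    print(sess.run(stacked))  # [[1 2] [3 4]]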
diff --git a/tensorflow/core/common_runtime/shape_refiner_test.cc b/tensorflow/core/common_runtime/shape_refiner_test.cc
index 420594d98a..f7d5a9cfc9 100644
--- a/tensorflow/core/common_runtime/shape_refiner_test.cc
+++ b/tensorflow/core/common_runtime/shape_refiner_test.cc
@@ -492,7 +492,7 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_Shape) {
TF_ASSERT_OK(
NodeBuilder("in", pass == 0 ? "WithPartialShape" : "WithUnknownShape")
.Finalize(root.graph(), &input));
- auto shape = ops::Shape(root, ops::Output(input));
+ auto shape = ops::Shape(root, Output(input));
Node* result;
TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt32")
.Input(shape.node())
@@ -518,12 +518,13 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_PackInt32) {
TF_ASSERT_OK(NodeBuilder("in", "NonConstScalarInt32")
.Finalize(root.graph(), &scalar_non_const));
- ops::InputList inputs{
- ops::Input(ops::Const<int32>(root, 10)),
- ops::Input(ops::Const<int32>(root, 20)),
- ops::Input(ops::Output(scalar_non_const)),
- ops::Input(ops::Const<int32>(root, 40)),
- };
+ InputList inputs{
+ // clang-format off
+ Input(ops::Const<int32>(root, 10)),
+ Input(ops::Const<int32>(root, 20)),
+ Input(Output(scalar_non_const)),
+ Input(ops::Const<int32>(root, 40)),
+ }; // clang-format on
auto pack = ops::Pack(root, inputs);
TF_ASSERT_OK(root.status());
@@ -549,12 +550,13 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_PackInt64) {
TF_ASSERT_OK(NodeBuilder("in", "NonConstScalarInt64")
.Finalize(root.graph(), &scalar_non_const));
- ops::InputList inputs{
- ops::Input(ops::Const<int64>(root, 10LL)),
- ops::Input(ops::Const<int64>(root, 20LL)),
- ops::Input(ops::Output(scalar_non_const)),
- ops::Input(ops::Const<int64>(root, 1LL << 40)),
- };
+ InputList inputs{
+ // clang-format off
+ Input(ops::Const<int64>(root, 10LL)),
+ Input(ops::Const<int64>(root, 20LL)),
+ Input(Output(scalar_non_const)),
+ Input(ops::Const<int64>(root, 1LL << 40)),
+ }; // clang-format on
auto pack = ops::Pack(root, inputs);
TF_ASSERT_OK(root.status());
@@ -577,9 +579,9 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_PackInt64) {
TEST(ShapeRefinerTest, ConstantValueAsShape_PackUnknownDim) {
Scope root = Scope::NewRootScope();
- ops::InputList inputs{
- ops::Input(ops::Const<int64>(root, 10LL)),
- ops::Input(ops::Const<int64>(root, -1LL)),
+ InputList inputs{
+ Input(ops::Const<int64>(root, 10LL)),
+ Input(ops::Const<int64>(root, -1LL)),
};
auto pack = ops::Pack(root, inputs);
TF_ASSERT_OK(root.status());
@@ -604,9 +606,9 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_PackInvalidInput) {
Scope root = Scope::NewRootScope();
// Inputs are length 2 vectors instead of scalars.
- ops::InputList inputs{
- ops::Input(ops::Const<int64>(root, {10LL, 20LL})),
- ops::Input(ops::Const<int64>(root, {10LL, 21LL})),
+ InputList inputs{
+ Input(ops::Const<int64>(root, {10LL, 20LL})),
+ Input(ops::Const<int64>(root, {10LL, 21LL})),
};
auto pack = ops::Pack(root, inputs);
TF_ASSERT_OK(root.status());
@@ -633,10 +635,12 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_Concat) {
TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape").Finalize(g, &partial_1));
TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape2").Finalize(g, &partial_2));
auto const_input = ops::Const(root, {9, 10, 11});
- ops::OutputList concat_inputs{
- ops::Shape(root, ops::Output(partial_1)),
- ops::Shape(root, ops::Output(partial_2)), const_input,
- };
+ OutputList concat_inputs{
+ // clang-format off
+ ops::Shape(root, Output(partial_1)),
+ ops::Shape(root, Output(partial_2)),
+ const_input,
+ }; // clang-format on
auto concat_dim = ops::Const(root, 0);
auto concat = ops::Concat(root, concat_dim, concat_inputs);
TF_ASSERT_OK(root.status());
@@ -673,11 +677,12 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_ConcatWithUnknown) {
TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape").Finalize(g, &partial_1));
TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape2").Finalize(g, &partial_2));
TF_ASSERT_OK(NodeBuilder("in", "WithUnknownShape").Finalize(g, &unknown));
- ops::OutputList concat_inputs{
- ops::Shape(root, ops::Output(partial_1)),
- ops::Shape(root, ops::Output(partial_2)),
- ops::Shape(root, ops::Output(unknown)),
- };
+ OutputList concat_inputs{
+ // clang-format off
+ ops::Shape(root, Output(partial_1)),
+ ops::Shape(root, Output(partial_2)),
+ ops::Shape(root, Output(unknown)),
+ }; // clang-format on
auto concat_dim = ops::Const(root, 0);
auto concat = ops::Concat(root, concat_dim, concat_inputs);
TF_ASSERT_OK(root.status());
@@ -714,11 +719,12 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_ConcatInvalidDimValue) {
TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape").Finalize(g, &partial_1));
TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape2").Finalize(g, &partial_2));
auto const_input = ops::Const(root, {9, -2, 11});
- ops::OutputList concat_inputs{
- ops::Shape(root, ops::Output(partial_1)),
- ops::Shape(root, ops::Output(partial_2)), //
+ OutputList concat_inputs{
+ // clang-format off
+ ops::Shape(root, Output(partial_1)),
+ ops::Shape(root, Output(partial_2)),
const_input,
- };
+ }; // clang-format on
auto concat_dim = ops::Const(root, 0);
auto concat = ops::Concat(root, concat_dim, concat_inputs);
TF_ASSERT_OK(root.status());
diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD
index 73018ec258..e267414654 100644
--- a/tensorflow/core/distributed_runtime/BUILD
+++ b/tensorflow/core/distributed_runtime/BUILD
@@ -392,7 +392,7 @@ tf_cuda_cc_test(
name = "rpcbench_test",
size = "small",
srcs = ["rpcbench_test.cc"],
- linkstatic = tf_kernel_tests_linkstatic(),
+ linkstatic = 1,
tags = tf_cuda_tests_tags(),
deps = [
"//tensorflow/cc:cc_ops",
diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc
index d155051273..44646e9241 100644
--- a/tensorflow/core/graph/graph_constructor.cc
+++ b/tensorflow/core/graph/graph_constructor.cc
@@ -67,6 +67,7 @@ class GraphConstructor {
: in.prefix + "/"),
input_map(in.input_map),
control_dependencies(in.control_dependencies),
+ return_tensors(in.return_tensors),
importing(true) {}
bool allow_internal_ops;
@@ -75,6 +76,7 @@ class GraphConstructor {
string prefix;
std::map<TensorId, TensorId> input_map;
std::vector<string> control_dependencies;
+ std::vector<TensorId> return_tensors;
// TODO(ashankar): This bool exists to separate out functionality required
// to make ImportGraphDef a close equivalent of Python's import_graph_def
@@ -88,11 +90,12 @@ class GraphConstructor {
};
static Status Construct(const Options& opts, const GraphDef* gdef, Graph* g,
- ShapeRefiner* refiner) {
+ ShapeRefiner* refiner,
+ std::vector<std::pair<Node*, int>>* return_tensors) {
TF_RETURN_IF_ERROR(CheckVersions(gdef->versions(), TF_GRAPH_DEF_VERSION,
TF_GRAPH_DEF_VERSION_MIN_PRODUCER,
"GraphDef", "graph"));
- GraphConstructor c(opts, gdef, g, refiner);
+ GraphConstructor c(opts, gdef, g, refiner, return_tensors);
const Status s = c.TryImport();
if (!s.ok()) c.Undo();
return s;
@@ -100,12 +103,14 @@ class GraphConstructor {
private:
GraphConstructor(const Options& opts, const GraphDef* gdef, Graph* g,
- ShapeRefiner* refiner)
+ ShapeRefiner* refiner,
+ std::vector<std::pair<Node*, int>>* return_tensors)
: opts_(opts),
gdef_(gdef),
g_(g),
original_versions_(g->versions()),
- refiner_(refiner) {}
+ refiner_(refiner),
+ return_tensors_(return_tensors) {}
Status TryImport() {
TF_RETURN_IF_ERROR(EnsureNoNameCollisions());
@@ -115,6 +120,7 @@ class GraphConstructor {
TF_RETURN_IF_ERROR(Convert());
TF_RETURN_IF_ERROR(AddBackEdges());
TF_RETURN_IF_ERROR(UpdateVersionDef());
+ TF_RETURN_IF_ERROR(PopulateReturnTensors());
FixupSourceAndSinkEdges(g_);
return Status::OK();
}
@@ -126,6 +132,7 @@ class GraphConstructor {
Status Convert();
Status AddBackEdges();
Status UpdateVersionDef();
+ Status PopulateReturnTensors();
void Undo();
@@ -156,6 +163,9 @@ class GraphConstructor {
ShapeRefiner* refiner_;
+ // May be null. Not owned.
+ std::vector<std::pair<Node*, int>>* return_tensors_;
+
// Mapping from node name to the index within gdef_
struct NodeInfo {
explicit NodeInfo(int i) : gdef_index(i), node(nullptr) {}
@@ -752,6 +762,36 @@ Status GraphConstructor::UpdateVersionDef() {
return Status::OK();
}
+Status GraphConstructor::PopulateReturnTensors() {
+ if (opts_.return_tensors.empty()) return Status::OK();
+ for (const TensorId& id : opts_.return_tensors) {
+ auto iter = opts_.input_map.find(id);
+ if (iter == opts_.input_map.end()) {
+ // Locate id in imported nodes
+ auto iter = gdef_nodes_.find(id.first);
+ if (iter == gdef_nodes_.end()) {
+ return errors::InvalidArgument(
+ "Requested return node '", id.first, "' not found in graph def");
+ }
+ int num_outputs = iter->second.node->num_outputs();
+ if ((id.second < 0 || id.second >= num_outputs) &&
+ id.second != Graph::kControlSlot) {
+ return errors::InvalidArgument(
+ "Invalid return output ", id.second, " of node '", id.first,
+ "', which has ", num_outputs, " outputs");
+ }
+ return_tensors_->push_back({iter->second.node, id.second});
+ } else {
+ // id was remapped to existing node
+ TensorId remapped_id = iter->second;
+ DCHECK_GT(existing_nodes_.count(remapped_id.first), 0);
+ Node* node = existing_nodes_[remapped_id.first];
+ return_tensors_->push_back({node, remapped_id.second});
+ }
+ }
+ return Status::OK();
+}
+
void GraphConstructor::Undo() {
for (const auto& iter : gdef_nodes_) {
if (iter.second.node != nullptr) {
@@ -780,16 +820,30 @@ Status GraphConstructor::MakeEdge(Node* src, int output_index, Node* dst,
Status ConvertGraphDefToGraph(const GraphConstructorOptions& opts,
const GraphDef& gdef, Graph* g) {
ShapeRefiner refiner(g->op_registry());
- return GraphConstructor::Construct(opts, &gdef, g, &refiner);
+ return GraphConstructor::Construct(opts, &gdef, g, &refiner, nullptr);
}
Status ImportGraphDef(const ImportGraphDefOptions& opts, const GraphDef& gdef,
- Graph* g, ShapeRefiner* refiner) {
+ Graph* g, ShapeRefiner* refiner,
+ std::vector<std::pair<Node*, int>>* return_tensors) {
ShapeRefiner default_refiner(g->op_registry());
if (refiner == nullptr) {
refiner = &default_refiner;
}
- return GraphConstructor::Construct(opts, &gdef, g, refiner);
+
+ if (!opts.return_tensors.empty()) {
+ if (return_tensors == nullptr) {
+ return errors::InvalidArgument(
+ "return_tensors argument to ImportNodeDef() must be non-null if "
+ "opts.return_tensors is non-empty");
+ }
+ if (!return_tensors->empty()) {
+ return errors::InvalidArgument(
+ "return_tensors argument to ImportNodeDef() should be empty (has "
+ "size ", return_tensors->size(), ")");
+ }
+ }
+ return GraphConstructor::Construct(opts, &gdef, g, refiner, return_tensors);
}
void CopyGraph(const Graph& src, Graph* dest) {
diff --git a/tensorflow/core/graph/graph_constructor.h b/tensorflow/core/graph/graph_constructor.h
index 61704913c3..186859d132 100644
--- a/tensorflow/core/graph/graph_constructor.h
+++ b/tensorflow/core/graph/graph_constructor.h
@@ -97,14 +97,31 @@ struct ImportGraphDefOptions {
// other nodes in `gdef`.
std::vector<string> control_dependencies;
+ // Tensors in `gdef` that will be returned via the `return_tensors` output
+ // parameter of `ImportGraphDef()`. If this list is non-empty, the caller must
+ // pass an empty vector to `ImportGraphDef()`; it will be populated with the
+ // corresponding (node, output index) pairs in `g`.
+ //
+ // Entries should not include `prefix`, i.e., each TensorId's name should be
+ // the name as it originally appears in `gdef`.
+ //
+ // If this contains a tensor that's also being remapped via `input_map`, the
+ // corresponding existing tensor in `g` will be returned.
+ std::vector<TensorId> return_tensors;
+
// TODO(ashankar): Enable handling of GraphDefs produced by newer binaries
// with ops that are not defined in the binary calling ImportGraphDef.
// Similar to the producer_op_list argument to import_graph_def in the
// python API.
};
-extern Status ImportGraphDef(const ImportGraphDefOptions& opts,
- const GraphDef& gdef, Graph* g,
- ShapeRefiner* refiner);
+
+// Each `return_tensors` entry is the node and output index of a requested
+// tensor. The returned pair may differ from the requested TensorId when the
+// tensor has been remapped via `input_map`.
+extern Status ImportGraphDef(
+ const ImportGraphDefOptions& opts, const GraphDef& gdef, Graph* g,
+ ShapeRefiner* refiner,
+ std::vector<std::pair<Node*, int>>* return_tensors = nullptr);
// Make a copy of "src" into "*dest".
//
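The new `return_tensors` option is the C++ counterpart of `return_elements` in the Python `import_graph_def` referred to above. A rough TF1-style Python sketch of that behaviour (the tiny GraphDef and the node names are made up for illustration):

import tensorflow as tf

# Build a small GraphDef to import.
g = tf.Graph()
with g.as_default():
    a = tf.constant(1.0, name="input")
    b = tf.identity(a * 2.0, name="t1")
gdef = g.as_graph_def()

with tf.Graph().as_default():
    # return_elements plays the role of opts.return_tensors: the call hands
    # back the imported (possibly remapped) tensors under the given prefix.
    input_t, t1_t = tf.import_graph_def(
        gdef, return_elements=["input:0", "t1:0"], name="import")
    print(input_t.name, t1_t.name)  # import/input:0 import/t1:0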
diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc
index a173d3a627..9ce7a0fdf8 100644
--- a/tensorflow/core/graph/graph_constructor_test.cc
+++ b/tensorflow/core/graph/graph_constructor_test.cc
@@ -65,14 +65,17 @@ class GraphConstructorTest : public ::testing::Test {
EXPECT_EQ(original_graph_description, GraphDebugString());
}
- void ExpectError(const string& gdef_ascii, const ImportGraphDefOptions& opts,
- const std::vector<string>& expected_error_strs,
- ShapeRefiner* refiner = nullptr) {
+ void ExpectError(
+ const string& gdef_ascii, const ImportGraphDefOptions& opts,
+ const std::vector<string>& expected_error_strs,
+ ShapeRefiner* refiner = nullptr,
+ std::vector<std::pair<Node*, int>>* return_tensors = nullptr) {
// Used to verify that errors don't change graph
const string original_graph_description = GraphDebugString();
Convert(gdef_ascii);
- Status status = ImportGraphDef(opts, gdef_, &graph_, refiner);
+ Status status =
+ ImportGraphDef(opts, gdef_, &graph_, refiner, return_tensors);
EXPECT_FALSE(status.ok());
for (const string& error : expected_error_strs) {
@@ -90,9 +93,10 @@ class GraphConstructorTest : public ::testing::Test {
}
void ExpectOK(const string& gdef_ascii, const ImportGraphDefOptions& opts,
- ShapeRefiner* refiner = nullptr) {
+ ShapeRefiner* refiner = nullptr,
+ std::vector<std::pair<Node*, int>>* return_tensors = nullptr) {
Convert(gdef_ascii);
- Status s = ImportGraphDef(opts, gdef_, &graph_, refiner);
+ Status s = ImportGraphDef(opts, gdef_, &graph_, refiner, return_tensors);
EXPECT_EQ(Status::OK(), s) << s;
}
@@ -981,6 +985,104 @@ TEST_F(GraphConstructorTest, ImportGraphDef_InputMapDuplicateNodeNames) {
&refiner);
}
+TEST_F(GraphConstructorTest, ImportGraphDef_ReturnTensors) {
+ ShapeRefiner refiner(graph_.op_registry());
+
+ ImportGraphDefOptions opts;
+ opts.return_tensors.push_back({"input", 1});
+ opts.return_tensors.push_back({"t1", 0});
+ opts.return_tensors.push_back({"input", 0});
+ std::vector<std::pair<Node*, int>> return_tensors;
+ ExpectOK(
+ "node { name: 'input' op: 'TestInput' }"
+ "node { name: 't1' op: 'TestMul' input: ['input:0', 'input:1'] }",
+ opts, &refiner, &return_tensors);
+
+ // Sanity checks
+ EXPECT_TRUE(HasNode("input"));
+ EXPECT_TRUE(HasNode("t1"));
+ EXPECT_TRUE(HasEdge("input", 0, "t1", 0));
+ EXPECT_TRUE(HasEdge("input", 1, "t1", 1));
+
+ // Check return tensors
+ ASSERT_EQ(return_tensors.size(), 3);
+ EXPECT_EQ(return_tensors[0].first->name(), "input");
+ EXPECT_EQ(return_tensors[0].second, 1);
+ EXPECT_EQ(return_tensors[1].first->name(), "t1");
+ EXPECT_EQ(return_tensors[1].second, 0);
+ EXPECT_EQ(return_tensors[2].first->name(), "input");
+ EXPECT_EQ(return_tensors[2].second, 0);
+
+ // Test using prefix and returning element from input_map
+ opts.return_tensors.clear();
+ return_tensors.clear();
+ opts.prefix = "import";
+ opts.input_map[{"new_input", 1}] = {"input", 0};
+ opts.return_tensors.push_back({"new_input", 0});
+ opts.return_tensors.push_back({"new_input", 1});
+ ExpectOK("node { name: 'new_input' op: 'TestInput' }", opts, &refiner,
+ &return_tensors);
+
+ EXPECT_TRUE(HasNode("import/new_input"));
+
+ ASSERT_EQ(return_tensors.size(), 2);
+ EXPECT_EQ(return_tensors[0].first->name(), "import/new_input");
+ EXPECT_EQ(return_tensors[0].second, 0);
+ EXPECT_EQ(return_tensors[1].first->name(), "input");
+ EXPECT_EQ(return_tensors[1].second, 0);
+
+ // Test returning node remapped to source node
+ opts.prefix.clear();
+ opts.input_map.clear();
+ opts.return_tensors.clear();
+ return_tensors.clear();
+ opts.input_map[{"new_input", 0}] = {"_SOURCE", 0};
+ opts.return_tensors.push_back({"new_input", 0});
+ ExpectOK("node { name: 'new_input' op: 'TestInput' }", opts, &refiner,
+ &return_tensors);
+
+ EXPECT_TRUE(HasNode("new_input"));
+
+ ASSERT_EQ(return_tensors.size(), 1);
+ EXPECT_EQ(return_tensors[0].first->name(), "_SOURCE");
+ EXPECT_EQ(return_tensors[0].second, 0);
+}
+
+TEST_F(GraphConstructorTest, ImportGraphDef_ReturnTensorsErrors) {
+ // Passing in return_tensors with empty opts.return_tensors is OK
+ ImportGraphDefOptions opts;
+ std::vector<std::pair<Node*, int>> return_tensors;
+ ExpectOK("node { name: 'input' op: 'TestInput' }", opts, nullptr,
+ &return_tensors);
+
+ // Null return_tensors with non-empty opts.return_tensors
+ opts.return_tensors.push_back({"new_input", 0});
+ ExpectError("node { name: 'new_input' op: 'TestInput' }", opts,
+ {"return_tensors argument to ImportNodeDef() must be non-null "
+ "if opts.return_tensors is non-empty"});
+
+ // Non-empty return_tensors
+ return_tensors.push_back({nullptr, 0});
+ ExpectError("node { name: 'new_input' op: 'TestInput' }", opts,
+ {"return_tensors argument to ImportNodeDef() should be empty "
+ "(has size 1)"},
+ nullptr, &return_tensors);
+
+ // Requesting tensor that isn't in graph def
+ return_tensors.clear();
+ ExpectError("node { name: 'W1' op: 'TestParams' }", opts,
+ {"Requested return node 'new_input' not found in graph def"},
+ nullptr, &return_tensors);
+
+ // Requesting invalid node index
+ opts.return_tensors.clear();
+ opts.return_tensors.push_back({"new_input", 2});
+ ExpectError("node { name: 'new_input' op: 'TestInput' }", opts,
+ {"Invalid return output 2 of node 'new_input', which has 2 "
+ "outputs"},
+ nullptr, &return_tensors);
+}
+
TEST_F(GraphConstructorTest, ImportGraphDef_WithCycle) {
// Test graph produced in python using:
/*
diff --git a/tensorflow/core/graph/graph_partition_test.cc b/tensorflow/core/graph/graph_partition_test.cc
index d8322e6077..6d3dbc0abb 100644
--- a/tensorflow/core/graph/graph_partition_test.cc
+++ b/tensorflow/core/graph/graph_partition_test.cc
@@ -128,13 +128,13 @@ void CheckLoopConstruction(const GraphDef& graph_def) {
}
}
-REGISTER_OP("Input").Output("o: float");
+REGISTER_OP("FloatInput").Output("o: float");
REGISTER_OP("BoolInput").Output("o: bool");
REGISTER_OP("Combine").Input("a: float").Input("b: float").Output("o: float");
-ops::Output ConstructOp(const Scope& scope, const string& op_type,
- const gtl::ArraySlice<ops::Input>& inputs) {
- if (!scope.ok()) return ops::Output();
+Output ConstructOp(const Scope& scope, const string& op_type,
+ const gtl::ArraySlice<Input>& inputs) {
+ if (!scope.ok()) return Output();
const string unique_name = scope.GetUniqueNameForOp(op_type);
auto builder = NodeBuilder(unique_name, op_type);
for (auto const& input : inputs) {
@@ -143,19 +143,19 @@ ops::Output ConstructOp(const Scope& scope, const string& op_type,
scope.UpdateBuilder(&builder);
Node* ret;
scope.UpdateStatus(builder.Finalize(scope.graph(), &ret));
- if (!scope.ok()) return ops::Output();
- return ops::Output(ret);
+ if (!scope.ok()) return Output();
+ return Output(ret);
}
-ops::Output Input(const Scope& scope) {
- return ConstructOp(scope, "Input", {});
+Output FloatInput(const Scope& scope) {
+ return ConstructOp(scope, "FloatInput", {});
}
-ops::Output BoolInput(const Scope& scope) {
+Output BoolInput(const Scope& scope) {
return ConstructOp(scope, "BoolInput", {});
}
-ops::Output Combine(const Scope& scope, ops::Input a, ops::Input b) {
+Output Combine(const Scope& scope, Input a, Input b) {
return ConstructOp(scope, "Combine", {a, b});
}
@@ -196,21 +196,21 @@ class GraphPartitionTest : public ::testing::Test {
TEST_F(GraphPartitionTest, SingleDevice) {
using namespace ::tensorflow::ops; // NOLINT(build/namespaces)
- auto a1 = Input(in_.WithOpName("A1"));
+ auto a1 = FloatInput(in_.WithOpName("A1"));
Combine(in_.WithOpName("A2"), a1, a1);
Partition(ToGraphDef(), &partitions_);
EXPECT_EQ(1, partitions_.size());
- a1 = Input(scope_a_.WithOpName("A1"));
+ a1 = FloatInput(scope_a_.WithOpName("A1"));
Combine(scope_a_.WithOpName("A2"), a1, a1);
ExpectMatchA();
}
TEST_F(GraphPartitionTest, CrossDeviceData) {
using namespace ::tensorflow::ops; // NOLINT(build/namespaces)
- auto a1 = Input(in_.WithOpName("A1"));
- auto b1 = Input(in_.WithOpName("B1"));
+ auto a1 = FloatInput(in_.WithOpName("A1"));
+ auto b1 = FloatInput(in_.WithOpName("B1"));
Combine(in_.WithOpName("B2"), a1, b1);
Partition(ToGraphDef(), &partitions_);
@@ -218,11 +218,11 @@ TEST_F(GraphPartitionTest, CrossDeviceData) {
string a = "/job:a/replica:0/task:0/cpu:0";
string b = "/job:a/replica:0/task:0/cpu:1";
- a1 = Input(scope_a_.WithOpName("A1"));
+ a1 = FloatInput(scope_a_.WithOpName("A1"));
_Send(scope_a_.WithOpName("A1/_0"), a1, "edge_1_A1", a, 82, b);
ExpectMatchA();
- b1 = Input(scope_b_.WithOpName("B1"));
+ b1 = FloatInput(scope_b_.WithOpName("B1"));
auto recv =
_Recv(scope_b_.WithOpName("A1/_1"), DT_FLOAT, "edge_1_A1", a, 82, b);
Combine(scope_b_.WithOpName("B2"), recv, b1);
@@ -231,8 +231,8 @@ TEST_F(GraphPartitionTest, CrossDeviceData) {
TEST_F(GraphPartitionTest, CrossDeviceControl) {
using namespace ::tensorflow::ops; // NOLINT(build/namespaces)
- auto a1 = Input(in_.WithOpName("A1"));
- auto b1 = Input(in_.WithOpName("B1"));
+ auto a1 = FloatInput(in_.WithOpName("A1"));
+ auto b1 = FloatInput(in_.WithOpName("B1"));
Combine(in_.WithOpName("B2").WithControlDependencies(a1), b1, b1);
Partition(ToGraphDef(), &partitions_);
@@ -240,7 +240,7 @@ TEST_F(GraphPartitionTest, CrossDeviceControl) {
string a = "/job:a/replica:0/task:0/cpu:0";
string b = "/job:a/replica:0/task:0/cpu:1";
- a1 = Input(scope_a_.WithOpName("A1"));
+ a1 = FloatInput(scope_a_.WithOpName("A1"));
auto c = Const(scope_a_.WithOpName("A1/_0").WithControlDependencies(a1), {});
_Send(scope_a_.WithOpName("A1/_1"), c, "edge_3_A1", a, 82, b);
ExpectMatchA();
@@ -248,15 +248,15 @@ TEST_F(GraphPartitionTest, CrossDeviceControl) {
auto recv =
_Recv(scope_b_.WithOpName("A1/_2"), DT_FLOAT, "edge_3_A1", a, 82, b);
auto id = Identity(scope_b_.WithOpName("A1/_3"), recv);
- b1 = Input(scope_b_.WithOpName("B1"));
+ b1 = FloatInput(scope_b_.WithOpName("B1"));
Combine(scope_b_.WithOpName("B2").WithControlDependencies(id), b1, b1);
ExpectMatchB();
}
TEST_F(GraphPartitionTest, CrossDeviceData_MultiUse) {
using namespace ::tensorflow::ops; // NOLINT(build/namespaces)
- auto a1 = Input(in_.WithOpName("A1"));
- auto b1 = Input(in_.WithOpName("B1"));
+ auto a1 = FloatInput(in_.WithOpName("A1"));
+ auto b1 = FloatInput(in_.WithOpName("B1"));
Combine(in_.WithOpName("B2"), a1, b1);
Combine(in_.WithOpName("B3"), a1, a1);
@@ -265,13 +265,13 @@ TEST_F(GraphPartitionTest, CrossDeviceData_MultiUse) {
string a = "/job:a/replica:0/task:0/cpu:0";
string b = "/job:a/replica:0/task:0/cpu:1";
- a1 = Input(scope_a_.WithOpName("A1"));
+ a1 = FloatInput(scope_a_.WithOpName("A1"));
_Send(scope_a_.WithOpName("A1/_0"), a1, "edge_1_A1", a, 82, b);
ExpectMatchA();
auto recv =
_Recv(scope_b_.WithOpName("A1/_1"), DT_FLOAT, "edge_1_A1", a, 82, b);
- b1 = Input(scope_b_.WithOpName("B1"));
+ b1 = FloatInput(scope_b_.WithOpName("B1"));
Combine(scope_b_.WithOpName("B2"), recv, b1);
Combine(scope_b_.WithOpName("B3"), recv, recv);
ExpectMatchB();
@@ -279,17 +279,17 @@ TEST_F(GraphPartitionTest, CrossDeviceData_MultiUse) {
TEST_F(GraphPartitionTest, CrossDeviceControl_MultiUse) {
using namespace ::tensorflow::ops; // NOLINT(build/namespaces)
- auto a1 = Input(in_.WithOpName("A1"));
- auto b1 = Input(in_.WithOpName("B1"));
+ auto a1 = FloatInput(in_.WithOpName("A1"));
+ auto b1 = FloatInput(in_.WithOpName("B1"));
Combine(in_.WithOpName("B2").WithControlDependencies(a1), b1, b1);
- Input(in_.WithOpName("B3").WithControlDependencies(a1));
+ FloatInput(in_.WithOpName("B3").WithControlDependencies(a1));
Partition(ToGraphDef(), &partitions_);
EXPECT_EQ(2, partitions_.size());
string a = "/job:a/replica:0/task:0/cpu:0";
string b = "/job:a/replica:0/task:0/cpu:1";
- a1 = Input(scope_a_.WithOpName("A1"));
+ a1 = FloatInput(scope_a_.WithOpName("A1"));
auto c = Const(scope_a_.WithOpName("A1/_0").WithControlDependencies(a1), {});
_Send(scope_a_.WithOpName("A1/_1"), c, "edge_1_A1", a, 82, b);
ExpectMatchA();
@@ -297,25 +297,25 @@ TEST_F(GraphPartitionTest, CrossDeviceControl_MultiUse) {
auto recv =
_Recv(scope_b_.WithOpName("A1/_2"), DT_FLOAT, "edge_1_A1", a, 82, b);
auto id = Identity(scope_b_.WithOpName("A1/_3"), recv);
- b1 = Input(scope_b_.WithOpName("B1"));
+ b1 = FloatInput(scope_b_.WithOpName("B1"));
Combine(scope_b_.WithOpName("B2").WithControlDependencies(id), b1, b1);
- Input(scope_b_.WithOpName("B3").WithControlDependencies(id));
+ FloatInput(scope_b_.WithOpName("B3").WithControlDependencies(id));
ExpectMatchB();
}
TEST_F(GraphPartitionTest, CrossDevice_DataControl) {
using namespace ::tensorflow::ops; // NOLINT(build/namespaces)
- auto a1 = Input(in_.WithOpName("A1"));
- auto b1 = Input(in_.WithOpName("B1"));
+ auto a1 = FloatInput(in_.WithOpName("A1"));
+ auto b1 = FloatInput(in_.WithOpName("B1"));
Combine(in_.WithOpName("B2"), a1, b1);
- Input(in_.WithOpName("B3").WithControlDependencies(a1));
+ FloatInput(in_.WithOpName("B3").WithControlDependencies(a1));
Partition(ToGraphDef(), &partitions_);
EXPECT_EQ(2, partitions_.size());
string a = "/job:a/replica:0/task:0/cpu:0";
string b = "/job:a/replica:0/task:0/cpu:1";
- a1 = Input(scope_a_.WithOpName("A1"));
+ a1 = FloatInput(scope_a_.WithOpName("A1"));
auto c = Const(scope_a_.WithOpName("A1/_0").WithControlDependencies(a1), {});
// NOTE: Send 0 A1/_1 -> A1/_2 is not necessarily needed. We could
// use A1/_0 -> A1/_4 as the control as a minor optimization.
@@ -328,9 +328,9 @@ TEST_F(GraphPartitionTest, CrossDevice_DataControl) {
auto id1 = Identity(scope_b_.WithOpName("A1/_3"), recv1);
auto recv2 =
_Recv(scope_b_.WithOpName("A1/_5"), DT_FLOAT, "edge_2_A1", a, 82, b);
- b1 = Input(scope_b_.WithOpName("B1"));
+ b1 = FloatInput(scope_b_.WithOpName("B1"));
Combine(scope_b_.WithOpName("B2"), recv2, b1);
- Input(scope_b_.WithOpName("B3").WithControlDependencies(id1));
+ FloatInput(scope_b_.WithOpName("B3").WithControlDependencies(id1));
ExpectMatchB();
}
@@ -338,8 +338,7 @@ TEST_F(GraphPartitionTest, CrossDeviceLoop) {
using namespace ::tensorflow::ops; // NOLINT(build/namespaces)
auto a1 = BoolInput(in_.WithOpName("A1"));
auto a2 = Enter(in_.WithOpName("A2"), a1, "foo");
- auto a3 =
- Merge(in_.WithOpName("A3"), {a2, ops::Input("A5", 0, DT_BOOL)}).output;
+ auto a3 = Merge(in_.WithOpName("A3"), {a2, Input("A5", 0, DT_BOOL)}).output;
LoopCond(in_.WithOpName("A4"), a3);
auto b1 = Identity(in_.WithOpName("B1"), a3);
NextIteration(in_.WithOpName("A5"), b1);
@@ -351,8 +350,7 @@ TEST_F(GraphPartitionTest, CrossDeviceLoop1) {
using namespace ::tensorflow::ops; // NOLINT(build/namespaces)
auto a1 = BoolInput(in_.WithOpName("A1"));
auto a2 = Enter(in_.WithOpName("B2"), a1, "foo");
- auto a3 =
- Merge(in_.WithOpName("A3"), {a2, ops::Input("B5", 0, DT_BOOL)}).output;
+ auto a3 = Merge(in_.WithOpName("A3"), {a2, Input("B5", 0, DT_BOOL)}).output;
LoopCond(in_.WithOpName("A4"), a3);
auto b1 = Identity(in_.WithOpName("B1"), a3);
NextIteration(in_.WithOpName("B5"), b1);
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 82ed7d6b42..fb663e5f58 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -318,6 +318,19 @@ cc_library(
)
cc_library(
+ name = "record_input_op",
+ srcs = [
+ "record_input_op.cc",
+ "record_yielder.cc",
+ "record_yielder.h",
+ ],
+ deps = [
+ "//tensorflow/core:framework",
+ "//tensorflow/core:lib",
+ ],
+)
+
+cc_library(
name = "save_restore_tensor",
srcs = ["save_restore_tensor.cc"],
hdrs = ["save_restore_tensor.h"],
@@ -1177,6 +1190,7 @@ cc_library(
":priority_queue_op",
":queue_ops",
":random_shuffle_queue_op",
+ ":record_input_op",
":session_ops",
":sparse_conditional_accumulator_op",
":stack_ops",
@@ -1679,6 +1693,7 @@ tf_cc_tests(
":ops_util",
"//tensorflow/core:core_cpu",
"//tensorflow/core:framework",
+ "//tensorflow/core:lib_internal",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
@@ -3735,10 +3750,7 @@ filegroup(
"ctc_loss_op.*",
# Excluded due to experimental status:
"debug_ops.*",
- # Ops excluded because they do not build correctly for Android.
- # See b/29213790
"scatter_nd_op*",
- "sparse_matmul_op.*",
# Lib CURL is not supported on Android.
"bigquery*",
],
diff --git a/tensorflow/core/kernels/hexagon/BUILD b/tensorflow/core/kernels/hexagon/BUILD
index 1222093a7a..9263c062ba 100644
--- a/tensorflow/core/kernels/hexagon/BUILD
+++ b/tensorflow/core/kernels/hexagon/BUILD
@@ -72,12 +72,14 @@ tf_cc_test(
tf_kernel_library(
name = "graph_transferer",
srcs = [
+ "graph_transfer_utils.cc",
"graph_transferer.cc",
"hexagon_control_wrapper.cc",
"hexagon_ops_definitions.cc",
"i_graph_transfer_ops_definitions.cc",
],
hdrs = [
+ "graph_transfer_utils.h",
"graph_transferer.h",
"hexagon_control_wrapper.h",
"hexagon_ops_definitions.h",
diff --git a/tensorflow/core/kernels/hexagon/graph_transfer_utils.cc b/tensorflow/core/kernels/hexagon/graph_transfer_utils.cc
new file mode 100644
index 0000000000..c37e49f242
--- /dev/null
+++ b/tensorflow/core/kernels/hexagon/graph_transfer_utils.cc
@@ -0,0 +1,49 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/hexagon/graph_transfer_utils.h"
+
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+
+/* static */ std::priority_queue<std::tuple<float, int, string>>
+GraphTransferUtils::GetTopNFloatResults(const float *const data,
+ const string *const labels,
+ const int element_count) {
+ CHECK(data != nullptr);
+ CHECK(labels != nullptr);
+ std::priority_queue<std::tuple<float, int, string>> queue;
+ for (int i = 0; i < element_count; ++i) {
+ queue.emplace(data[i], i, labels[i]);
+ }
+ return queue;
+}
+
+/* static */ void GraphTransferUtils::DumpTopNFloatResults(
+ const float *const data, const string *const labels,
+ const int element_count, const int top_n) {
+ std::priority_queue<std::tuple<float, int, string>> queue =
+ GetTopNFloatResults(data, labels, element_count);
+ LOG(INFO) << "=== Dump ranking ===";
+ for (int i = 0; i < top_n; ++i) {
+ const std::tuple<float, int, string> &entry = queue.top();
+ LOG(INFO) << i << ": " << std::get<1>(entry) << ", " << std::get<2>(entry)
+ << ", " << std::get<0>(entry);
+ queue.pop();
+ }
+}
+
+} // namespace tensorflow
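GetTopNFloatResults above keys a max-heap on the score so the highest-scoring entries pop first, and DumpTopNFloatResults then pops the first top_n of them. The same ranking idea in a self-contained Python sketch with made-up data:

import heapq

data = [0.1, 0.7, 0.2]                 # scores, e.g. classifier outputs
labels = ["cat", "dog", "bird"]
top_n = 2

# Rank (score, index, label) tuples so the highest scores come out first,
# mirroring the priority_queue of std::tuple<float, int, string> above.
ranked = heapq.nlargest(
    top_n, ((s, i, l) for i, (s, l) in enumerate(zip(data, labels))))
for rank, (score, index, label) in enumerate(ranked):
    print(rank, index, label, score)   # 0 1 dog 0.7 / 1 2 bird 0.2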
diff --git a/tensorflow/core/kernels/hexagon/graph_transfer_utils.h b/tensorflow/core/kernels/hexagon/graph_transfer_utils.h
new file mode 100644
index 0000000000..85af9b5ce3
--- /dev/null
+++ b/tensorflow/core/kernels/hexagon/graph_transfer_utils.h
@@ -0,0 +1,41 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_PLATFORM_HEXAGON_GRAPH_TRANSFER_UTILS_H_
+#define TENSORFLOW_PLATFORM_HEXAGON_GRAPH_TRANSFER_UTILS_H_
+
+#include <queue>
+
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/platform/macros.h"
+
+namespace tensorflow {
+
+class GraphTransferUtils {
+ public:
+ static std::priority_queue<std::tuple<float, int, string>>
+ GetTopNFloatResults(const float *const data, const string *const labels,
+ const int element_count);
+ static void DumpTopNFloatResults(const float *const data,
+ const string *const labels,
+ const int element_count, const int top_n);
+
+ private:
+ TF_DISALLOW_COPY_AND_ASSIGN(GraphTransferUtils);
+};
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_PLATFORM_HEXAGON_GRAPH_TRANSFER_UTILS_H_
diff --git a/tensorflow/core/kernels/hexagon/graph_transferer.cc b/tensorflow/core/kernels/hexagon/graph_transferer.cc
index 5b2a95a371..662b935b90 100644
--- a/tensorflow/core/kernels/hexagon/graph_transferer.cc
+++ b/tensorflow/core/kernels/hexagon/graph_transferer.cc
@@ -38,14 +38,11 @@ const string INPUTS_NODE_PREFIX = "inputs_for_";
const string OUTPUTS_NODE_PREFIX = "outputs_for_";
const string DATA_NODE_PREFIX = "data_for_op_";
const string CONST_SHAPE_PREFIX = "const_shape_";
-const string PADDING_PREFIX = "NN_PAD_";
const string PADDING_ATTR_NAME = "padding";
const string STRIDES_ATTR_NAME = "strides";
const string KSIZE_ATTR_NAME = "ksize";
-const string PADDING_VALID_STR = "VALID";
-const string PADDING_SAME_STR = "SAME";
-const string PADDING_NA = "NA";
const string NULL_OUTPUT_NAME = "NULL";
+const int PADDING_NA_ID = 0; // VALID = 1, SAME = 2
// This is a temporary workaround to support android build
// where std::string is not supported even with c++11 option.
@@ -413,7 +410,6 @@ void GraphTransferer::RegisterConstantNode(
VLOG(1) << "Register constant node: " << node.name();
CHECK(node_name_to_id_cache_map_.count(node.name()) == 1);
const int id = node_name_to_id_cache_map_[node.name()];
- const string data_name = DATA_NODE_PREFIX + ToString(id);
const int output_node_size = node.num_outputs();
CHECK(output_node_size == 1);
// TODO(satok): support multiple outputs?
@@ -448,7 +444,6 @@ void GraphTransferer::RegisterConstantNode(
ConstNodeTransferParams{node.name(),
id,
{{shape[0], shape[1], shape[2], shape[3]}},
- data_name,
data_size});
// TODO(satok): Remove. Determine constant value without dryrun
if (!output_tensor_map.empty() && data_size != 0) {
@@ -474,7 +469,7 @@ int GraphTransferer::RegisterConstantShape(const std::vector<int>& shape) {
const int id = node_name_cache_list_.size() - 1;
node_name_to_id_cache_map_.emplace(shape_name, id);
const_node_transfer_params_list_.emplace_back(ConstNodeTransferParams{
- shape_name, id, {{shape[0], shape[1], shape[2], shape[3]}}, "", 0});
+ shape_name, id, {{shape[0], shape[1], shape[2], shape[3]}}, 0});
}
return node_name_to_id_cache_map_[shape_name];
}
@@ -545,17 +540,17 @@ void GraphTransferer::RegisterNodeWithPaddingAndStrides(
const int ksize_id = RegisterConstantShape(kernel_sizes);
extra_inputs.insert(extra_inputs.begin(), ksize_id);
}
- const std::string padding_str =
- padding == VALID ? PADDING_VALID_STR : PADDING_SAME_STR;
const int op_type_id = ops_definitions.GetOpIdFor(node.type_string());
CHECK(op_type_id >= 0 && op_type_id < ops_definitions.GetTotalOpsCount())
<< "Op " << node.type_string() << " not found in map(id = " << op_type_id
<< ")";
- AppendNodeParamsWithIoParams(shape_refiner, output_tensor_map, node,
- node.name(), id, node.type_string(), op_type_id,
- padding_str, node.num_inputs(), extra_inputs,
- node.num_outputs(), true /* append_input */,
- true /* append_output */);
+ // Safety check: only VALID and SAME paddings are expected here.
+ CHECK(padding == Padding::VALID || padding == Padding::SAME);
+ AppendNodeParamsWithIoParams(
+ shape_refiner, output_tensor_map, node, node.name(), id,
+ node.type_string(), op_type_id, static_cast<int>(padding),
+ node.num_inputs(), extra_inputs, node.num_outputs(),
+ true /* append_input */, true /* append_output */);
}
void GraphTransferer::RegisterInputNode(
@@ -570,7 +565,7 @@ void GraphTransferer::RegisterInputNode(
CHECK(op_type_id >= 0 && op_type_id < ops_definitions.GetTotalOpsCount());
AppendNodeParamsWithIoParams(
shape_refiner, output_tensor_map, node, node.name(), id,
- node.type_string(), op_type_id, PADDING_NA, node.num_inputs(), {},
+ node.type_string(), op_type_id, PADDING_NA_ID, node.num_inputs(), {},
node.num_outputs(), true /* append_input */, true /* append_output */);
}
@@ -587,7 +582,7 @@ void GraphTransferer::RegisterOutputNode(
// TODO(satok): Set output for output node?
AppendNodeParamsWithIoParams(
shape_refiner, output_tensor_map, node, node.name(), id,
- node.type_string(), op_type_id, PADDING_NA, node.num_inputs(), {},
+ node.type_string(), op_type_id, PADDING_NA_ID, node.num_inputs(), {},
0 /* outputs_size */, true /* append_input */, false /* append_output */);
}
@@ -604,7 +599,7 @@ void GraphTransferer::RegisterFlattenNode(
AppendNodeParamsWithIoParams(
shape_refiner, output_tensor_map, node, node.name(), id,
- node.type_string(), op_type_id, PADDING_NA, node.num_inputs(), {},
+ node.type_string(), op_type_id, PADDING_NA_ID, node.num_inputs(), {},
node.num_outputs(), true /* append_input */, true /* append_output */);
}
@@ -620,7 +615,7 @@ void GraphTransferer::RegisterGenericNode(
AppendNodeParamsWithIoParams(
shape_refiner, output_tensor_map, node, node.name(), id,
- node.type_string(), op_type_id, PADDING_NA, node.num_inputs(), {},
+ node.type_string(), op_type_id, PADDING_NA_ID, node.num_inputs(), {},
node.num_outputs(), true /* append_input */, true /* append_output */);
}
@@ -644,18 +639,13 @@ Status GraphTransferer::RegisterNodeIfAllInputsAreCached(
// CAVEAT: Append inputs and outputs params accordingly
void GraphTransferer::AppendNodeParams(const string& name, const int id,
const string& type, const int type_id,
- const string& padding_str,
- const int inputs_size,
+ const int padding, const int inputs_size,
const std::vector<int>& extra_inputs,
const int outputs_size) {
VLOG(1) << "Append node params: " << name;
- // TODO(satok): store padding as Padding?
- const string output_name = OUTPUTS_NODE_PREFIX + ToString(id);
node_transfer_params_list_.emplace_back(
- NodeTransferParams{name, id, type, type_id, PADDING_PREFIX + padding_str,
- INPUTS_NODE_PREFIX + ToString(id),
+ NodeTransferParams{name, id, type, type_id, padding,
inputs_size + static_cast<int>(extra_inputs.size()),
- outputs_size <= 0 ? NULL_OUTPUT_NAME : output_name,
static_cast<int>(outputs_size)});
}
@@ -738,7 +728,7 @@ void GraphTransferer::AppendNodeOutputParams(
void GraphTransferer::AppendNodeParamsWithIoParams(
const ShapeRefiner& shape_refiner, const OutputTensorMap& output_tensor_map,
const Node& node, const string& name, const int id, const string& type,
- const int type_id, const string& padding_str, const int inputs_size,
+ const int type_id, const int padding, const int inputs_size,
const std::vector<int>& extra_inputs, const int outputs_size,
const bool append_input_params, const bool append_output_params) {
VLOG(1) << "Append node with io params: " << node.name();
@@ -748,8 +738,8 @@ void GraphTransferer::AppendNodeParamsWithIoParams(
if (append_output_params) {
AppendNodeOutputParams(shape_refiner, output_tensor_map, id, node);
}
- AppendNodeParams(name, id, type, type_id, padding_str, inputs_size,
- extra_inputs, outputs_size);
+ AppendNodeParams(name, id, type, type_id, padding, inputs_size, extra_inputs,
+ outputs_size);
}
/* static */ std::array<int64, GraphTransferer::SHAPE_ARRAY_SIZE>
@@ -808,6 +798,20 @@ GraphTransferer::ToTensorShapeArray(const TensorShape& shape) {
}
}
+/* static */ string GraphTransferer::ToPaddingDebugString(const int padding) {
+ switch (padding) {
+ case 0:
+ return "NN_PAD_NA";
+ case Padding::VALID:
+ return "NN_PAD_VALID";
+ case Padding::SAME:
+ return "NN_PAD_SAME";
+ default:
+ CHECK(false);
+ return "";
+ }
+}
+
/* static */ void GraphTransferer::CheckShape(
const OutputTensorMap& output_tensor_map, const string& node_name,
const std::array<int64, SHAPE_ARRAY_SIZE>& expected) {
@@ -903,7 +907,10 @@ void GraphTransferer::DumpNodeTransferParams() const {
LOG(INFO) << "[ " << params.node_id << " \"" << params.name << "\" (Const)";
LOG(INFO) << " shape: " << params.shape[0] << params.shape[1]
<< params.shape[2] << params.shape[3];
- LOG(INFO) << " data_name: " << params.data_name;
+ LOG(INFO) << " data_name: "
+ << (params.data_size <= 0
+ ? ""
+ : DATA_NODE_PREFIX + ToString(params.node_id));
LOG(INFO) << " data_size: " << params.data_size << " bytes"
<< " ]";
}
@@ -911,11 +918,14 @@ void GraphTransferer::DumpNodeTransferParams() const {
LOG(INFO) << "*** Op Nodes ***";
for (const NodeTransferParams& params : node_transfer_params_list_) {
LOG(INFO) << "[ " << params.node_id << " \"" << params.name;
- LOG(INFO) << " type: " << params.type;
- LOG(INFO) << " padding: " << params.padding;
- LOG(INFO) << " inputs: " << params.inputs_name
+ LOG(INFO) << " type: " << params.type_name;
+ LOG(INFO) << " padding: " << ToPaddingDebugString(params.padding);
+ LOG(INFO) << " inputs: " << INPUTS_NODE_PREFIX + ToString(params.node_id)
<< ", size = " << params.inputs_size;
- LOG(INFO) << " outputs: " << params.outputs_name
+ LOG(INFO) << " outputs: "
+ << (params.outputs_size <= 0
+ ? NULL_OUTPUT_NAME
+ : (OUTPUTS_NODE_PREFIX + ToString(params.node_id)))
<< ", size = " << params.outputs_size << " ]";
}
LOG(INFO) << "******\n";
@@ -946,8 +956,10 @@ void GraphTransferer::DumpVerificationStringOfNodeTransferParams() const {
sstream << "---(CONST) [" << std::hex << params.node_id << std::dec << ","
<< params.shape[0] << "," << params.shape[1] << ","
<< params.shape[2] << "," << params.shape[3] << ","
- << params.data_name << "," << params.data_size << "," << params.name
- << "]";
+ << (params.data_size <= 0
+ ? ""
+ : DATA_NODE_PREFIX + ToString(params.node_id))
+ << "," << params.data_size << "," << params.name << "]";
LOG(INFO) << sstream.str();
}
LOG(INFO) << "Const node count = " << const_node_transfer_params_list_.size();
@@ -955,9 +967,13 @@ void GraphTransferer::DumpVerificationStringOfNodeTransferParams() const {
std::stringstream sstream;
sstream << "---(OP) [" << params.name.c_str() << "," << std::hex
<< params.node_id << std::dec << "," << params.soc_op_id << ","
- << params.padding << "," << params.inputs_name << ","
- << params.inputs_size << "," << params.outputs_name << ","
- << params.outputs_size << "," << params.type << "]";
+ << ToPaddingDebugString(params.padding) << ","
+ << INPUTS_NODE_PREFIX + ToString(params.node_id) << ","
+ << params.inputs_size << ","
+ << (params.outputs_size <= 0
+ ? NULL_OUTPUT_NAME
+ : (OUTPUTS_NODE_PREFIX + ToString(params.node_id)))
+ << "," << params.outputs_size << "," << params.type_name << "]";
LOG(INFO) << sstream.str();
}
LOG(INFO) << "Op node count = " << node_transfer_params_list_.size();
diff --git a/tensorflow/core/kernels/hexagon/graph_transferer.h b/tensorflow/core/kernels/hexagon/graph_transferer.h
index 7bc6293be8..d86452905f 100644
--- a/tensorflow/core/kernels/hexagon/graph_transferer.h
+++ b/tensorflow/core/kernels/hexagon/graph_transferer.h
@@ -52,21 +52,18 @@ class GraphTransferer {
struct NodeTransferParams {
string name;
int node_id;
- string type; // for debug info
+ string type_name;
int soc_op_id;
- string padding;
- string inputs_name; // for debug info TODO(satok): remove
+ int padding;
int inputs_size;
- string outputs_name; // for debug info TODO(satok): remove
int outputs_size;
};
// Const node parameters for transfer
struct ConstNodeTransferParams {
- string name; // for debug info
+ string name;
int node_id;
std::array<int64, MAX_SUPPORTED_RANK> shape;
- string data_name; // for debug info TODO(satok): remove
int data_size;
std::vector<uint8> data;
};
@@ -215,7 +212,7 @@ class GraphTransferer {
const OutputTensorMap& output_tensor_map);
void AppendNodeParams(const string& name, const int id, const string& type,
- const int type_id, const string& padding_str,
+ const int type_id, const int padding,
const int inputs_size,
const std::vector<int>& extra_inputs,
const int outputs_size);
@@ -235,13 +232,15 @@ class GraphTransferer {
const ShapeRefiner& shape_refiner,
const OutputTensorMap& output_tensor_map, const Node& node,
const string& name, const int id, const string& type, const int type_id,
- const string& padding_str, const int inputs_size,
+ const int padding, const int inputs_size,
const std::vector<int>& extra_inputs, const int outputs_size,
const bool append_input_params, const bool append_output_params);
static std::array<int64, SHAPE_ARRAY_SIZE> ToTensorShapeArray(
const TensorShape& shape);
+ static string ToPaddingDebugString(int padding);
+
static void CheckShape(const OutputTensorMap& output_tensor_map,
const string& node_name,
const std::array<int64, SHAPE_ARRAY_SIZE>& actual);
diff --git a/tensorflow/core/kernels/hexagon/graph_transferer_test.cc b/tensorflow/core/kernels/hexagon/graph_transferer_test.cc
index b9a4c8aff0..92b58083b9 100644
--- a/tensorflow/core/kernels/hexagon/graph_transferer_test.cc
+++ b/tensorflow/core/kernels/hexagon/graph_transferer_test.cc
@@ -69,10 +69,9 @@ class TestGraphTransferOpsDefinitions : public IGraphTransferOpsDefinitions {
static GraphDef CreateAddGraphDef() {
Scope root = Scope::NewRootScope();
- ops::Output node_a = ops::Const(root.WithOpName(NAME_A), NODE_A_VAL);
- ops::Output node_b = ops::Const(root.WithOpName(NAME_B), NODE_B_VAL);
- ops::Output node_add =
- ops::Add(root.WithOpName(NAME_A_PLUS_B), node_a, node_b);
+ Output node_a = ops::Const(root.WithOpName(NAME_A), NODE_A_VAL);
+ Output node_b = ops::Const(root.WithOpName(NAME_B), NODE_B_VAL);
+ Output node_add = ops::Add(root.WithOpName(NAME_A_PLUS_B), node_a, node_b);
GraphDef def;
TF_CHECK_OK(root.ToGraphDef(&def));
return def;
@@ -82,16 +81,16 @@ static GraphDef CreateConvGraphDef() {
Scope root = Scope::NewRootScope();
Tensor input_data(DT_FLOAT, TensorShape({1, 1, 1, 1}));
test::FillIota<float>(&input_data, 1.0f);
- ops::Output input =
- ops::Const(root.WithOpName("input"), ops::Input::Initializer(input_data));
+ Output input =
+ ops::Const(root.WithOpName("input"), Input::Initializer(input_data));
Tensor filter_data(DT_FLOAT, TensorShape({1, 1, 1, 1}));
test::FillIota<float>(&filter_data, 1.0f);
- ops::Output filter = ops::Const(root.WithOpName("filter"),
- ops::Input::Initializer(filter_data));
+ Output filter =
+ ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data));
const std::vector<int> strides{1, 1, 1, 1};
- ops::Output conv =
+ Output conv =
ops::Conv2D(root.WithOpName("conv"), input, filter, strides, "SAME");
- ops::Output softmax = ops::Softmax(root.WithOpName("softmax"), conv);
+ Output softmax = ops::Softmax(root.WithOpName("softmax"), conv);
GraphDef def;
TF_CHECK_OK(root.ToGraphDef(&def));
return def;
@@ -101,18 +100,18 @@ static GraphDef CreatePoolGraphDef() {
Scope root = Scope::NewRootScope();
Tensor input_data(DT_FLOAT, TensorShape({1, 1, 1, 1}));
test::FillIota<float>(&input_data, 1.0f);
- ops::Output input =
- ops::Const(root.WithOpName("input"), ops::Input::Initializer(input_data));
+ Output input =
+ ops::Const(root.WithOpName("input"), Input::Initializer(input_data));
Tensor filter_data(DT_FLOAT, TensorShape({1, 1, 1, 1}));
test::FillIota<float>(&filter_data, 1.0f);
- ops::Output filter = ops::Const(root.WithOpName("filter"),
- ops::Input::Initializer(filter_data));
+ Output filter =
+ ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data));
const std::vector<int> ksize{1, 1, 1, 1};
const std::vector<int> padding{0, 0, 0, 0};
const std::vector<int> strides{1, 1, 1, 1};
- ops::Output max_pool =
+ Output max_pool =
ops::MaxPool(root.WithOpName("maxpool"), input, ksize, strides, "SAME");
- ops::Output softmax = ops::Softmax(root.WithOpName("softmax"), max_pool);
+ Output softmax = ops::Softmax(root.WithOpName("softmax"), max_pool);
GraphDef def;
TF_CHECK_OK(root.ToGraphDef(&def));
return def;
@@ -352,10 +351,10 @@ TEST_F(GraphTransfererTest, LoadConvGraph) {
ASSERT_TRUE(params_conv != nullptr);
const int id = params_conv->node_id;
EXPECT_GE(id, 0);
- EXPECT_EQ("Conv2D", params_conv->type);
+ EXPECT_EQ("Conv2D", params_conv->type_name);
EXPECT_EQ(3, params_conv->inputs_size);
EXPECT_EQ(1, params_conv->outputs_size);
- EXPECT_EQ("NN_PAD_SAME", params_conv->padding);
+ EXPECT_EQ(Padding::SAME, params_conv->padding);
}
TEST_F(GraphTransfererTest, LoadMaxPoolGraph) {
@@ -378,10 +377,10 @@ TEST_F(GraphTransfererTest, LoadMaxPoolGraph) {
ASSERT_TRUE(params_max_pool != nullptr);
const int id = params_max_pool->node_id;
EXPECT_GE(id, 0);
- EXPECT_EQ("MaxPool", params_max_pool->type);
+ EXPECT_EQ("MaxPool", params_max_pool->type_name);
EXPECT_EQ(3, params_max_pool->inputs_size);
EXPECT_EQ(1, params_max_pool->outputs_size);
- EXPECT_EQ("NN_PAD_SAME", params_max_pool->padding);
+ EXPECT_EQ(Padding::SAME, params_max_pool->padding);
}
TEST(HexagonOpsDefinitions, CheckOpsDefinitions) {
diff --git a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc
index ecebd3c599..ca29fcdd47 100644
--- a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc
+++ b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc
@@ -15,12 +15,9 @@ limitations under the License.
#include "tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h"
-#include <queue>
-
#ifdef USE_HEXAGON_LIBS
#include "tensorflow/core/platform/hexagon/soc_interface.h"
#include "tensorflow/core/platform/profile_utils/cpu_utils.h"
-#include "tensorflow/core/platform/types.h"
#endif
namespace tensorflow {
@@ -28,7 +25,6 @@ namespace tensorflow {
const bool SHOW_DBG_IN_SOC = false;
const bool DBG_USE_DUMMY_INPUT = false;
const bool DBG_USE_SAMPLE_INPUT = false;
-const bool DBG_SHOW_RESULT = false;
const int64 FLAG_ENABLE_PANDA_BINARY_INPUT = 0x01;
#ifdef USE_HEXAGON_LIBS
@@ -145,18 +141,15 @@ bool HexagonControlWrapper::SetupGraph(
output_count = std::get<1>(output_ptr_and_count);
CHECK(output_count > 0);
}
-
- // TODO(satok): Do not use string. Use enum instead.
- const string padding = params.padding;
int padding_id = -1;
- if (padding == "NN_PAD_NA") {
+ if (params.padding == 0) {
padding_id = 0;
- } else if (padding == "NN_PAD_SAME") {
+ } else if (params.padding == Padding::SAME) {
padding_id = 1;
- } else if (padding == "NN_PAD_VALID") {
+ } else if (params.padding == Padding::VALID) {
padding_id = 2;
} else {
- CHECK(false) << "Unsupported padding " << padding;
+ CHECK(false);
}
soc_interface_AppendNode(params.name.c_str(), node_id + NODE_ID_OFFSET,
op_id, padding_id, input_ptr, input_count,
@@ -213,12 +206,6 @@ bool HexagonControlWrapper::ReadOutputNode(
// TODO: Accept all results
std::get<2>(output) = DT_FLOAT;
outputs->emplace_back(output);
- if (DBG_SHOW_RESULT) {
- const int byte_size = std::get<1>(output);
- const int element_count = byte_size / sizeof(float);
- const float* float_array = reinterpret_cast<float*>(std::get<0>(output));
- DumpTopNFloatResults(float_array, element_count, 10 /* top_n */);
- }
return true;
}
@@ -240,19 +227,4 @@ bool HexagonControlWrapper::ReadOutputNode(const string,
}
#endif
-void HexagonControlWrapper::DumpTopNFloatResults(const float* data,
- const float element_count,
- const int top_n) {
- std::priority_queue<std::tuple<float, int>> queue;
- for (int i = 0; i < element_count; ++i) {
- queue.emplace(data[i], i);
- }
- LOG(INFO) << "=== Dump ranking ===";
- for (int i = 0; i < top_n; ++i) {
- const std::tuple<float, int>& entry = queue.top();
- LOG(INFO) << i << ": " << std::get<1>(entry) << ", " << std::get<0>(entry);
- queue.pop();
- }
-}
-
} // namespace tensorflow
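Note: the padding handling above replaces string comparison with the integer Padding enum before calling soc_interface_AppendNode(). A minimal sketch of the same mapping as a helper, assuming the Padding enum used elsewhere in this patch and mirroring the `params.padding == 0` branch for "not applicable"; this is illustrative only, not part of the change:

#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/util/padding.h"

// Sketch only: maps the transferred padding value onto the soc padding id
// expected by the HVX library, following the branches in SetupGraph above.
inline int PaddingToSocPaddingId(int padding) {
  switch (padding) {
    case 0:               // "NN_PAD_NA" equivalent
      return 0;
    case Padding::SAME:   // "NN_PAD_SAME" equivalent
      return 1;
    case Padding::VALID:  // "NN_PAD_VALID" equivalent
      return 2;
    default:
      LOG(FATAL) << "Unsupported padding " << padding;
      return -1;
  }
}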
diff --git a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h
index dfae5aa5e2..0ba0b323cb 100644
--- a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h
+++ b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h
@@ -46,9 +46,6 @@ class HexagonControlWrapper final : public ISocControlWrapper {
// CAVEAT: Need offset as HVX library reserves some ids
static constexpr int NODE_ID_OFFSET = 0x10000;
- void DumpTopNFloatResults(const float *data, const float element_count,
- const int top_n);
-
// Dummy float array for input node.
// TODO(satok): Use actual data passed by FillInputNode and remove
std::vector<float> dummy_input_float_;
diff --git a/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc b/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc
index d06fb5fabc..81e49bd147 100644
--- a/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc
+++ b/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc
@@ -17,10 +17,15 @@ limitations under the License.
// -o /tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb
// adb push /tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb \
// /data/local/tmp
+// $ curl
+// https://storage.googleapis.com/download.tensorflow.org/models/imagenet_comp_graph_label_strings.txt
+// -o /tmp/imagenet_comp_graph_label_strings.txt
+// adb push /tmp/imagenet_comp_graph_label_strings.txt /data/local/tmp
#include <memory>
#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/kernels/hexagon/graph_transfer_utils.h"
#include "tensorflow/core/kernels/hexagon/graph_transferer.h"
#include "tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h"
#include "tensorflow/core/kernels/hexagon/hexagon_ops_definitions.h"
@@ -29,7 +34,9 @@ limitations under the License.
#include "tensorflow/core/lib/core/casts.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/profile_utils/clock_cycle_profiler.h"
#include "tensorflow/core/platform/test.h"
namespace tensorflow {
@@ -40,6 +47,43 @@ const bool DBG_DUMP_FLOAT_DATA = false;
const int WIDTH = 299;
const int HEIGHT = 299;
const int DEPTH = 3;
+const int EXPECTED_FIRST_RESULT_ID = 59;
+const int EXECUTION_REPEAT_COUNT = 3;
+
+static void DumpTop10Results(
+ const std::vector<ISocControlWrapper::ByteArray>& outputs) {
+ CHECK(outputs.size() == 1);
+ const int byte_size = std::get<1>(outputs.at(0));
+ const int element_count = byte_size / sizeof(float);
+ const float* float_array =
+ reinterpret_cast<float*>(std::get<0>(outputs.at(0)));
+ const string label_filename =
+ "/data/local/tmp/imagenet_comp_graph_label_strings.txt";
+ string label_str;
+ TF_CHECK_OK(ReadFileToString(Env::Default(), label_filename, &label_str));
+ std::vector<string> labels = str_util::Split(label_str, '\n');
+ GraphTransferUtils::DumpTopNFloatResults(
+ float_array, labels.data(),
+ std::min(element_count, static_cast<int>(labels.size())),
+ 10 /* show top_n results */);
+}
+
+static void CheckFirstResult(
+ const std::vector<ISocControlWrapper::ByteArray>& outputs,
+ const int expected_first_id) {
+ EXPECT_GE(outputs.size(), 1);
+ const int byte_size = std::get<1>(outputs.at(0));
+ const int element_count = byte_size / sizeof(float);
+ const float* float_array =
+ reinterpret_cast<float*>(std::get<0>(outputs.at(0)));
+ EXPECT_GE(element_count, 1);
+ std::vector<string> labels(element_count);
+ std::priority_queue<std::tuple<float, int, string>> queue =
+ GraphTransferUtils::GetTopNFloatResults(float_array, labels.data(),
+ element_count);
+ const std::tuple<float, int, string>& entry = queue.top();
+ EXPECT_EQ(expected_first_id, std::get<1>(entry));
+}
// CAVEAT: This test only runs when you specify hexagon library using
// makefile.
@@ -77,12 +121,17 @@ TEST(GraphTransferer, RunInceptionV3OnHexagonExample) {
const int fsize = bmp.size();
LOG(INFO) << "Read " << image_filename << ", size = " << fsize << "bytes";
const int64 pixel_count = WIDTH * HEIGHT * DEPTH;
+ CHECK(fsize >= 22 /* pos of height */ + sizeof(int));
+ CHECK(bmp.data() != nullptr);
uint8* const img_bytes = bit_cast<uint8*>(bmp.data());
const int header_size = *(reinterpret_cast<int*>(img_bytes + 10));
+ LOG(INFO) << "header size = " << header_size;
const int size = *(reinterpret_cast<int*>(img_bytes + 14));
+ LOG(INFO) << "image size = " << size;
const int width = *(reinterpret_cast<int*>(img_bytes + 18));
+ LOG(INFO) << "width = " << width;
const int height = *(reinterpret_cast<int*>(img_bytes + 22));
- LOG(INFO) << header_size << ", " << size << ", " << width << ", " << height;
+ LOG(INFO) << "height = " << height;
CHECK(fsize >= (WIDTH + 1) * WIDTH * 3 + header_size);
uint8* const bmp_pixels = &img_bytes[header_size];
@@ -129,12 +178,23 @@ TEST(GraphTransferer, RunInceptionV3OnHexagonExample) {
hexagon_control_wrapper.FillInputNode("Mul", ba);
// 4. Execute graph
- hexagon_control_wrapper.ExecuteGraph();
+ profile_utils::CpuUtils::EnableClockCycleProfiling(true);
+ ClockCycleProfiler prof;
+ for (int i = 0; i < EXECUTION_REPEAT_COUNT; ++i) {
+ prof.Start();
+ hexagon_control_wrapper.ExecuteGraph();
+ prof.Stop();
+ }
- // 5. Read output node's outputs
+ // 5-1. Read output node's outputs
std::vector<ISocControlWrapper::ByteArray> outputs;
hexagon_control_wrapper.ReadOutputNode("softmax", &outputs);
+ // 5-2. Dump results
+ DumpTop10Results(outputs);
+ CheckFirstResult(outputs, EXPECTED_FIRST_RESULT_ID);
+ prof.DumpStatistics("Graph Execution");
+
// 6. Teardown graph in hexagon
hexagon_control_wrapper.TeardownGraph();
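Note: DumpTop10Results and CheckFirstResult above rely on GraphTransferUtils to rank the softmax output. The following is a small, self-contained sketch of the same top-N selection with a std::priority_queue; the function name and signature are illustrative and are not the GraphTransferUtils API:

#include <queue>
#include <string>
#include <tuple>
#include <vector>

// Sketch only: ranks `count` float scores and returns the `top_n` largest as
// (score, index, label) tuples, similar to what the test expects from
// GraphTransferUtils::GetTopNFloatResults.
std::vector<std::tuple<float, int, std::string>> TopN(
    const float* scores, const std::vector<std::string>& labels, int count,
    int top_n) {
  std::priority_queue<std::tuple<float, int, std::string>> queue;
  for (int i = 0; i < count; ++i) {
    const std::string label =
        i < static_cast<int>(labels.size()) ? labels[i] : std::string();
    queue.emplace(scores[i], i, label);
  }
  std::vector<std::tuple<float, int, std::string>> result;
  for (int i = 0; i < top_n && !queue.empty(); ++i) {
    result.push_back(queue.top());
    queue.pop();
  }
  return result;
}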
diff --git a/tensorflow/core/kernels/image_resizer_state.h b/tensorflow/core/kernels/image_resizer_state.h
index 8870937422..33383d16a8 100644
--- a/tensorflow/core/kernels/image_resizer_state.h
+++ b/tensorflow/core/kernels/image_resizer_state.h
@@ -90,6 +90,18 @@ struct ImageResizerState {
errors::InvalidArgument("input image must be of non-zero size"));
height_scale = CalculateResizeScale(in_height, out_height, align_corners_);
width_scale = CalculateResizeScale(in_width, out_width, align_corners_);
+
+ // Guard against overflows
+ OP_REQUIRES(context,
+ ceilf((out_height - 1) * height_scale) <=
+ static_cast<float>(std::numeric_limits<int64>::max()),
+ errors::InvalidArgument(
+ "input image height scale would cause an overflow"));
+ OP_REQUIRES(
+ context,
+ ceilf((out_width - 1) * width_scale) <= static_cast<float>(INT_MAX),
+ errors::InvalidArgument(
+ "input image width scale would cause an overflow"));
}
// Calculates all the required variables, and allocates the output.
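Note: the new OP_REQUIRES checks above reject resize parameters whose scaled last coordinate would overflow the integer type used for indexing. A standalone sketch of that guard, with an assumed helper name:

#include <cmath>
#include <cstdint>
#include <limits>

// Sketch only: returns true if ceil((out_size - 1) * scale) still fits in the
// int64 range used for source indexing.
bool ScaledIndexFits(int64_t out_size, float scale) {
  const float last = std::ceil((out_size - 1) * scale);
  return last <= static_cast<float>(std::numeric_limits<int64_t>::max());
}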
diff --git a/tensorflow/core/kernels/inplace_ops.cc b/tensorflow/core/kernels/inplace_ops.cc
index 5f1f5b652c..b44f2f5465 100644
--- a/tensorflow/core/kernels/inplace_ops.cc
+++ b/tensorflow/core/kernels/inplace_ops.cc
@@ -29,39 +29,24 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
namespace functor {
template <typename T>
-Status DoInplaceUpdate(const CPUDevice& d, InplaceOpType op,
- const Tensor& value, const Tensor& loc, Tensor* output) {
- auto Tloc = loc.flat<int64>();
+Status DoParallelConcatUpdate(const CPUDevice& d, const Tensor& value,
+ int32 loc, Tensor* output) {
auto Tvalue = value.flat_outer_dims<T>();
auto Toutput = output->flat_outer_dims<T>();
auto nrows = Toutput.dimension(0);
- for (int64 j = 0; j < Tloc.size(); ++j) {
- auto r = (Tloc(j) % nrows + nrows) % nrows; // Guard index range.
- switch (op) {
- case I_UPDATE:
- Toutput.template chip<0>(r).device(d) = Tvalue.template chip<0>(j);
- break;
- case I_ADD:
- Toutput.template chip<0>(r).device(d) += Tvalue.template chip<0>(j);
- break;
- case I_SUB:
- Toutput.template chip<0>(r).device(d) -= Tvalue.template chip<0>(j);
- break;
- default:
- return errors::InvalidArgument("Unsupported inplace operation", op);
- }
- }
+ auto r = (loc % nrows + nrows) % nrows; // Guard index range.
+ Toutput.template chip<0>(r).device(d) = Tvalue.template chip<0>(0);
return Status::OK();
}
template <>
-Status DoInplace(const CPUDevice& d, InplaceOpType op, const Tensor& value,
- const Tensor& loc, Tensor* output) {
+Status DoParallelConcat(const CPUDevice& d, const Tensor& value, int32 loc,
+ Tensor* output) {
CHECK_EQ(value.dtype(), output->dtype());
switch (value.dtype()) {
#define CASE(type) \
case DataTypeToEnum<type>::value: \
- return DoInplaceUpdate<type>(d, op, value, loc, output);
+ return DoParallelConcatUpdate<type>(d, value, loc, output);
TF_CALL_NUMBER_TYPES(CASE);
#undef CASE
default:
@@ -73,19 +58,17 @@ Status DoInplace(const CPUDevice& d, InplaceOpType op, const Tensor& value,
namespace {
-// TODO(apassos): validate the shapes better.
-class InplaceOpBase : public OpKernel {
+template <typename Device>
+class ParallelConcatUpdate : public OpKernel {
public:
- explicit InplaceOpBase(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+ explicit ParallelConcatUpdate(OpKernelConstruction* ctx) : OpKernel(ctx) {
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("loc", &loc_));
+ }
void Compute(OpKernelContext* ctx) override {
auto value = ctx->input(0);
- auto loc = ctx->input(1);
- auto update = ctx->input(2);
+ auto update = ctx->input(1);
- OP_REQUIRES(ctx, TensorShapeUtils::IsVector(loc.shape()),
- errors::InvalidArgument("loc must be a vector. ",
- loc.shape().DebugString()));
OP_REQUIRES(
ctx, value.dims() == update.dims(),
errors::InvalidArgument("value and update shape doesn't match: ",
@@ -98,67 +81,39 @@ class InplaceOpBase : public OpKernel {
value.shape().DebugString(), " vs. ",
update.shape().DebugString()));
}
- OP_REQUIRES(ctx, loc.dim_size(0) == update.dim_size(0),
- errors::InvalidArgument("loc and update shape doesn't match: ",
- loc.shape().DebugString(), " vs. ",
+ OP_REQUIRES(ctx, 1 == update.dim_size(0),
+ errors::InvalidArgument("update shape doesn't match: ",
update.shape().DebugString()));
Tensor output = value; // This creates an alias intentionally.
- OP_REQUIRES_OK(ctx, DoCompute(ctx, update, loc, &output));
+ const auto& d = ctx->eigen_device<Device>();
+ OP_REQUIRES_OK(
+ ctx, ::tensorflow::functor::DoParallelConcat(d, update, loc_, &output));
ctx->set_output(0, output);
}
- protected:
- virtual Status DoCompute(OpKernelContext* ctx, const Tensor& value,
- const Tensor& loc, Tensor* output) = 0;
-};
-
-template <typename Device, functor::InplaceOpType op>
-class InplaceOp : public InplaceOpBase {
- public:
- explicit InplaceOp(OpKernelConstruction* ctx) : InplaceOpBase(ctx) {}
-
- protected:
- Status DoCompute(OpKernelContext* ctx, const Tensor& value, const Tensor& loc,
- Tensor* output) override {
- const auto& d = ctx->eigen_device<Device>();
- return ::tensorflow::functor::DoInplace(d, op, value, loc, output);
- }
+ private:
+ int32 loc_;
};
template <typename Device, typename T>
-class EmptyOp : public OpKernel {
+class ParallelConcatStart : public OpKernel {
public:
- explicit EmptyOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
- OP_REQUIRES_OK(ctx, ctx->GetAttr("init", &init_));
+ explicit ParallelConcatStart(OpKernelConstruction* ctx) : OpKernel(ctx) {
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &shape_));
}
void Compute(OpKernelContext* ctx) override {
- const Tensor& shape = ctx->input(0);
- OP_REQUIRES(
- ctx, TensorShapeUtils::IsVector(shape.shape()),
- errors::InvalidArgument("shape must be a vector of int32, got shape ",
- shape.shape().DebugString()));
- auto dims = shape.flat<int32>();
- TensorShape out_shape;
- OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape(
- reinterpret_cast<const int32*>(dims.data()),
- dims.size(), &out_shape));
Tensor* out = nullptr;
// We do not know whether the output will be used on GPU. Setting it to be
// gpu-compatible for now.
AllocatorAttributes attr;
attr.set_gpu_compatible(true);
- OP_REQUIRES_OK(ctx, ctx->allocate_output(0, out_shape, &out, attr));
-
- if (init_) {
- functor::SetZeroFunctor<Device, T>()(ctx->eigen_device<Device>(),
- out->flat<T>());
- }
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(0, shape_, &out, attr));
}
private:
- bool init_;
+ TensorShape shape_;
};
class FailureKernel : public OpKernel {
@@ -176,16 +131,15 @@ class FailureKernel : public OpKernel {
REGISTER_KERNEL_BUILDER(Name("_ParallelConcatUpdate") \
.Device(DEVICE_CPU) \
.TypeConstraint<type>("T"), \
- InplaceOp<CPUDevice, functor::I_UPDATE>);
+ ParallelConcatUpdate<CPUDevice>);
TF_CALL_NUMBER_TYPES(REGISTER)
#undef REGISTER
#define REGISTER_EMPTY(type) \
REGISTER_KERNEL_BUILDER(Name("_ParallelConcatStart") \
.Device(DEVICE_CPU) \
- .HostMemory("shape") \
.TypeConstraint<type>("dtype"), \
- EmptyOp<CPUDevice, type>)
+ ParallelConcatStart<CPUDevice, type>)
TF_CALL_POD_STRING_TYPES(REGISTER_EMPTY)
#undef REGISTER_EMPTY
@@ -204,9 +158,8 @@ typedef Eigen::GpuDevice GPUDevice;
#define REGISTER_EMPTY(type) \
REGISTER_KERNEL_BUILDER(Name("_ParallelConcatStart") \
.Device(DEVICE_GPU) \
- .HostMemory("shape") \
.TypeConstraint<type>("dtype"), \
- EmptyOp<GPUDevice, type>);
+ ParallelConcatStart<GPUDevice, type>);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_EMPTY)
#undef REGISTER_EMPTY
@@ -221,7 +174,7 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_PARALLEL_CONCAT);
REGISTER_KERNEL_BUILDER(Name("_ParallelConcatUpdate") \
.Device(DEVICE_GPU) \
.TypeConstraint<type>("T"), \
- InplaceOp<GPUDevice, functor::I_UPDATE>);
+ ParallelConcatUpdate<GPUDevice>);
TF_CALL_GPU_NUMBER_TYPES(REGISTER)
#undef REGISTER
@@ -231,11 +184,10 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER)
REGISTER_KERNEL_BUILDER(Name("_ParallelConcatUpdate")
.Device(DEVICE_GPU)
.HostMemory("value")
- .HostMemory("loc")
.HostMemory("update")
.HostMemory("output")
.TypeConstraint<int32>("T"),
- InplaceOp<CPUDevice, functor::I_UPDATE>);
+ ParallelConcatUpdate<CPUDevice>);
#endif
} // end namespace
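Note: both the CPU and GPU paths of DoParallelConcatUpdate guard the row index with `(loc % nrows + nrows) % nrows`. A small illustration of that double-modulo trick, with a hypothetical helper name:

#include <cassert>

// Sketch only: maps any int row index, including negative ones, into
// the range [0, nrows).
int GuardRow(int loc, int nrows) {
  return (loc % nrows + nrows) % nrows;
}

// e.g. GuardRow(-1, 4) == 3, GuardRow(5, 4) == 1, GuardRow(2, 4) == 2.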
diff --git a/tensorflow/core/kernels/inplace_ops_functor.h b/tensorflow/core/kernels/inplace_ops_functor.h
index 6cb15eda91..53529f5165 100644
--- a/tensorflow/core/kernels/inplace_ops_functor.h
+++ b/tensorflow/core/kernels/inplace_ops_functor.h
@@ -22,19 +22,9 @@ limitations under the License.
namespace tensorflow {
namespace functor {
-// Inplace update/add/sub values in 'y'. It computes
-// y[i, :] = v if op is I_UPDATE
-// y[i, :] += v if op is I_ADD
-// y[i, :] -= v if op is I_SUB
-enum InplaceOpType {
- I_UPDATE, // x = y
- I_ADD, // x += y
- I_SUB, // x -= y
-};
-
template <typename Device>
-Status DoInplace(const Device& device, InplaceOpType op, const Tensor& value,
- const Tensor& loc, Tensor* output);
+Status DoParallelConcat(const Device& device, const Tensor& value, int32 loc,
+ Tensor* output);
} // end namespace functor
} // end namespace tensorflow
diff --git a/tensorflow/core/kernels/inplace_ops_functor_gpu.cu.cc b/tensorflow/core/kernels/inplace_ops_functor_gpu.cu.cc
index 8e70f4575d..8467360435 100644
--- a/tensorflow/core/kernels/inplace_ops_functor_gpu.cu.cc
+++ b/tensorflow/core/kernels/inplace_ops_functor_gpu.cu.cc
@@ -26,72 +26,43 @@ namespace functor {
typedef Eigen::GpuDevice Device;
-template <typename T, InplaceOpType op>
-__global__ void DoInplaceOpKernel(int nthreads, const int64 rows,
- const int64 cols, const int64 n, const T* src,
- const int64* rowids, T* dst) {
+template <typename T>
+__global__ void DoParallelConcatOpKernel(int nthreads, const int64 rows,
+ const int64 cols, int32 loc,
+ const T* src, T* dst) {
CUDA_1D_KERNEL_LOOP(idx, nthreads) {
- int64 r = idx / cols;
int64 c = idx % cols;
- r = (rowids[r] % rows + rows) % rows; // Guard index range.
+ int64 r = (loc % rows + rows) % rows; // Guard index range.
T* p = dst + r * cols + c;
const T* q = src + idx;
- switch (op) {
- case I_UPDATE:
- *p = ldg(q);
- break;
- case I_ADD:
- *p += ldg(q);
- break;
- case I_SUB:
- *p -= ldg(q);
- break;
- }
+ *p = ldg(q);
}
}
template <typename T>
-Status DoInplaceUpdate(const Device& d, InplaceOpType op, const Tensor& value,
- const Tensor& loc, Tensor* output) {
+Status DoParallelConcatUpdate(const Device& d, const Tensor& value, int32 loc,
+ Tensor* output) {
const int64 nelem = value.NumElements();
CudaLaunchConfig cfg = GetCudaLaunchConfig(nelem, d);
auto Toutput = output->flat_outer_dims<T>();
const int64 nrows = Toutput.dimension(0);
const int64 ncols = Toutput.dimension(1);
- const int64 n = loc.NumElements();
const T* src = value.flat<T>().data();
- const int64* rowids = loc.flat<int64>().data();
T* dst = output->flat<T>().data();
- switch (op) {
- case I_UPDATE:
- DoInplaceOpKernel<T, I_UPDATE>
- <<<cfg.block_count, cfg.thread_per_block, 0, d.stream()>>>(
- cfg.virtual_thread_count, nrows, ncols, n, src, rowids, dst);
- break;
- case I_ADD:
- DoInplaceOpKernel<T, I_ADD>
- <<<cfg.block_count, cfg.thread_per_block, 0, d.stream()>>>(
- cfg.virtual_thread_count, nrows, ncols, n, src, rowids, dst);
- break;
- case I_SUB:
- DoInplaceOpKernel<T, I_SUB>
- <<<cfg.block_count, cfg.thread_per_block, 0, d.stream()>>>(
- cfg.virtual_thread_count, nrows, ncols, n, src, rowids, dst);
- break;
- default:
- return errors::InvalidArgument("Unsupported operation type", op);
- }
+ DoParallelConcatOpKernel<T>
+ <<<cfg.block_count, cfg.thread_per_block, 0, d.stream()>>>(
+ cfg.virtual_thread_count, nrows, ncols, loc, src, dst);
return Status::OK();
}
template <>
-Status DoInplace(const Device& d, InplaceOpType op, const Tensor& value,
- const Tensor& loc, Tensor* output) {
+Status DoParallelConcat(const Device& d, const Tensor& value, int32 loc,
+ Tensor* output) {
CHECK_EQ(value.dtype(), output->dtype());
switch (value.dtype()) {
-#define CASE(type) \
- case DataTypeToEnum<type>::value: \
- return DoInplaceUpdate<type>(d, op, value, loc, output); \
+#define CASE(type) \
+ case DataTypeToEnum<type>::value: \
+ return DoParallelConcatUpdate<type>(d, value, loc, output); \
break;
CASE(float)
diff --git a/tensorflow/core/kernels/record_input_op.cc b/tensorflow/core/kernels/record_input_op.cc
new file mode 100644
index 0000000000..878996c9d6
--- /dev/null
+++ b/tensorflow/core/kernels/record_input_op.cc
@@ -0,0 +1,67 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/resource_mgr.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/kernels/record_yielder.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/env.h"
+
+namespace tensorflow {
+
+class RecordInputOp : public OpKernel {
+ public:
+ explicit RecordInputOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+#define GETATTR(TYPE, FIELD) \
+ TYPE FIELD; \
+ OP_REQUIRES_OK(ctx, ctx->GetAttr(#FIELD, &FIELD));
+
+ GETATTR(string, file_pattern);
+ GETATTR(int64, file_random_seed);
+ GETATTR(float, file_shuffle_shift_ratio);
+ GETATTR(int64, file_buffer_size);
+ GETATTR(int64, file_parallelism);
+ GETATTR(int64, batch_size);
+#undef GETATTR
+
+ RecordYielder::Options yopts;
+ yopts.file_pattern = file_pattern;
+ yopts.seed = file_random_seed;
+ yopts.bufsize = file_buffer_size;
+ yopts.file_shuffle_shift_ratio = file_shuffle_shift_ratio;
+ yopts.parallelism = file_parallelism;
+ yielder_ = std::unique_ptr<RecordYielder>(new RecordYielder(ctx, yopts));
+
+ batch_size_ = batch_size;
+ }
+
+ void Compute(OpKernelContext* ctx) override {
+ Tensor out(DT_STRING, {batch_size_});
+ auto t_out = out.flat<string>();
+ for (int i = 0; i < batch_size_; ++i) {
+ OP_REQUIRES_OK(ctx, yielder_->YieldOne(&t_out(i)));
+ }
+ ctx->set_output(0, out);
+ }
+
+ private:
+ int64 batch_size_;
+ std::unique_ptr<RecordYielder> yielder_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("RecordInput").Device(DEVICE_CPU), RecordInputOp);
+} // namespace tensorflow
diff --git a/tensorflow/core/kernels/record_yielder.cc b/tensorflow/core/kernels/record_yielder.cc
new file mode 100644
index 0000000000..e391752289
--- /dev/null
+++ b/tensorflow/core/kernels/record_yielder.cc
@@ -0,0 +1,216 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/record_yielder.h"
+
+#include "tensorflow/core/lib/io/record_reader.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/env.h"
+
+namespace tensorflow {
+
+RecordYielder::RecordYielder(OpKernelConstruction* context,
+ const RecordYielder::Options& opts)
+ : opts_(opts),
+ thread_(new thread::ThreadPool(context->env(), "record_yielder",
+ 1 + opts.parallelism)),
+ epoch_(0),
+ rnd_(opts.seed) {
+ thread_->Schedule([this]() { MainLoop(); });
+}
+
+RecordYielder::~RecordYielder() {
+ {
+ mutex_lock l(mu_);
+ stop_ = true;
+ buf_empty_.notify_all();
+ buf_enough_.notify_all();
+ buf_not_full_.notify_all();
+ }
+ main_loop_done_.WaitForNotification();
+ delete thread_;
+}
+
+Status RecordYielder::YieldOne(string* value) {
+ mutex_lock l(mu_);
+ while (!BufEnough()) {
+ buf_enough_.wait(l);
+ }
+ if (status_.ok()) {
+ bool notify_no_longer_full = !BufNotFull();
+ CHECK(!stop_ && !buf_.empty());
+ *value = std::move(buf_.back());
+ buf_.pop_back();
+ ++num_records_yielded_in_epoch_;
+ // Assumption is that an epoch always has something in the buffer
+ // until it ends. If the input pipeline were much slower than the
+ // consumers, this might not hold. Not sure how to handle that case.
+ if (buf_.empty()) {
+ buf_empty_.notify_all();
+ }
+ if (notify_no_longer_full) {
+ buf_not_full_.notify_all();
+ }
+ }
+ return status_;
+}
+
+struct RecordYielder::Shard {
+ int index; // Shard index.
+ std::vector<string> filenames; // File names given to this shard.
+ Notification done; // Notified when this shard is done.
+ Status status; // Shard status.
+};
+
+bool RecordYielder::ShouldFinish(const Status& s) {
+ mutex_lock l(mu_);
+ status_.Update(s);
+ return stop_ || !status_.ok();
+}
+
+static Status MatchFiles(const string& patterns,
+ std::vector<string>* filenames) {
+ for (const auto& file_pattern : str_util::Split(patterns, ',')) {
+ std::vector<string> tmp_filenames;
+ TF_RETURN_IF_ERROR(
+ Env::Default()->GetMatchingPaths(file_pattern, &tmp_filenames));
+ filenames->insert(filenames->end(),
+ std::make_move_iterator(tmp_filenames.begin()),
+ std::make_move_iterator(tmp_filenames.end()));
+ }
+ return Status::OK();
+}
+
+void RecordYielder::MainLoop() {
+ while (true) {
+ ++epoch_;
+ num_records_yielded_in_epoch_ = 0;
+
+ // Finds all files.
+ std::vector<string> filenames;
+ Status s = MatchFiles(opts_.file_pattern, &filenames);
+ if (ShouldFinish(s)) break;
+
+ if (filenames.empty()) {
+ s = errors::NotFound("Found no files at ", opts_.file_pattern);
+ if (ShouldFinish(s)) break;
+ }
+
+ // Shuffles these files according to the epoch # and random seed.
+ std::mt19937_64 shuffle_rnd(
+ Hash64(reinterpret_cast<char*>(&epoch_), sizeof(epoch_), opts_.seed));
+ std::shuffle(filenames.begin(), filenames.end(), shuffle_rnd);
+
+ // Left-shift the filename list.
+ const int64 num = filenames.size();
+ int64 shift;
+ if (0 <= opts_.file_shuffle_shift_ratio &&
+ opts_.file_shuffle_shift_ratio < 1) {
+ shift = opts_.file_shuffle_shift_ratio * num;
+ std::rotate(filenames.begin(), filenames.begin() + shift,
+ filenames.end());
+ }
+
+ // Shards the files and uses one thread to go through each shard.
+ const int N = opts_.parallelism;
+ std::vector<Shard> shards(N);
+ for (int i = 0; i < N; ++i) {
+ Shard* shard = &shards[i];
+ shard->index = i;
+ for (int j = i; j < filenames.size(); j += N) {
+ shard->filenames.push_back(filenames[j]);
+ }
+ thread_->Schedule([this, shard]() { ShardLoop(shard); });
+ }
+ for (int i = 0; i < N; ++i) {
+ shards[i].done.WaitForNotification();
+ s.Update(shards[i].status);
+ }
+ if (ShouldFinish(s)) break;
+
+ // Starts the next epoch once all buffered records are consumed.
+ {
+ mutex_lock l(mu_);
+ epoch_end_ = true;
+ while (!BufEmpty()) {
+ buf_empty_.wait(l);
+ }
+ epoch_end_ = false;
+ }
+ }
+ main_loop_done_.Notify();
+}
+
+bool RecordYielder::Add(std::vector<string>* values) {
+ mutex_lock l(mu_);
+ while (!BufNotFull()) {
+ buf_not_full_.wait(l);
+ }
+ while (BufNotFull() && !values->empty()) {
+ // Adds values->back(). Swaps its position with another random
+ // element.
+ auto index = rnd_() % (buf_.size() + 1);
+ if (index == buf_.size()) {
+ buf_.push_back(std::move(values->back()));
+ } else {
+ buf_.push_back(std::move(buf_[index]));
+ buf_[index] = std::move(values->back());
+ }
+ values->pop_back();
+ }
+ if (BufEnough()) {
+ buf_enough_.notify_all();
+ }
+ return stop_;
+}
+
+void RecordYielder::ShardLoop(Shard* shard) {
+ std::vector<string> values;
+ const int64 kRecords = 16;
+ for (const string& filename : shard->filenames) {
+ std::unique_ptr<RandomAccessFile> file;
+ if (ShouldFinish(Status::OK())) break;
+ Status s = Env::Default()->NewRandomAccessFile(filename, &file);
+ if (!s.ok()) {
+ shard->status = errors::InvalidArgument("Can't open ", filename);
+ break;
+ }
+ io::RecordReader rdr(file.get());
+ uint64 offset = 0;
+ string record;
+ while (true) {
+ Status s = rdr.ReadRecord(&offset, &record);
+ if (s.ok()) {
+ values.emplace_back(std::move(record));
+ if (values.size() >= kRecords && Add(&values)) {
+ shard->status = errors::Aborted("stopped");
+ break;
+ }
+ } else if (errors::IsOutOfRange(s)) {
+ break;
+ } else {
+ shard->status = s;
+ break;
+ }
+ }
+ }
+ // Adds the remaining values of this shard to buf_.
+ while (!values.empty()) {
+ Add(&values);
+ }
+ shard->done.Notify();
+}
+
+} // namespace tensorflow
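Note: RecordYielder::Add above shuffles as it fills: each incoming record either lands at the end of buf_ or swaps places with a uniformly chosen existing element. A minimal sketch of just that insertion step, stripped of the mutex and condition-variable machinery; the helper name is illustrative:

#include <random>
#include <string>
#include <utility>
#include <vector>

// Sketch only: inserts `value` at a uniformly random position in `buf` by
// moving the displaced element to the back, matching the shuffle-on-insert
// step in RecordYielder::Add.
void RandomInsert(std::vector<std::string>* buf, std::string value,
                  std::mt19937_64* rnd) {
  const size_t index = (*rnd)() % (buf->size() + 1);
  if (index == buf->size()) {
    buf->push_back(std::move(value));
  } else {
    buf->push_back(std::move((*buf)[index]));
    (*buf)[index] = std::move(value);
  }
}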
diff --git a/tensorflow/core/kernels/record_yielder.h b/tensorflow/core/kernels/record_yielder.h
new file mode 100644
index 0000000000..503644f3b8
--- /dev/null
+++ b/tensorflow/core/kernels/record_yielder.h
@@ -0,0 +1,157 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_KERNELS_RECORD_YIELDER_H_
+#define TENSORFLOW_KERNELS_RECORD_YIELDER_H_
+
+#include <atomic>
+#include <random>
+#include <string>
+#include <vector>
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/notification.h"
+#include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+
+namespace tensorflow {
+
+// RecordYielder produces value records from a set of tfrecord files
+// in a random order.
+//
+// It guarantees that:
+// 1) all records in tfrecords are yielded within every epoch;
+// 2) each record is yielded only once within every epoch;
+// 3) the order in which records are yielded is highly randomized;
+// 4) the peak memory usage is roughly avg record size *
+// (opts.bufsize + opts.parallelism * 16).
+//
+// Usage example:
+// RecordYielder::Options opts;
+// opts.file_pattern = "input-*";
+// opts.seed = 301;
+// opts.bufsize = 1000000; // A randomized buffer with 1M records.
+// opts.parallelism = 8; // Uses 8 tfrecord iterators to iterate
+// // through all files.
+// RecordYielder yielder(opts);
+// string val;
+// while (true) {
+// yielder.YieldOne(&val);
+// // process val
+// }
+//
+// RecordYielder can be accessed by multiple threads concurrently.
+class RecordYielder {
+ public:
+ struct Options {
+ // Glob pattern for tfrecords.
+ string file_pattern;
+
+ // Random seed. It determines how data files are shuffled and how
+ // records are shuffled.
+ int64 seed = 0;
+
+ // Each epoch, all files are first shuffled according to the
+ // random seed and the epoch number, and then all files are
+ // left-shifted by file_shuffle_shift_ratio * num_files slots. If
+ // file_shuffle_shift_ratio is not within [0, 1), the
+ // implementation clips it to [0, 1).
+ float file_shuffle_shift_ratio = 0;
+
+ // Randomization buffer keeps this many records.
+ uint64 bufsize = 1;
+
+ // Uses this many concurrent tfrecord iterators to iterate through
+ // tfrecords.
+ int32 parallelism = 1;
+ };
+
+ explicit RecordYielder(OpKernelConstruction* context,
+ const RecordYielder::Options& opts);
+ ~RecordYielder();
+
+ RecordYielder(const RecordYielder&) = delete;
+ RecordYielder& operator=(const RecordYielder&) = delete;
+
+ // Yields one 'value'.
+ Status YieldOne(string* value);
+
+ // Returns the current epoch number.
+ int64 current_epoch() const { return epoch_; }
+
+ private:
+ typedef RecordYielder ME;
+
+ Options opts_;
+
+ // Background threads. Owned.
+ thread::ThreadPool* thread_;
+
+ // Epoch number.
+ std::atomic<int64> epoch_;
+
+ mutex mu_;
+
+ // Set to true when this object is being destroyed.
+ bool stop_ GUARDED_BY(mu_) = false;
+ Status status_ GUARDED_BY(mu_);
+
+ // PRNG used for randomization.
+ std::mt19937_64 rnd_ GUARDED_BY(mu_);
+
+ // Randomization buffer.
+ std::vector<string> buf_ GUARDED_BY(mu_);
+
+ // True iff we are draining an epoch.
+ bool epoch_end_ = false;
+
+ int64 num_records_yielded_in_epoch_ = 0;
+
+ // Notified when the main loop has exited.
+ Notification main_loop_done_;
+
+ // Condition variables and their wait predicates.
+ condition_variable buf_empty_;
+ bool BufEmpty() const SHARED_LOCKS_REQUIRED(mu_) {
+ return stop_ || buf_.empty();
+ }
+
+ condition_variable buf_not_full_;
+ bool BufNotFull() const SHARED_LOCKS_REQUIRED(mu_) {
+ return stop_ || buf_.size() < opts_.bufsize;
+ }
+
+ condition_variable buf_enough_;
+ bool BufEnough() const SHARED_LOCKS_REQUIRED(mu_) {
+ // NOTE: Unless we are finishing an epoch, we want to make sure
+ // the buf_ contains enough randomized elements before yielding
+ // any.
+ return stop_ || !status_.ok() || (epoch_end_ && !buf_.empty()) ||
+ (!epoch_end_ &&
+ buf_.size() >= std::max<int64>(1, opts_.bufsize / 2));
+ }
+
+ void MainLoop();
+ struct Shard;
+ void ShardLoop(Shard* shard);
+ bool ShouldFinish(const Status& s);
+ bool Add(std::vector<string>* values);
+};
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_KERNELS_RECORD_YIELDER_H_
diff --git a/tensorflow/core/kernels/resize_bilinear_op.cc b/tensorflow/core/kernels/resize_bilinear_op.cc
index 6dfe871c52..85d28d2c64 100644
--- a/tensorflow/core/kernels/resize_bilinear_op.cc
+++ b/tensorflow/core/kernels/resize_bilinear_op.cc
@@ -64,6 +64,201 @@ class ResizeBilinearOp : public OpKernel {
bool align_corners_;
};
+namespace {
+// Compute the interpolation indices only once.
+struct CachedInterpolation {
+ int64 lower; // Lower source index used in the interpolation
+ int64 upper; // Upper source index used in the interpolation
+ // 1-D linear interpolation scale (see:
+ // https://en.wikipedia.org/wiki/Bilinear_interpolation)
+ float lerp;
+ // How many consecutive points use the same lower & upper indices
+ int consecutive;
+};
+
+enum ImageScalePattern { SCALE_UP, SIMILAR, SCALE_DOWN };
+
+inline ImageScalePattern compute_image_scale_pattern(const int64 out_height,
+ const int64 out_width,
+ const int64 in_height,
+ const int64 in_width) {
+ if (in_height * 2 < out_height || in_width * 2 < out_width) {
+ return SCALE_UP;
+ } else if (out_height * 2 < in_height || out_width * 2 < in_width) {
+ return SCALE_DOWN;
+ } else {
+ return SIMILAR;
+ }
+}
+
+inline int compute_scratch_size(const int64 out_height, const int64 out_width,
+ const int64 in_height, const int64 in_width,
+ const int channels,
+ const ImageScalePattern scale_pattern) {
+ // Allocate a CachedInterpolation for each y in out_height and each x in
+ // out_width, plus 2 extra to avoid extra branches in the
+ // CachedInterpolation.consecutive computation.
+ const int cached_computation_size =
+ sizeof(CachedInterpolation) * (out_height + out_width + 2);
+ if (scale_pattern == SCALE_DOWN) {
+ return cached_computation_size;
+ } else {
+ // In order to avoid paying the cost of data type conversion multiple times,
+ // we must allocate a temporary image as well.
+ const int tmp_image_size = sizeof(float) * in_height * in_width * channels;
+ // We batch up all memory allocations into a single malloc call for
+ // performance reasons.
+ return cached_computation_size + tmp_image_size;
+ }
+}
+
+inline void compute_interpolation_weights(const ImageScalePattern scale_pattern,
+ const int64 out_size,
+ const int64 in_size,
+ const float scale,
+ CachedInterpolation* interpolation) {
+ interpolation[out_size].lower = 0;
+ interpolation[out_size].upper = 0;
+ interpolation[out_size].consecutive = 0;
+ for (int64 i = out_size - 1; i >= 0; --i) {
+ const float in = i * scale;
+ interpolation[i].lower = static_cast<int64>(in);
+ interpolation[i].upper = std::min(interpolation[i].lower + 1, in_size - 1);
+ interpolation[i].lerp = in - interpolation[i].lower;
+ interpolation[i].consecutive =
+ interpolation[i + 1].lower == interpolation[i].lower &&
+ interpolation[i + 1].upper == interpolation[i].upper
+ ? interpolation[i + 1].consecutive + 1
+ : 1;
+ }
+}
+
+template <typename T>
+struct Converter {
+ static inline const float* convert_image_to_float(
+ typename TTypes<T, 4>::ConstTensor images, const int batch_index,
+ const int64 in_height, const int64 in_width, const int channels,
+ std::vector<float>* converted_image_v) {
+ converted_image_v->resize(in_height * in_width * channels);
+ float* converted_image = converted_image_v->data();
+ for (int64 y = 0; y < in_height; ++y) {
+ for (int64 x = 0; x < in_width; ++x) {
+ for (int c = 0; c < channels; ++c) {
+ converted_image[y * in_width * channels + x * channels + c] =
+ static_cast<float>(images(batch_index, y, x, c));
+ }
+ }
+ }
+ return converted_image;
+ }
+};
+
+template <>
+struct Converter<float> {
+ static inline const float* convert_image_to_float(
+ typename TTypes<float, 4>::ConstTensor images, const int b,
+ const int64 in_height, const int64 in_width, const int channels,
+ std::vector<float>* converted_image_v) {
+ return images.data() + (b * in_height * in_width * channels);
+ }
+};
+
+/**
+ * Computes the bilinear interpolation from the appropriate 4 float points
+ * and the linear interpolation weights.
+ */
+inline float compute_lerp(const float top_left, const float top_right,
+ const float bottom_left, const float bottom_right,
+ const float x_lerp, const float y_lerp) {
+ const float top = top_left + (top_right - top_left) * x_lerp;
+ const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp;
+ return top + (bottom - top) * y_lerp;
+}
+
+template <typename T>
+inline void scale_down_image(typename TTypes<T, 4>::ConstTensor images,
+ const int batch_size, const int64 out_height,
+ const int64 out_width, const int channels,
+ const std::vector<CachedInterpolation>& xs,
+ const std::vector<CachedInterpolation>& ys,
+ typename TTypes<float, 4>::Tensor output) {
+ // Do not eagerly convert all input data points, as we ignore most.
+ for (int b = 0; b < batch_size; ++b) {
+ // Compute the interpolation
+ for (int64 y = 0; y < out_height; ++y) {
+ for (int64 x = 0; x < out_width; ++x) {
+ for (int c = 0; c < channels; ++c) {
+ const float top_left(images(b, ys[y].lower, xs[x].lower, c));
+ const float top_right(images(b, ys[y].lower, xs[x].upper, c));
+ const float bottom_left(images(b, ys[y].upper, xs[x].lower, c));
+ const float bottom_right(images(b, ys[y].upper, xs[x].upper, c));
+ output(b, y, x, c) =
+ compute_lerp(top_left, top_right, bottom_left, bottom_right,
+ xs[x].lerp, ys[y].lerp);
+ }
+ }
+ }
+ }
+}
+
+inline void scale_up_image(const float* input_image, const int batch_index,
+ const int64 out_height, const int64 out_width,
+ const int channels, const int64 in_height,
+ const int64 in_width,
+ const std::vector<CachedInterpolation>& xs,
+ const std::vector<CachedInterpolation>& ys,
+ typename TTypes<float, 4>::Tensor output) {
+ for (int64 y = 0; y < out_height; y += ys[y].consecutive) {
+ const int64 in_y_lower = ys[y].lower * in_width * channels;
+ const int64 in_y_upper = ys[y].upper * in_width * channels;
+ for (int64 x = 0; x < out_width; x += xs[x].consecutive) {
+ const int64 in_x_lower = xs[x].lower * channels;
+ const int64 in_x_upper = xs[x].upper * channels;
+ for (int c = 0; c < channels; ++c) {
+ const float top_left = input_image[in_y_lower + in_x_lower + c];
+ const float top_right = input_image[in_y_lower + in_x_upper + c];
+ const float bottom_left = input_image[in_y_upper + in_x_lower + c];
+ const float bottom_right = input_image[in_y_upper + in_x_upper + c];
+ for (int64 y_inner = y; y_inner < y + ys[y].consecutive; ++y_inner) {
+ for (int64 x_inner = x; x_inner < x + xs[x].consecutive; ++x_inner) {
+ output(batch_index, y_inner, x_inner, c) =
+ compute_lerp(top_left, top_right, bottom_left, bottom_right,
+ xs[x_inner].lerp, ys[y_inner].lerp);
+ }
+ }
+ }
+ }
+ }
+}
+
+inline void scale_similar_image(const float* input_image, const int b,
+ const int64 out_height, const int64 out_width,
+ const int channels, const int64 in_height,
+ const int64 in_width,
+ const std::vector<CachedInterpolation>& xs,
+ const std::vector<CachedInterpolation>& ys,
+ typename TTypes<float, 4>::Tensor output) {
+ // Compute the interpolation
+ for (int64 y = 0; y < out_height; ++y) {
+ const int64 in_y_lower = ys[y].lower * in_width * channels;
+ const int64 in_y_upper = ys[y].upper * in_width * channels;
+ // Similar-sized images do not need the inner consecutive-run loops.
+ for (int64 x = 0; x < out_width; ++x) {
+ const int64 in_x_lower = xs[x].lower * channels;
+ const int64 in_x_upper = xs[x].upper * channels;
+ for (int c = 0; c < channels; ++c) {
+ const float top_left = input_image[in_y_lower + in_x_lower + c];
+ const float top_right = input_image[in_y_lower + in_x_upper + c];
+ const float bottom_left = input_image[in_y_upper + in_x_lower + c];
+ const float bottom_right = input_image[in_y_upper + in_x_upper + c];
+ output(b, y, x, c) = compute_lerp(top_left, top_right, bottom_left,
+ bottom_right, xs[x].lerp, ys[y].lerp);
+ }
+ }
+ }
+}
+} // namespace
+
// Partial specialization of ResizeBilinear functor for a CPUDevice.
namespace functor {
template <typename T>
@@ -71,7 +266,7 @@ struct ResizeBilinear<CPUDevice, T> {
void operator()(const CPUDevice& d, typename TTypes<T, 4>::ConstTensor images,
const float height_scale, const float width_scale,
typename TTypes<float, 4>::Tensor output) {
- const int batch = images.dimension(0);
+ const int batch_size = images.dimension(0);
const int64 in_height = images.dimension(1);
const int64 in_width = images.dimension(2);
const int channels = images.dimension(3);
@@ -79,31 +274,41 @@ struct ResizeBilinear<CPUDevice, T> {
const int64 out_height = output.dimension(1);
const int64 out_width = output.dimension(2);
- for (int b = 0; b < batch; ++b) {
- for (int y = 0; y < out_height; ++y) {
- const float in_y = y * height_scale;
- const int64 top_y_index = static_cast<int64>(floorf(in_y));
- const int64 bottom_y_index =
- std::min(static_cast<int64>(ceilf(in_y)), in_height - 1);
- const float y_lerp = in_y - top_y_index;
- for (int x = 0; x < out_width; ++x) {
- const float in_x = x * width_scale;
- const int64 left_x_index = static_cast<int64>(floorf(in_x));
- const int64 right_x_index =
- std::min(static_cast<int64>(ceilf(in_x)), in_width - 1);
- const float x_lerp = in_x - left_x_index;
- for (int c = 0; c < channels; ++c) {
- const float top_left(images(b, top_y_index, left_x_index, c));
- const float top_right(images(b, top_y_index, right_x_index, c));
- const float bottom_left(images(b, bottom_y_index, left_x_index, c));
- const float bottom_right(
- images(b, bottom_y_index, right_x_index, c));
- const float top = top_left + (top_right - top_left) * x_lerp;
- const float bottom =
- bottom_left + (bottom_right - bottom_left) * x_lerp;
- output(b, y, x, c) = top + (bottom - top) * y_lerp;
- }
- }
+ // Handle no-op resizes efficiently.
+ if (out_height == in_height && out_width == in_width) {
+ output = images.template cast<float>();
+ return;
+ }
+
+ const ImageScalePattern scale_pattern =
+ compute_image_scale_pattern(out_height, out_width, in_height, in_width);
+ std::vector<CachedInterpolation> ys(out_height + 1);
+ std::vector<CachedInterpolation> xs(out_width + 1);
+ std::vector<float> converted_image_v;
+
+ // Compute the cached interpolation weights on the x and y dimensions.
+ compute_interpolation_weights(scale_pattern, out_height, in_height,
+ height_scale, ys.data());
+ compute_interpolation_weights(scale_pattern, out_width, in_width,
+ width_scale, xs.data());
+
+ if (scale_pattern == SCALE_UP) {
+ for (int b = 0; b < batch_size; ++b) {
+ const float* converted_image = Converter<T>::convert_image_to_float(
+ images, b, in_height, in_width, channels, &converted_image_v);
+ scale_up_image(converted_image, b, out_height, out_width, channels,
+ in_height, in_width, xs, ys, output);
+ }
+ } else if (scale_pattern == SCALE_DOWN) {
+ // Do not eagerly convert all input data points, as we ignore most.
+ scale_down_image<T>(images, batch_size, out_height, out_width, channels,
+ xs, ys, output);
+ } else {
+ for (int b = 0; b < batch_size; ++b) {
+ const float* converted_image = Converter<T>::convert_image_to_float(
+ images, b, in_height, in_width, channels, &converted_image_v);
+ scale_similar_image(converted_image, b, out_height, out_width, channels,
+ in_height, in_width, xs, ys, output);
}
}
}
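Note: the rewrite above precomputes, per output coordinate, the two source indices and the lerp weight (CachedInterpolation), so the inner loops only perform the four-point blend. A simplified 1-D sketch of that idea, not the exact kernel and without the consecutive-run optimization:

#include <algorithm>
#include <cstdint>
#include <vector>

// Sketch only: 1-D version of the cached interpolation. Precompute
// lower/upper source indices and the fractional weight once per output
// position, then blend neighbouring samples.
struct Cached1D {
  int64_t lower;
  int64_t upper;
  float lerp;
};

std::vector<float> Resize1D(const std::vector<float>& in, int64_t out_size) {
  const int64_t in_size = in.size();
  const float scale = static_cast<float>(in_size) / out_size;
  std::vector<Cached1D> cache(out_size);
  for (int64_t i = 0; i < out_size; ++i) {
    const float pos = i * scale;
    cache[i].lower = static_cast<int64_t>(pos);
    cache[i].upper = std::min(cache[i].lower + 1, in_size - 1);
    cache[i].lerp = pos - cache[i].lower;
  }
  std::vector<float> out(out_size);
  for (int64_t i = 0; i < out_size; ++i) {
    const float lo = in[cache[i].lower];
    const float hi = in[cache[i].upper];
    out[i] = lo + (hi - lo) * cache[i].lerp;  // same blend as compute_lerp
  }
  return out;
}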
diff --git a/tensorflow/core/kernels/resize_bilinear_op_test.cc b/tensorflow/core/kernels/resize_bilinear_op_test.cc
index 32acdf2df8..a4f1120578 100644
--- a/tensorflow/core/kernels/resize_bilinear_op_test.cc
+++ b/tensorflow/core/kernels/resize_bilinear_op_test.cc
@@ -25,6 +25,7 @@ limitations under the License.
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/random/random.h"
#include "tensorflow/core/platform/test.h"
namespace tensorflow {
@@ -39,6 +40,74 @@ class ResizeBilinearOpTest : public OpsTestBase {
.Finalize(node_def()));
TF_EXPECT_OK(InitOp());
}
+
+ const Tensor* AddRandomImageInput(const TensorShape& shape) {
+ CHECK_GT(input_types_.size(), inputs_.size())
+ << "Adding more inputs than types; perhaps you need to call MakeOp";
+ CHECK_EQ(shape.dims(), 4) << "All images must have 4 dimensions.";
+ bool is_ref = IsRefType(input_types_[inputs_.size()]);
+ Tensor* input = new Tensor(device_->GetAllocator(AllocatorAttributes()),
+ DataTypeToEnum<float>::v(), shape);
+ input->flat<float>().setRandom();
+ tensors_.push_back(input);
+ if (is_ref) {
+ CHECK_EQ(RemoveRefType(input_types_[inputs_.size()]),
+ DataTypeToEnum<float>::v());
+ inputs_.push_back({&lock_for_refs_, input});
+ } else {
+ CHECK_EQ(input_types_[inputs_.size()], DataTypeToEnum<float>::v());
+ inputs_.push_back({nullptr, input});
+ }
+ return input;
+ }
+
+ // This is the straightforward, unoptimized implementation of resize bilinear.
+ // We use it to confirm that the optimized version produces identical results.
+ void ResizeBilinearBaseline(TTypes<float, 4>::ConstTensor images,
+ TTypes<float, 4>::Tensor output) {
+ const int batch = images.dimension(0);
+ const int64 in_height = images.dimension(1);
+ const int64 in_width = images.dimension(2);
+ const int channels = images.dimension(3);
+
+ ASSERT_EQ(batch, output.dimension(0));
+ ASSERT_EQ(channels, output.dimension(3));
+
+ const int64 out_height = output.dimension(1);
+ const int64 out_width = output.dimension(2);
+
+ const float height_scale = in_height / static_cast<float>(out_height);
+ const float width_scale = in_width / static_cast<float>(out_width);
+
+ for (int b = 0; b < batch; ++b) {
+ for (int64 y = 0; y < out_height; ++y) {
+ const float in_y = y * height_scale;
+ const int64 top_y_index = static_cast<int64>(floorf(in_y));
+ const int64 bottom_y_index =
+ std::min(static_cast<int64>(ceilf(in_y)), in_height - 1);
+ const float y_lerp = in_y - top_y_index;
+ for (int64 x = 0; x < out_width; ++x) {
+ const float in_x = x * width_scale;
+ const int64 left_x_index = static_cast<int64>(floorf(in_x));
+ const int64 right_x_index =
+ std::min(static_cast<int64>(ceilf(in_x)), in_width - 1);
+ const float x_lerp = in_x - left_x_index;
+ for (int c = 0; c < channels; ++c) {
+ const float top_left = images(b, top_y_index, left_x_index, c);
+ const float top_right = images(b, top_y_index, right_x_index, c);
+ const float bottom_left =
+ images(b, bottom_y_index, left_x_index, c);
+ const float bottom_right =
+ images(b, bottom_y_index, right_x_index, c);
+ const float top = top_left + (top_right - top_left) * x_lerp;
+ const float bottom =
+ bottom_left + (bottom_right - bottom_left) * x_lerp;
+ output(b, y, x, c) = top + (bottom - top) * y_lerp;
+ }
+ }
+ }
+ }
+ }
};
class ResizeBilinearOpAlignCornersTest : public OpsTestBase {
@@ -68,6 +137,23 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2To1x1) {
test::ExpectTensorEqual<float>(expected, *GetOutput(0));
}
+TEST_F(ResizeBilinearOpTest, TestBilinearRandom2x2To1x1) {
+ const Tensor* input = AddRandomImageInput(TensorShape({1, 2, 2, 1}));
+ AddInputFromArray<int32>(TensorShape({2}), {1, 1});
+ TF_ASSERT_OK(RunOpKernel());
+
+ // When scaling down, we have to arbitrarily pick a pixel from the
+ // original input. In this case, we choose the top-left-most pixel.
+ Tensor* output = GetOutput(0);
+ std::unique_ptr<Tensor> expected(
+ new Tensor(device_->GetAllocator(AllocatorAttributes()),
+ DataTypeToEnum<float>::v(), TensorShape({1, 1, 1, 1})));
+ ResizeBilinearBaseline(input->tensor<float, 4>(),
+ expected->tensor<float, 4>());
+ EXPECT_EQ(input->flat<float>()(0), output->flat<float>()(0));
+ test::ExpectTensorEqual<float>(*expected.get(), *output);
+}
+
TEST_F(ResizeBilinearOpAlignCornersTest, TestBilinearAlignCorners2x2To1x1) {
// Input:
// 1, 2
@@ -302,6 +388,62 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2To4x4) {
test::ExpectTensorEqual<float>(expected, *GetOutput(0));
}
+TEST_F(ResizeBilinearOpTest, TestBilinearRandom183x299To299x299) {
+ const TensorShape shape({1, 183, 299, 1});
+ const Tensor* input = AddRandomImageInput(shape);
+ AddInputFromArray<int32>(TensorShape({2}), {299, 299});
+ TF_ASSERT_OK(RunOpKernel());
+
+ std::unique_ptr<Tensor> expected(
+ new Tensor(device_->GetAllocator(AllocatorAttributes()),
+ DataTypeToEnum<float>::v(), TensorShape({1, 299, 299, 1})));
+ ResizeBilinearBaseline(input->tensor<float, 4>(),
+ expected->tensor<float, 4>());
+ test::ExpectTensorEqual<float>(*expected, *GetOutput(0));
+}
+
+TEST_F(ResizeBilinearOpTest, TestBilinearRandom141x186To299x299) {
+ const TensorShape shape({1, 141, 186, 1});
+ const Tensor* input = AddRandomImageInput(shape);
+ AddInputFromArray<int32>(TensorShape({2}), {299, 299});
+ TF_ASSERT_OK(RunOpKernel());
+
+ std::unique_ptr<Tensor> expected(
+ new Tensor(device_->GetAllocator(AllocatorAttributes()),
+ DataTypeToEnum<float>::v(), TensorShape({1, 299, 299, 1})));
+ ResizeBilinearBaseline(input->tensor<float, 4>(),
+ expected->tensor<float, 4>());
+ test::ExpectTensorEqual<float>(*expected, *GetOutput(0));
+}
+
+TEST_F(ResizeBilinearOpTest, TestBilinearRandom749x603To299x299) {
+ const TensorShape shape({1, 749, 603, 1});
+ const Tensor* input = AddRandomImageInput(shape);
+ AddInputFromArray<int32>(TensorShape({2}), {299, 299});
+ TF_ASSERT_OK(RunOpKernel());
+
+ std::unique_ptr<Tensor> expected(
+ new Tensor(device_->GetAllocator(AllocatorAttributes()),
+ DataTypeToEnum<float>::v(), TensorShape({1, 299, 299, 1})));
+ ResizeBilinearBaseline(input->tensor<float, 4>(),
+ expected->tensor<float, 4>());
+ test::ExpectTensorEqual<float>(*expected, *GetOutput(0));
+}
+
+TEST_F(ResizeBilinearOpTest, TestBilinearRandom299x299To299x299) {
+ const TensorShape shape({1, 299, 299, 1});
+ const Tensor* input = AddRandomImageInput(shape);
+ AddInputFromArray<int32>(TensorShape({2}), {299, 299});
+ TF_ASSERT_OK(RunOpKernel());
+
+ std::unique_ptr<Tensor> expected(
+ new Tensor(device_->GetAllocator(AllocatorAttributes()),
+ DataTypeToEnum<float>::v(), TensorShape({1, 299, 299, 1})));
+ ResizeBilinearBaseline(input->tensor<float, 4>(),
+ expected->tensor<float, 4>());
+ test::ExpectTensorEqual<float>(*expected, *GetOutput(0));
+}
+
TEST_F(ResizeBilinearOpTest, TestInvalidOutputSize) {
AddInputFromArray<float>(TensorShape({1, 2, 2, 1}), {1, 2, 3, 4});
AddInputFromArray<int32>(TensorShape({2}), {0, 0});
diff --git a/tensorflow/core/kernels/sparse_matmul_op.cc b/tensorflow/core/kernels/sparse_matmul_op.cc
index 6c4f20a23a..6a3f3dfc77 100644
--- a/tensorflow/core/kernels/sparse_matmul_op.cc
+++ b/tensorflow/core/kernels/sparse_matmul_op.cc
@@ -1386,21 +1386,21 @@ void wrapper_libxsmm_spmdm_createSparseSlice_generic_thread(
void wrapper_libxsmm_spmdm_compute_generic_thread(
empty_type_wrapper<bfloat16>, const libxsmm_spmdm_handle* handle,
char transA, char transB, const bfloat16* alpha,
- libxsmm_CSR_sparseslice* A_sparse, const bfloat16* B, const bfloat16* beta,
- float* C, int block_id, int tid, int nthreads) {
+ libxsmm_CSR_sparseslice* A_sparse, const bfloat16* B, char transC,
+ const bfloat16* beta, float* C, int block_id, int tid, int nthreads) {
return libxsmm_spmdm_compute_bfloat16_thread(
handle, transA, transB, reinterpret_cast<const uint16*>(alpha), A_sparse,
- reinterpret_cast<const uint16*>(B), 'N', reinterpret_cast<const uint16*>(beta),
- C, block_id, tid, nthreads);
+ reinterpret_cast<const uint16*>(B), transC,
+ reinterpret_cast<const uint16*>(beta), C, block_id, tid, nthreads);
}
void wrapper_libxsmm_spmdm_compute_generic_thread(
empty_type_wrapper<float>, const libxsmm_spmdm_handle* handle, char transA,
char transB, const float* alpha, libxsmm_CSR_sparseslice* A_sparse,
- const float* B, const float* beta, float* C, int block_id, int tid,
- int nthreads) {
+ const float* B, char transC, const float* beta, float* C, int block_id,
+ int tid, int nthreads) {
return libxsmm_spmdm_compute_fp32_thread(handle, transA, transB, alpha,
- A_sparse, B, 'N', beta, C, block_id, tid,
- nthreads);
+ A_sparse, B, transC, beta, C,
+ block_id, tid, nthreads);
}
class PinnedToCurrentCPU {
@@ -1438,7 +1438,7 @@ inline void LibxsmmSparseMatMul<TL, TR>::Compute(
const typename LibxsmmSparseMatMul<TL, TR>::ConstMatrixMapR& right,
bool transpose_left, const DeviceBase::CpuWorkerThreads* thread_pool,
bool transpose_output, MatrixMap* output) {
- if (transpose_output || transpose_left) {
+ if (false) {
// Not handled by libxsmm currently
SparseMatMul<TL, TR>::Compute(
nullptr /* Assumes no cached data for fallback */, left, right,
@@ -1455,7 +1455,6 @@ inline void LibxsmmSparseMatMul<TL, TR>::Compute(
(transpose_output ? output->dimension(1) : output->dimension(0)));
CHECK_EQ(right_dim1,
(transpose_output ? output->dimension(0) : output->dimension(1)));
- CHECK(!transpose_output);
if (left_dim0 < 32 || left_dim1 < 32 || right_dim1 < 32) {
// Causes problems in libxsmm
SparseMatMul<TL, TR>::Compute(
@@ -1482,7 +1481,7 @@ inline void LibxsmmSparseMatMul<TL, TR>::Compute(
if (work_item >= total_num_creation_blocks) break;
wrapper_libxsmm_spmdm_createSparseSlice_generic_thread(
empty_type_wrapper<TL>{}, &entry->handle,
- (transpose_left ? 'T' : 'N'), left_data, entry->output_csr, work_item,
+ (transpose_left ? 'Y' : 'N'), left_data, entry->output_csr, work_item,
i, num_threads);
}
});
@@ -1504,8 +1503,9 @@ inline void LibxsmmSparseMatMul<TL, TR>::Compute(
const TL beta(0.0); // Stored in a variable so we can get a pointer
wrapper_libxsmm_spmdm_compute_generic_thread(
empty_type_wrapper<TL>{}, &entry->handle,
- (transpose_left ? 'T' : 'N'), 'N', &alpha, entry->output_csr,
- right_data, &beta, output_data, work_item, i, num_threads);
+ (transpose_left ? 'Y' : 'N'), 'N', &alpha, entry->output_csr,
+ right_data, (transpose_output ? 'Y' : 'N'), &beta, output_data,
+ work_item, i, num_threads);
}
});
// Put handle + CSR storage back into cache
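
The wrapper changes above thread a transC flag ('Y'/'N') through to libxsmm instead of hardcoding 'N', so a transposed output no longer has to fall back to the generic SparseMatMul path. A toy dense sketch, purely to illustrate what the flag selects (it is not libxsmm and not the sparse kernel):

#include <vector>

// With transC == 'N' the product lands in C[m][n]; with 'Y' it lands in
// C[n][m]. The spmdm wrappers above now forward an analogous flag.
void MatMulWithOptionalTranspose(const std::vector<float>& a,  // M x K
                                 const std::vector<float>& b,  // K x N
                                 std::vector<float>* c, int M, int K, int N,
                                 char transC) {
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      float acc = 0.f;
      for (int k = 0; k < K; ++k) acc += a[m * K + k] * b[k * N + n];
      if (transC == 'Y') {
        (*c)[n * M + m] = acc;  // output interpreted as N x M
      } else {
        (*c)[m * N + n] = acc;  // output interpreted as M x N
      }
    }
  }
}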
diff --git a/tensorflow/core/kernels/variable_ops.h b/tensorflow/core/kernels/variable_ops.h
index d8d8831702..2839c3d8cf 100644
--- a/tensorflow/core/kernels/variable_ops.h
+++ b/tensorflow/core/kernels/variable_ops.h
@@ -53,29 +53,29 @@ class VariableOp : public OpKernel {
dtype_ = RemoveRefType(context->output_type(0));
}
- ~VariableOp() override {
- if (var_) var_->Unref();
- }
-
void Compute(OpKernelContext* ctx) override {
mutex_lock l(init_mu_);
- if (var_ == nullptr) {
- OP_REQUIRES_OK(ctx, cinfo_.Init(ctx->resource_manager(), def(),
- true /* use name() */));
- auto creator = [this](Var** var) {
- *var = new Var(dtype_);
- (*var)->tensor()->set_shape(shape_);
- return Status::OK();
- };
- OP_REQUIRES_OK(ctx,
- cinfo_.resource_manager()->LookupOrCreate<Var>(
- cinfo_.container(), cinfo_.name(), &var_, creator));
+ if (!initialized_) {
+ OP_REQUIRES_OK(
+ ctx,
+ cinfo_.Init(ctx->resource_manager(), def(), true /* use name() */));
+ initialized_ = true;
}
+ auto creator = [this](Var** var) {
+ *var = new Var(dtype_);
+ (*var)->tensor()->set_shape(shape_);
+ return Status::OK();
+ };
+ Var* var;
+ OP_REQUIRES_OK(ctx,
+ cinfo_.resource_manager()->LookupOrCreate<Var>(
+ cinfo_.container(), cinfo_.name(), &var, creator));
// Output a reference to our tensor, so it may be updated.
//
- // As long as *this is alive, the ref we return here is valid
- // because *this owns a ref on var_.
- ctx->set_output_ref(0, var_->mu(), var_->tensor());
+ // As long as the resource manager hasn't been cleared, the ref we return
+ // here is valid because it owns a ref on var.
+ ctx->set_output_ref(0, var->mu(), var->tensor());
+ var->Unref();
}
private:
@@ -84,7 +84,7 @@ class VariableOp : public OpKernel {
mutex init_mu_;
ContainerInfo cinfo_ GUARDED_BY(init_mu_);
- Var* var_ GUARDED_BY(init_mu_) = nullptr;
+ bool initialized_ GUARDED_BY(init_mu_){false};
TF_DISALLOW_COPY_AND_ASSIGN(VariableOp);
};
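
The rewritten VariableOp relies on the ResourceMgr ownership contract: LookupOrCreate hands back a reference the caller owns and must Unref once the output ref has been published. A self-contained toy model of that contract, a sketch rather than the TensorFlow ResourceMgr:

#include <functional>
#include <string>
#include <unordered_map>

// Toy resource manager illustrating the reference-counting convention the
// new Compute() above depends on.
struct ToyVar {
  int refs = 1;             // the manager's own reference
  void Ref() { ++refs; }
  void Unref() { --refs; }  // real code would delete at zero
};

struct ToyResourceMgr {
  std::unordered_map<std::string, ToyVar*> vars;
  ToyVar* LookupOrCreate(const std::string& name,
                         const std::function<ToyVar*()>& creator) {
    auto it = vars.find(name);
    if (it == vars.end()) it = vars.emplace(name, creator()).first;
    it->second->Ref();      // caller receives its own reference and must Unref
    return it->second;
  }
};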
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 7ce667675d..d61e7b32de 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -1226,11 +1226,9 @@ Equivalent to np.full
// --------------------------------------------------------------------------
REGISTER_OP("_ParallelConcatStart")
- .Input("shape: Tshape")
.Output("output: dtype")
+ .Attr("shape: shape")
.Attr("dtype: type")
- .Attr("Tshape: {int32, int64} = DT_INT32")
- .Attr("init: bool = false")
.SetIsStateful()
.SetShapeFn([](InferenceContext* c) {
ShapeHandle out;
@@ -1246,44 +1244,27 @@ conjunction with inplace operations.
shape: 1-D `Tensor` indicating the shape of the output.
dtype: The element type of the returned tensor.
-init: `bool` indicating whether or not to zero the allocated memory.
output: An empty Tensor of the specified type.
)doc");
// --------------------------------------------------------------------------
REGISTER_OP("_ParallelConcatUpdate")
.Input("value: T")
- .Input("loc: Tshape")
.Input("update: T")
.Output("output: T")
.Attr("T: type")
- .Attr("Tshape: {int32, int64} = DT_INT32")
+ .Attr("loc: int")
.SetShapeFn(shape_inference::UnchangedShape)
.Doc(R"doc(
Updates input `value` at `loc` with `update`.
-If `loc` is None, `value` and `update` must be the same size.
-```
-value = update
-```
-
-If `loc` is a scalar, `value` has rank 1 higher than `update`
-```
-value[i, :] = update
-```
-
-If `loc` is a vector, `value` has the same rank as `update`
-```
-value[loc, :] = update
-```
-
If you use this function you will almost certainly want to add
a control dependency as done in the implementation of parallel_stack to
avoid race conditions.
value: A `Tensor` object that will be updated in-place.
-loc: A scalar or 1-D `Tensor` indicating the indices of the first dimension
- such that value[loc, :] is updated.
+loc: A scalar indicating the index of the first dimension such that
+ value[loc, :] is updated.
update: A `Tensor` of rank one less than `value` if `loc` is a scalar,
otherwise of rank equal to `value` that contains the new values
for `value`.
@@ -1917,7 +1898,7 @@ This op first slices `input` along the dimension `batch_dim`, and for each
slice `i`, reverses the first `seq_lengths[i]` elements along
the dimension `seq_dim`.
-The elements of `seq_lengths` must obey `seq_lengths[i] < input.dims[seq_dim]`,
+The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`,
and `seq_lengths` must be a vector of length `input.dims[batch_dim]`.
The output slice `i` along dimension `batch_dim` is then given by input
@@ -1970,7 +1951,7 @@ output[2:, :, 3, :, ...] = input[2:, :, 3, :, ...]
input: The input to reverse.
seq_lengths: 1-D with length `input.dims(batch_dim)` and
- `max(seq_lengths) < input.dims(seq_dim)`
+ `max(seq_lengths) <= input.dims(seq_dim)`
seq_dim: The dimension which is partially reversed.
batch_dim: The dimension along which reversal is performed.
output: The partially reversed input. It has the same shape as `input`.
diff --git a/tensorflow/core/ops/compat/ops_history.v0.pbtxt b/tensorflow/core/ops/compat/ops_history.v0.pbtxt
index cfb7504664..49297ae409 100644
--- a/tensorflow/core/ops/compat/ops_history.v0.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v0.pbtxt
@@ -23219,6 +23219,53 @@ op {
}
}
op {
+ name: "RecordInput"
+ output_arg {
+ name: "records"
+ type: DT_STRING
+ }
+ attr {
+ name: "file_pattern"
+ type: "string"
+ }
+ attr {
+ name: "file_random_seed"
+ type: "int"
+ default_value {
+ i: 301
+ }
+ }
+ attr {
+ name: "file_shuffle_shift_ratio"
+ type: "float"
+ default_value {
+ f: 0
+ }
+ }
+ attr {
+ name: "file_buffer_size"
+ type: "int"
+ default_value {
+ i: 10000
+ }
+ }
+ attr {
+ name: "file_parallelism"
+ type: "int"
+ default_value {
+ i: 16
+ }
+ }
+ attr {
+ name: "batch_size"
+ type: "int"
+ default_value {
+ i: 32
+ }
+ }
+ is_stateful: true
+}
+op {
name: "ReduceJoin"
input_arg {
name: "inputs"
diff --git a/tensorflow/core/ops/data_flow_ops.cc b/tensorflow/core/ops/data_flow_ops.cc
index 54e766e8e9..a19d9483a1 100644
--- a/tensorflow/core/ops/data_flow_ops.cc
+++ b/tensorflow/core/ops/data_flow_ops.cc
@@ -2211,4 +2211,27 @@ dequeue with many fewer capabilities and options. This Op is optimized for
performance.
)doc");
+REGISTER_OP("RecordInput")
+ .Output("records: string")
+ .Attr("file_pattern: string")
+ .Attr("file_random_seed: int = 301")
+ .Attr("file_shuffle_shift_ratio: float = 0")
+ .Attr("file_buffer_size: int = 10000")
+ .Attr("file_parallelism: int = 16")
+ .Attr("batch_size: int = 32")
+ .SetIsStateful()
+ .SetShapeFn(shape_inference::UnknownShape)
+ .Doc(R"doc(
+Emits randomized records.
+
+records: A tensor of shape [batch_size].
+file_pattern: Glob pattern for the data files.
+file_random_seed: Random seeds used to produce randomized records.
+file_shuffle_shift_ratio: Shifts the list of files after the list is randomly
+ shuffled.
+file_buffer_size: The randomization shuffling buffer.
+file_parallelism: How many sstables are opened and concurrently iterated over.
+batch_size: The batch size.
+)doc");
+
} // namespace tensorflow
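
For reference, a node for the newly registered RecordInput op can be assembled with NodeDefBuilder; attributes left unset fall back to the defaults declared above (file_random_seed = 301, file_buffer_size = 10000, and so on). The file pattern below is a made-up example path, not one from this change:

#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/node_def_builder.h"

namespace tensorflow {

// Builds a NodeDef for RecordInput with an explicit pattern and batch size;
// all other attrs take the registered defaults when Finalize() runs.
Status MakeRecordInputNode(NodeDef* node_def) {
  return NodeDefBuilder("record_input", "RecordInput")
      .Attr("file_pattern", "/tmp/data/records-*")
      .Attr("batch_size", 64)
      .Finalize(node_def);
}

}  // namespace tensorflow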
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index e631c289c6..937e9f588c 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -15028,6 +15028,61 @@ op {
description: "Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy`\nis the corresponding input gradient."
}
op {
+ name: "RecordInput"
+ output_arg {
+ name: "records"
+ description: "A tensor of shape [batch_size]."
+ type: DT_STRING
+ }
+ attr {
+ name: "file_pattern"
+ type: "string"
+ description: "Glob pattern for the data files."
+ }
+ attr {
+ name: "file_random_seed"
+ type: "int"
+ default_value {
+ i: 301
+ }
+ description: "Random seeds used to produce randomized records."
+ }
+ attr {
+ name: "file_shuffle_shift_ratio"
+ type: "float"
+ default_value {
+ f: 0
+ }
+ description: "Shifts the list of files after the list is randomly\nshuffled."
+ }
+ attr {
+ name: "file_buffer_size"
+ type: "int"
+ default_value {
+ i: 10000
+ }
+ description: "The randomization shuffling buffer."
+ }
+ attr {
+ name: "file_parallelism"
+ type: "int"
+ default_value {
+ i: 16
+ }
+ description: "How many sstables are opened and concurrently iterated over."
+ }
+ attr {
+ name: "batch_size"
+ type: "int"
+ default_value {
+ i: 32
+ }
+ description: "The batch size."
+ }
+ summary: "Emits randomized records."
+ is_stateful: true
+}
+op {
name: "ReduceJoin"
input_arg {
name: "inputs"
@@ -17453,7 +17508,7 @@ op {
}
input_arg {
name: "seq_lengths"
- description: "1-D with length `input.dims(batch_dim)` and\n`max(seq_lengths) < input.dims(seq_dim)`"
+ description: "1-D with length `input.dims(batch_dim)` and\n`max(seq_lengths) <= input.dims(seq_dim)`"
type_attr: "Tlen"
}
output_arg {
@@ -17492,7 +17547,7 @@ op {
}
}
summary: "Reverses variable length slices."
- description: "This op first slices `input` along the dimension `batch_dim`, and for each\nslice `i`, reverses the first `seq_lengths[i]` elements along\nthe dimension `seq_dim`.\n\nThe elements of `seq_lengths` must obey `seq_lengths[i] < input.dims[seq_dim]`,\nand `seq_lengths` must be a vector of length `input.dims[batch_dim]`.\n\nThe output slice `i` along dimension `batch_dim` is then given by input\nslice `i`, with the first `seq_lengths[i]` slices along dimension\n`seq_dim` reversed.\n\nFor example:\n\n```prettyprint\n# Given this:\nbatch_dim = 0\nseq_dim = 1\ninput.dims = (4, 8, ...)\nseq_lengths = [7, 2, 3, 5]\n\n# then slices of input are reversed on seq_dim, but only up to seq_lengths:\noutput[0, 0:7, :, ...] = input[0, 7:0:-1, :, ...]\noutput[1, 0:2, :, ...] = input[1, 2:0:-1, :, ...]\noutput[2, 0:3, :, ...] = input[2, 3:0:-1, :, ...]\noutput[3, 0:5, :, ...] = input[3, 5:0:-1, :, ...]\n\n# while entries past seq_lens are copied through:\noutput[0, 7:, :, ...] = input[0, 7:, :, ...]\noutput[1, 2:, :, ...] = input[1, 2:, :, ...]\noutput[2, 3:, :, ...] = input[2, 3:, :, ...]\noutput[3, 2:, :, ...] = input[3, 2:, :, ...]\n```\n\nIn contrast, if:\n\n```prettyprint\n# Given this:\nbatch_dim = 2\nseq_dim = 0\ninput.dims = (8, ?, 4, ...)\nseq_lengths = [7, 2, 3, 5]\n\n# then slices of input are reversed on seq_dim, but only up to seq_lengths:\noutput[0:7, :, 0, :, ...] = input[7:0:-1, :, 0, :, ...]\noutput[0:2, :, 1, :, ...] = input[2:0:-1, :, 1, :, ...]\noutput[0:3, :, 2, :, ...] = input[3:0:-1, :, 2, :, ...]\noutput[0:5, :, 3, :, ...] = input[5:0:-1, :, 3, :, ...]\n\n# while entries past seq_lens are copied through:\noutput[7:, :, 0, :, ...] = input[7:, :, 0, :, ...]\noutput[2:, :, 1, :, ...] = input[2:, :, 1, :, ...]\noutput[3:, :, 2, :, ...] = input[3:, :, 2, :, ...]\noutput[2:, :, 3, :, ...] = input[2:, :, 3, :, ...]\n```"
+ description: "This op first slices `input` along the dimension `batch_dim`, and for each\nslice `i`, reverses the first `seq_lengths[i]` elements along\nthe dimension `seq_dim`.\n\nThe elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`,\nand `seq_lengths` must be a vector of length `input.dims[batch_dim]`.\n\nThe output slice `i` along dimension `batch_dim` is then given by input\nslice `i`, with the first `seq_lengths[i]` slices along dimension\n`seq_dim` reversed.\n\nFor example:\n\n```prettyprint\n# Given this:\nbatch_dim = 0\nseq_dim = 1\ninput.dims = (4, 8, ...)\nseq_lengths = [7, 2, 3, 5]\n\n# then slices of input are reversed on seq_dim, but only up to seq_lengths:\noutput[0, 0:7, :, ...] = input[0, 7:0:-1, :, ...]\noutput[1, 0:2, :, ...] = input[1, 2:0:-1, :, ...]\noutput[2, 0:3, :, ...] = input[2, 3:0:-1, :, ...]\noutput[3, 0:5, :, ...] = input[3, 5:0:-1, :, ...]\n\n# while entries past seq_lens are copied through:\noutput[0, 7:, :, ...] = input[0, 7:, :, ...]\noutput[1, 2:, :, ...] = input[1, 2:, :, ...]\noutput[2, 3:, :, ...] = input[2, 3:, :, ...]\noutput[3, 2:, :, ...] = input[3, 2:, :, ...]\n```\n\nIn contrast, if:\n\n```prettyprint\n# Given this:\nbatch_dim = 2\nseq_dim = 0\ninput.dims = (8, ?, 4, ...)\nseq_lengths = [7, 2, 3, 5]\n\n# then slices of input are reversed on seq_dim, but only up to seq_lengths:\noutput[0:7, :, 0, :, ...] = input[7:0:-1, :, 0, :, ...]\noutput[0:2, :, 1, :, ...] = input[2:0:-1, :, 1, :, ...]\noutput[0:3, :, 2, :, ...] = input[3:0:-1, :, 2, :, ...]\noutput[0:5, :, 3, :, ...] = input[5:0:-1, :, 3, :, ...]\n\n# while entries past seq_lens are copied through:\noutput[7:, :, 0, :, ...] = input[7:, :, 0, :, ...]\noutput[2:, :, 1, :, ...] = input[2:, :, 1, :, ...]\noutput[3:, :, 2, :, ...] = input[3:, :, 2, :, ...]\noutput[2:, :, 3, :, ...] = input[2:, :, 3, :, ...]\n```"
}
op {
name: "ReverseV2"
diff --git a/tensorflow/core/platform/profile_utils/clock_cycle_profiler.cc b/tensorflow/core/platform/profile_utils/clock_cycle_profiler.cc
new file mode 100644
index 0000000000..6f852a653f
--- /dev/null
+++ b/tensorflow/core/platform/profile_utils/clock_cycle_profiler.cc
@@ -0,0 +1,37 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/platform/profile_utils/clock_cycle_profiler.h"
+
+#include <chrono>
+
+namespace tensorflow {
+
+void ClockCycleProfiler::DumpStatistics(const string& tag) {
+ CHECK(!IsStarted());
+ const double average_clock_cycle = GetAverageClockCycle();
+ const double count = GetCount();
+ const std::chrono::duration<double> average_time =
+ profile_utils::CpuUtils::ConvertClockCycleToTime(
+ static_cast<int64>(average_clock_cycle + 0.5));
+ LOG(INFO) << tag << ": average = "
+ << std::chrono::duration_cast<std::chrono::microseconds>(
+ average_time)
+ .count()
+ << " us (" << average_clock_cycle << " cycles)"
+ << ", count = " << count;
+}
+
+} // namespace tensorflow
diff --git a/tensorflow/core/platform/profile_utils/clock_cycle_profiler.h b/tensorflow/core/platform/profile_utils/clock_cycle_profiler.h
new file mode 100644
index 0000000000..876bb9c020
--- /dev/null
+++ b/tensorflow/core/platform/profile_utils/clock_cycle_profiler.h
@@ -0,0 +1,104 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_PLATFORM_PROFILE_UTILS_CLOCK_CYCLE_PROFILER_H_
+#define TENSORFLOW_PLATFORM_PROFILE_UTILS_CLOCK_CYCLE_PROFILER_H_
+
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/profile_utils/cpu_utils.h"
+
+namespace tensorflow {
+
+class ClockCycleProfiler {
+ public:
+ ClockCycleProfiler() = default;
+
+ // Start counting clock cycle.
+ inline void Start() {
+ CHECK(!IsStarted()) << "Profiler has already been started.";
+ start_clock_ = GetCurrentClockCycleInternal();
+ }
+
+ // Stop counting clock cycle.
+ inline void Stop() {
+ CHECK(IsStarted()) << "Profiler is not started yet.";
+ AccumulateClockCycle();
+ }
+
+ // Get how many times Start() is called.
+ inline double GetCount() {
+ CHECK(!IsStarted());
+ return count_;
+ }
+
+ // Get average clock cycle.
+ inline double GetAverageClockCycle() {
+ CHECK(!IsStarted());
+ return average_clock_cycle_;
+ }
+
+ // TODO(satok): Support more statistics (e.g. standard deviation)
+ // Get worst clock cycle.
+ inline double GetWorstClockCycle() {
+ CHECK(!IsStarted());
+ return worst_clock_cycle_;
+ }
+
+ // Dump statistics
+ void DumpStatistics(const string& tag);
+
+ private:
+ inline uint64 GetCurrentClockCycleInternal() {
+ const uint64 clockCycle = profile_utils::CpuUtils::GetCurrentClockCycle();
+ if (clockCycle <= 0) {
+ if (valid_) {
+ LOG(WARNING) << "GetCurrentClockCycle is not implemented."
+ << " Returning 1 instead.";
+ valid_ = false;
+ }
+ return 1;
+ } else {
+ return clockCycle;
+ }
+ }
+
+ inline bool IsStarted() const { return start_clock_ > 0; }
+
+ inline void AccumulateClockCycle() {
+ const uint64 now = GetCurrentClockCycleInternal();
+ const double clock_diff = static_cast<double>(now - start_clock_);
+ const double next_count = count_ + 1.0;
+ const double next_count_inv = 1.0 / next_count;
+ const double next_ave_cpu_clock =
+ next_count_inv * (average_clock_cycle_ * count_ + clock_diff);
+ count_ = next_count;
+ average_clock_cycle_ = next_ave_cpu_clock;
+ worst_clock_cycle_ = std::max(worst_clock_cycle_, clock_diff);
+ start_clock_ = 0;
+ }
+
+ uint64 start_clock_{0};
+ double count_{0.0};
+ double average_clock_cycle_{0.0};
+ double worst_clock_cycle_{0.0};
+ bool valid_{true};
+
+ TF_DISALLOW_COPY_AND_ASSIGN(ClockCycleProfiler);
+};
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_PLATFORM_PROFILE_UTILS_CLOCK_CYCLE_PROFILER_H_
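
AccumulateClockCycle() in the header above maintains a running mean via average = (average * count + diff) / (count + 1). A small standalone check that this incremental update matches the plain batch mean; illustration only, not TensorFlow code:

#include <cassert>
#include <cstdio>
#include <vector>

int main() {
  const std::vector<double> samples = {120, 95, 130, 110};
  double count = 0.0, average = 0.0, sum = 0.0;
  for (double x : samples) {
    // Same update as AccumulateClockCycle: fold one sample into the mean.
    average = (average * count + x) / (count + 1.0);
    count += 1.0;
    sum += x;
  }
  assert(static_cast<size_t>(count) == samples.size());
  std::printf("incremental = %f, batch = %f\n", average, sum / count);
  return 0;
}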
diff --git a/tensorflow/core/platform/profile_utils/cpu_utils_test.cc b/tensorflow/core/platform/profile_utils/cpu_utils_test.cc
index 7cbd994661..fccc4d38a7 100644
--- a/tensorflow/core/platform/profile_utils/cpu_utils_test.cc
+++ b/tensorflow/core/platform/profile_utils/cpu_utils_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
#include "tensorflow/core/platform/profile_utils/cpu_utils.h"
#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/profile_utils/clock_cycle_profiler.h"
#include "tensorflow/core/platform/test.h"
namespace tensorflow {
@@ -68,5 +69,18 @@ TEST_F(CpuUtilsTest, CheckMicroSecPerClock) {
}
}
+TEST_F(CpuUtilsTest, SimpleUsageOfClockCycleProfiler) {
+ static constexpr int LOOP_COUNT = 10;
+ ClockCycleProfiler prof;
+ for (int i = 0; i < LOOP_COUNT; ++i) {
+ prof.Start();
+ prof.Stop();
+ }
+ EXPECT_EQ(LOOP_COUNT, static_cast<int>(prof.GetCount() + 0.5));
+ if (DBG) {
+ prof.DumpStatistics("CpuUtilsTest");
+ }
+}
+
} // namespace profile_utils
} // namespace tensorflow
diff --git a/tensorflow/examples/android/BUILD b/tensorflow/examples/android/BUILD
index 0c1cea5fc3..c795ba67a8 100644
--- a/tensorflow/examples/android/BUILD
+++ b/tensorflow/examples/android/BUILD
@@ -39,7 +39,6 @@ cc_binary(
"notap",
],
deps = [
- ":demo_proto_lib_cc",
"//tensorflow/contrib/android:android_tensorflow_inference_jni",
"//tensorflow/core:android_tensorflow_lib",
LINKER_SCRIPT,
@@ -118,20 +117,3 @@ filegroup(
)
exports_files(["AndroidManifest.xml"])
-
-load(
- "//tensorflow/core:platform/default/build_config.bzl",
- "tf_proto_library",
-)
-
-tf_proto_library(
- name = "demo_proto_lib",
- srcs = glob(
- ["**/*.proto"],
- ),
- cc_api_version = 2,
- visibility = ["//visibility:public"],
-)
-
-# -----------------------------------------------------------------------------
-# Google-internal targets go here (must be at the end).
diff --git a/tensorflow/examples/android/jni/box_coder_jni.cc b/tensorflow/examples/android/jni/box_coder_jni.cc
deleted file mode 100644
index be85414fc1..0000000000
--- a/tensorflow/examples/android/jni/box_coder_jni.cc
+++ /dev/null
@@ -1,92 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// This file loads the box coder mappings.
-
-#include <android/asset_manager.h>
-#include <android/asset_manager_jni.h>
-#include <android/bitmap.h>
-
-#include <jni.h>
-#include <pthread.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <map>
-#include <queue>
-#include <sstream>
-#include <string>
-
-#include "tensorflow/contrib/android/jni/jni_utils.h"
-#include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/platform/types.h"
-
-#include "tensorflow/examples/android/proto/box_coder.pb.h"
-
-#define TENSORFLOW_METHOD(METHOD_NAME) \
- Java_org_tensorflow_demo_TensorFlowMultiBoxDetector_##METHOD_NAME // NOLINT
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-JNIEXPORT void JNICALL TENSORFLOW_METHOD(loadCoderOptions)(
- JNIEnv* env, jobject thiz, jobject java_asset_manager, jstring location,
- jfloatArray priors);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif // __cplusplus
-
-JNIEXPORT void JNICALL TENSORFLOW_METHOD(loadCoderOptions)(
- JNIEnv* env, jobject thiz, jobject java_asset_manager, jstring location,
- jfloatArray priors) {
- AAssetManager* const asset_manager =
- AAssetManager_fromJava(env, java_asset_manager);
- LOG(INFO) << "Acquired AssetManager.";
-
- const std::string location_str = GetString(env, location);
-
- org_tensorflow_demo::MultiBoxCoderOptions multi_options;
-
- LOG(INFO) << "Reading file to proto: " << location_str;
- ReadFileToProtoOrDie(asset_manager, location_str.c_str(), &multi_options);
-
- LOG(INFO) << "Read file. " << multi_options.box_coder_size() << " entries.";
-
- jboolean iCopied = JNI_FALSE;
- jfloat* values = env->GetFloatArrayElements(priors, &iCopied);
-
- const int array_length = env->GetArrayLength(priors);
- LOG(INFO) << "Array length: " << array_length
- << " (/8 = " << (array_length / 8) << ")";
- CHECK_EQ(array_length % 8, 0);
-
- const int num_items =
- std::min(array_length / 8, multi_options.box_coder_size());
-
- for (int i = 0; i < num_items; ++i) {
- const org_tensorflow_demo::BoxCoderOptions& options =
- multi_options.box_coder(i);
-
- for (int j = 0; j < 4; ++j) {
- const org_tensorflow_demo::BoxCoderPrior& prior = options.priors(j);
- values[i * 8 + j * 2] = prior.mean();
- values[i * 8 + j * 2 + 1] = prior.stddev();
- }
- }
- env->ReleaseFloatArrayElements(priors, values, 0);
-
- LOG(INFO) << "Read " << num_items << " options";
-}
diff --git a/tensorflow/examples/android/proto/box_coder.proto b/tensorflow/examples/android/proto/box_coder.proto
deleted file mode 100644
index 8576294110..0000000000
--- a/tensorflow/examples/android/proto/box_coder.proto
+++ /dev/null
@@ -1,42 +0,0 @@
-syntax = "proto2";
-
-package org_tensorflow_demo;
-
-// Prior for a single feature (like minimum x coordinate, width, area, etc.)
-message BoxCoderPrior {
- optional float mean = 1 [default = 0.0];
- optional float stddev = 2 [default = 1.0];
-};
-
-// Box encoding/decoding configuration for a single box.
-message BoxCoderOptions {
- // Number of priors must match the number of values used to encoded
- // values which is derived from the use_... flags below.
- repeated BoxCoderPrior priors = 1;
-
- // Minimum/maximum X/Y of the four corners are used as features.
- // Order: MinX, MinY, MaxX, MaxY.
- // Number of values: 4.
- optional bool use_corners = 2 [default = true];
-
- // Width and height of the box in this order.
- // Number of values: 2.
- optional bool use_width_height = 3 [default = false];
-
- // Coordinates of the center of the box.
- // Order: X, Y.
- // Number of values: 2.
- optional bool use_center = 4 [default = false];
-
- // Area of the box.
- // Number of values: 1.
- optional bool use_area = 5 [default = false];
-};
-
-// Options for MultiBoxCoder which is a encoder/decoder for a fixed number of
-// boxes.
-// A list of BoxCoderOptions that allows for storing multiple box coder options
-// in a single file.
-message MultiBoxCoderOptions {
- repeated BoxCoderOptions box_coder = 1;
-};
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/DetectorActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/DetectorActivity.java
index 9ab5a7108a..d06f2d3c0f 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/DetectorActivity.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/DetectorActivity.java
@@ -60,7 +60,7 @@ public class DetectorActivity extends CameraActivity implements OnImageAvailable
private static final String MB_OUTPUT_NAMES = "output_locations/Reshape,output_scores/Reshape";
private static final String MB_MODEL_FILE = "file:///android_asset/multibox_model.pb";
private static final String MB_LOCATION_FILE =
- "file:///android_asset/multibox_location_priors.pb";
+ "file:///android_asset/multibox_location_priors.txt";
// Configuration values for tiny-yolo-voc. Note that the graph is not included with TensorFlow and
// must be manually placed in the assets/ directory by the user.
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowMultiBoxDetector.java b/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowMultiBoxDetector.java
index e438956c7d..34a4361626 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowMultiBoxDetector.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowMultiBoxDetector.java
@@ -19,10 +19,16 @@ import android.content.res.AssetManager;
import android.graphics.Bitmap;
import android.graphics.RectF;
import android.os.Trace;
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;
+import java.util.StringTokenizer;
import org.tensorflow.contrib.android.TensorFlowInferenceInterface;
import org.tensorflow.demo.env.Logger;
@@ -80,7 +86,7 @@ public class TensorFlowMultiBoxDetector implements Classifier {
final float imageStd,
final String inputName,
final String outputName) {
- TensorFlowMultiBoxDetector d = new TensorFlowMultiBoxDetector();
+ final TensorFlowMultiBoxDetector d = new TensorFlowMultiBoxDetector();
d.inputName = inputName;
d.inputSize = inputSize;
d.imageMean = imageMean;
@@ -89,7 +95,11 @@ public class TensorFlowMultiBoxDetector implements Classifier {
d.boxPriors = new float[numLocations * 8];
- d.loadCoderOptions(assetManager, locationFilename, d.boxPriors);
+ try {
+ d.loadCoderOptions(assetManager, locationFilename, d.boxPriors);
+ } catch (final IOException e) {
+ throw new RuntimeException("Error initializing box priors from " + locationFilename);
+ }
// Pre-allocate buffers.
d.outputNames = outputName.split(",");
@@ -110,9 +120,42 @@ public class TensorFlowMultiBoxDetector implements Classifier {
private TensorFlowMultiBoxDetector() {}
- // Load BoxCoderOptions from native code.
- private native void loadCoderOptions(
- AssetManager assetManager, String locationFilename, float[] boxPriors);
+ private void loadCoderOptions(
+ final AssetManager assetManager, final String locationFilename, final float[] boxPriors)
+ throws IOException {
+ // Try to be intelligent about opening from assets or sdcard depending on prefix.
+ final String assetPrefix = "file:///android_asset/";
+ InputStream is;
+ if (locationFilename.startsWith(assetPrefix)) {
+ is = assetManager.open(locationFilename.split(assetPrefix)[1]);
+ } else {
+ is = new FileInputStream(locationFilename);
+ }
+
+ // Read values. Number of values per line doesn't matter, as long as they are separated
+ // by commas and/or whitespace, and there are exactly numLocations * 8 values total.
+ // Values are in the order mean, std for each consecutive corner of each box, for a total of 8
+ // per location.
+ final BufferedReader reader = new BufferedReader(new InputStreamReader(is));
+ int priorIndex = 0;
+ String line;
+ while ((line = reader.readLine()) != null) {
+ final StringTokenizer st = new StringTokenizer(line, ", ");
+ while (st.hasMoreTokens()) {
+ final String token = st.nextToken();
+ try {
+ final float number = Float.parseFloat(token);
+ boxPriors[priorIndex++] = number;
+ } catch (final NumberFormatException e) {
+ // Silently ignore.
+ }
+ }
+ }
+ if (priorIndex != boxPriors.length) {
+ throw new RuntimeException(
+ "BoxPrior length mismatch: " + priorIndex + " vs " + boxPriors.length);
+ }
+ }
private float[] decodeLocationsEncoding(final float[] locationEncoding) {
final float[] locations = new float[locationEncoding.length];
@@ -216,7 +259,7 @@ public class TensorFlowMultiBoxDetector implements Classifier {
}
@Override
- public void enableStatLogging(boolean debug) {
+ public void enableStatLogging(final boolean debug) {
inferenceInterface.enableStatLogging(debug);
}
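
The Java loader above expects a plain text file of floats separated by commas and/or whitespace, with exactly numLocations * 8 values in total (mean/stddev pairs per corner). A hypothetical C++ reader for the same format, shown only to make the file layout concrete; the function name and path handling are not part of this change:

#include <fstream>
#include <sstream>
#include <string>
#include <vector>

// Reads every float in the file, treating commas as whitespace, in the same
// order the Java code fills boxPriors.
std::vector<float> LoadBoxPriors(const std::string& path) {
  std::ifstream in(path);
  std::vector<float> priors;
  std::string line;
  while (std::getline(in, line)) {
    for (char& c : line) {
      if (c == ',') c = ' ';
    }
    std::istringstream tokens(line);
    float value;
    while (tokens >> value) priors.push_back(value);
  }
  return priors;
}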
diff --git a/tensorflow/examples/label_image/main.cc b/tensorflow/examples/label_image/main.cc
index 544b1b2738..08e6e4544a 100644
--- a/tensorflow/examples/label_image/main.cc
+++ b/tensorflow/examples/label_image/main.cc
@@ -97,7 +97,7 @@ Status ReadTensorFromImageFile(string file_name, const int input_height,
file_name);
// Now try to figure out what kind of file it is and decode it.
const int wanted_channels = 3;
- Output image_reader;
+ tensorflow::Output image_reader;
if (tensorflow::StringPiece(file_name).ends_with(".png")) {
image_reader = DecodePng(root.WithOpName("png_reader"), file_reader,
DecodePng::Channels(wanted_channels));
diff --git a/tensorflow/g3doc/api_docs/python/array_ops.md b/tensorflow/g3doc/api_docs/python/array_ops.md
index 2dcf6bcca6..cb30382c6b 100644
--- a/tensorflow/g3doc/api_docs/python/array_ops.md
+++ b/tensorflow/g3doc/api_docs/python/array_ops.md
@@ -1109,7 +1109,7 @@ This op first slices `input` along the dimension `batch_axis`, and for each
slice `i`, reverses the first `seq_lengths[i]` elements along
the dimension `seq_axis`.
-The elements of `seq_lengths` must obey `seq_lengths[i] < input.dims[seq_dim]`,
+The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`,
and `seq_lengths` must be a vector of length `input.dims[batch_dim]`.
The output slice `i` along dimension `batch_axis` is then given by input
@@ -1166,7 +1166,7 @@ output[2:, :, 3, :, ...] = input[2:, :, 3, :, ...]
* <b>`input`</b>: A `Tensor`. The input to reverse.
* <b>`seq_lengths`</b>: A `Tensor`. Must be one of the following types: `int32`, `int64`.
1-D with length `input.dims(batch_dim)` and
- `max(seq_lengths) < input.dims(seq_dim)`
+ `max(seq_lengths) <= input.dims(seq_dim)`
* <b>`seq_axis`</b>: An `int`. The dimension which is partially reversed.
* <b>`batch_axis`</b>: An optional `int`. Defaults to `0`.
The dimension along which reversal is performed.
diff --git a/tensorflow/g3doc/api_docs/python/contrib.graph_editor.md b/tensorflow/g3doc/api_docs/python/contrib.graph_editor.md
index 303a99020c..b5aae70911 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.graph_editor.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.graph_editor.md
@@ -1794,6 +1794,9 @@ This handler is typically used to transform a hidden input tensors.
Add the transformed elem to the (renamed) collections of elem.
+A collection is renamed only if it is not a known key, as described in
+`tf.GraphKeys`.
+
##### Args:
diff --git a/tensorflow/g3doc/api_docs/python/contrib.learn.md b/tensorflow/g3doc/api_docs/python/contrib.learn.md
index 1e515d6490..fb790e2f1e 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.learn.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.learn.md
@@ -485,22 +485,19 @@ The signature of the input_fn accepted by export is changing to be consistent wi
- - -
-#### `tf.contrib.learn.Estimator.export_savedmodel(*args, **kwargs)` {#Estimator.export_savedmodel}
-
-Exports inference graph as a SavedModel into given dir. (experimental)
-
-THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning.
+#### `tf.contrib.learn.Estimator.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#Estimator.export_savedmodel}
+Exports inference graph as a SavedModel into given dir.
##### Args:
* <b>`export_dir_base`</b>: A string containing a directory to write the exported
graph and checkpoints.
-* <b>`input_fn`</b>: A function that takes no argument and
+* <b>`serving_input_fn`</b>: A function that takes no argument and
returns an `InputFnOps`.
* <b>`default_output_alternative_key`</b>: the name of the head to serve when none is
- specified.
+ specified. Not needed for single-headed models.
* <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory
within the exported SavedModel. Each key should give the destination
path (including the filename) relative to the assets.extra directory.
@@ -509,7 +506,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with
renaming it is specified as
`{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
* <b>`as_text`</b>: whether to write the SavedModel proto in text format.
-* <b>`exports_to_keep`</b>: Number of exports to keep.
##### Returns:
@@ -1038,22 +1034,19 @@ See BaseEstimator.export.
- - -
-#### `tf.contrib.learn.DNNClassifier.export_savedmodel(*args, **kwargs)` {#DNNClassifier.export_savedmodel}
-
-Exports inference graph as a SavedModel into given dir. (experimental)
-
-THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning.
+#### `tf.contrib.learn.DNNClassifier.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#DNNClassifier.export_savedmodel}
+Exports inference graph as a SavedModel into given dir.
##### Args:
* <b>`export_dir_base`</b>: A string containing a directory to write the exported
graph and checkpoints.
-* <b>`input_fn`</b>: A function that takes no argument and
+* <b>`serving_input_fn`</b>: A function that takes no argument and
returns an `InputFnOps`.
* <b>`default_output_alternative_key`</b>: the name of the head to serve when none is
- specified.
+ specified. Not needed for single-headed models.
* <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory
within the exported SavedModel. Each key should give the destination
path (including the filename) relative to the assets.extra directory.
@@ -1062,7 +1055,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with
renaming it is specified as
`{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
* <b>`as_text`</b>: whether to write the SavedModel proto in text format.
-* <b>`exports_to_keep`</b>: Number of exports to keep.
##### Returns:
@@ -1466,22 +1458,19 @@ See BaseEstimator.export.
- - -
-#### `tf.contrib.learn.DNNRegressor.export_savedmodel(*args, **kwargs)` {#DNNRegressor.export_savedmodel}
-
-Exports inference graph as a SavedModel into given dir. (experimental)
-
-THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning.
+#### `tf.contrib.learn.DNNRegressor.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#DNNRegressor.export_savedmodel}
+Exports inference graph as a SavedModel into given dir.
##### Args:
* <b>`export_dir_base`</b>: A string containing a directory to write the exported
graph and checkpoints.
-* <b>`input_fn`</b>: A function that takes no argument and
+* <b>`serving_input_fn`</b>: A function that takes no argument and
returns an `InputFnOps`.
* <b>`default_output_alternative_key`</b>: the name of the head to serve when none is
- specified.
+ specified. Not needed for single-headed models.
* <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory
within the exported SavedModel. Each key should give the destination
path (including the filename) relative to the assets.extra directory.
@@ -1490,7 +1479,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with
renaming it is specified as
`{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
* <b>`as_text`</b>: whether to write the SavedModel proto in text format.
-* <b>`exports_to_keep`</b>: Number of exports to keep.
##### Returns:
@@ -1890,22 +1878,19 @@ See BaseEstimator.export.
- - -
-#### `tf.contrib.learn.LinearClassifier.export_savedmodel(*args, **kwargs)` {#LinearClassifier.export_savedmodel}
-
-Exports inference graph as a SavedModel into given dir. (experimental)
-
-THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning.
+#### `tf.contrib.learn.LinearClassifier.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#LinearClassifier.export_savedmodel}
+Exports inference graph as a SavedModel into given dir.
##### Args:
* <b>`export_dir_base`</b>: A string containing a directory to write the exported
graph and checkpoints.
-* <b>`input_fn`</b>: A function that takes no argument and
+* <b>`serving_input_fn`</b>: A function that takes no argument and
returns an `InputFnOps`.
* <b>`default_output_alternative_key`</b>: the name of the head to serve when none is
- specified.
+ specified. Not needed for single-headed models.
* <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory
within the exported SavedModel. Each key should give the destination
path (including the filename) relative to the assets.extra directory.
@@ -1914,7 +1899,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with
renaming it is specified as
`{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
* <b>`as_text`</b>: whether to write the SavedModel proto in text format.
-* <b>`exports_to_keep`</b>: Number of exports to keep.
##### Returns:
@@ -2276,22 +2260,19 @@ See BaseEstimator.export.
- - -
-#### `tf.contrib.learn.LinearRegressor.export_savedmodel(*args, **kwargs)` {#LinearRegressor.export_savedmodel}
-
-Exports inference graph as a SavedModel into given dir. (experimental)
-
-THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning.
+#### `tf.contrib.learn.LinearRegressor.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#LinearRegressor.export_savedmodel}
+Exports inference graph as a SavedModel into given dir.
##### Args:
* <b>`export_dir_base`</b>: A string containing a directory to write the exported
graph and checkpoints.
-* <b>`input_fn`</b>: A function that takes no argument and
+* <b>`serving_input_fn`</b>: A function that takes no argument and
returns an `InputFnOps`.
* <b>`default_output_alternative_key`</b>: the name of the head to serve when none is
- specified.
+ specified. Not needed for single-headed models.
* <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory
within the exported SavedModel. Each key should give the destination
path (including the filename) relative to the assets.extra directory.
@@ -2300,7 +2281,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with
renaming it is specified as
`{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
* <b>`as_text`</b>: whether to write the SavedModel proto in text format.
-* <b>`exports_to_keep`</b>: Number of exports to keep.
##### Returns:
diff --git a/tensorflow/g3doc/api_docs/python/contrib.learn.monitors.md b/tensorflow/g3doc/api_docs/python/contrib.learn.monitors.md
index c7e32f0437..dae7162a0d 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.learn.monitors.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.learn.monitors.md
@@ -2384,7 +2384,7 @@ Can do early stopping on validation metrics if `early_stopping_rounds` is
provided.
- - -
-#### `tf.contrib.learn.monitors.ValidationMonitor.__init__(x=None, y=None, input_fn=None, batch_size=None, eval_steps=None, every_n_steps=100, metrics=None, early_stopping_rounds=None, early_stopping_metric='loss', early_stopping_metric_minimize=True, name=None)` {#ValidationMonitor.__init__}
+#### `tf.contrib.learn.monitors.ValidationMonitor.__init__(x=None, y=None, input_fn=None, batch_size=None, eval_steps=None, every_n_steps=100, metrics=None, hooks=None, early_stopping_rounds=None, early_stopping_metric='loss', early_stopping_metric_minimize=True, name=None)` {#ValidationMonitor.__init__}
Initializes a ValidationMonitor.
@@ -2399,6 +2399,8 @@ Initializes a ValidationMonitor.
* <b>`every_n_steps`</b>: Check for new checkpoints to evaluate every N steps. If a
new checkpoint is found, it is evaluated. See `EveryN`.
* <b>`metrics`</b>: See `BaseEstimator.evaluate`.
+* <b>`hooks`</b>: A list of `SessionRunHook` hooks to pass to the
+ `Estimator`'s `evaluate` function.
* <b>`early_stopping_rounds`</b>: `int`. If the metric indicated by
`early_stopping_metric` does not change according to
`early_stopping_metric_minimize` for this many steps, then training
diff --git a/tensorflow/g3doc/api_docs/python/contrib.linalg.md b/tensorflow/g3doc/api_docs/python/contrib.linalg.md
index cbbffb1e78..509dc10e93 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.linalg.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.linalg.md
@@ -237,7 +237,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperator.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperator.batch_shape_dynamic}
+#### `tf.contrib.linalg.LinearOperator.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperator.batch_shape_tensor}
Shape of batch dimensions of this operator, determined at runtime.
@@ -287,7 +287,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperator.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperator.domain_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperator.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperator.domain_dimension_tensor}
Dimension (in the sense of vector spaces) of the domain of this operator.
@@ -380,7 +380,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperator.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperator.range_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperator.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperator.range_dimension_tensor}
Dimension (in the sense of vector spaces) of the range of this operator.
@@ -416,7 +416,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperator.shape_dynamic(name='shape_dynamic')` {#LinearOperator.shape_dynamic}
+#### `tf.contrib.linalg.LinearOperator.shape_tensor(name='shape_tensor')` {#LinearOperator.shape_tensor}
Shape of this `LinearOperator`, determined at runtime.
@@ -497,7 +497,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperator.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperator.tensor_rank_dynamic}
+#### `tf.contrib.linalg.LinearOperator.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperator.tensor_rank_tensor}
Rank (in the sense of tensors) of matrix corresponding to this operator.
@@ -720,7 +720,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorDiag.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorDiag.batch_shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorDiag.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorDiag.batch_shape_tensor}
Shape of batch dimensions of this operator, determined at runtime.
@@ -770,7 +770,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorDiag.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorDiag.domain_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorDiag.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorDiag.domain_dimension_tensor}
Dimension (in the sense of vector spaces) of the domain of this operator.
@@ -863,7 +863,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorDiag.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorDiag.range_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorDiag.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorDiag.range_dimension_tensor}
Dimension (in the sense of vector spaces) of the range of this operator.
@@ -899,7 +899,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorDiag.shape_dynamic(name='shape_dynamic')` {#LinearOperatorDiag.shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorDiag.shape_tensor(name='shape_tensor')` {#LinearOperatorDiag.shape_tensor}
Shape of this `LinearOperator`, determined at runtime.
@@ -980,7 +980,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorDiag.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorDiag.tensor_rank_dynamic}
+#### `tf.contrib.linalg.LinearOperatorDiag.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorDiag.tensor_rank_tensor}
Rank (in the sense of tensors) of matrix corresponding to this operator.
@@ -1237,7 +1237,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorIdentity.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorIdentity.batch_shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorIdentity.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorIdentity.batch_shape_tensor}
Shape of batch dimensions of this operator, determined at runtime.
@@ -1287,7 +1287,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorIdentity.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorIdentity.domain_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorIdentity.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorIdentity.domain_dimension_tensor}
Dimension (in the sense of vector spaces) of the domain of this operator.
@@ -1380,7 +1380,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorIdentity.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorIdentity.range_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorIdentity.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorIdentity.range_dimension_tensor}
Dimension (in the sense of vector spaces) of the range of this operator.
@@ -1416,7 +1416,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorIdentity.shape_dynamic(name='shape_dynamic')` {#LinearOperatorIdentity.shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorIdentity.shape_tensor(name='shape_tensor')` {#LinearOperatorIdentity.shape_tensor}
Shape of this `LinearOperator`, determined at runtime.
@@ -1497,7 +1497,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorIdentity.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorIdentity.tensor_rank_dynamic}
+#### `tf.contrib.linalg.LinearOperatorIdentity.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorIdentity.tensor_rank_tensor}
Rank (in the sense of tensors) of matrix corresponding to this operator.
@@ -1728,7 +1728,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorScaledIdentity.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorScaledIdentity.batch_shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorScaledIdentity.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorScaledIdentity.batch_shape_tensor}
Shape of batch dimensions of this operator, determined at runtime.
@@ -1778,7 +1778,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorScaledIdentity.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorScaledIdentity.domain_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorScaledIdentity.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorScaledIdentity.domain_dimension_tensor}
Dimension (in the sense of vector spaces) of the domain of this operator.
@@ -1878,7 +1878,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorScaledIdentity.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorScaledIdentity.range_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorScaledIdentity.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorScaledIdentity.range_dimension_tensor}
Dimension (in the sense of vector spaces) of the range of this operator.
@@ -1914,7 +1914,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorScaledIdentity.shape_dynamic(name='shape_dynamic')` {#LinearOperatorScaledIdentity.shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorScaledIdentity.shape_tensor(name='shape_tensor')` {#LinearOperatorScaledIdentity.shape_tensor}
Shape of this `LinearOperator`, determined at runtime.
@@ -1995,7 +1995,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorScaledIdentity.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorScaledIdentity.tensor_rank_dynamic}
+#### `tf.contrib.linalg.LinearOperatorScaledIdentity.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorScaledIdentity.tensor_rank_tensor}
Rank (in the sense of tensors) of matrix corresponding to this operator.
@@ -2209,7 +2209,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorMatrix.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorMatrix.batch_shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorMatrix.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorMatrix.batch_shape_tensor}
Shape of batch dimensions of this operator, determined at runtime.
@@ -2259,7 +2259,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorMatrix.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorMatrix.domain_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorMatrix.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorMatrix.domain_dimension_tensor}
Dimension (in the sense of vector spaces) of the domain of this operator.
@@ -2352,7 +2352,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorMatrix.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorMatrix.range_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorMatrix.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorMatrix.range_dimension_tensor}
Dimension (in the sense of vector spaces) of the range of this operator.
@@ -2388,7 +2388,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorMatrix.shape_dynamic(name='shape_dynamic')` {#LinearOperatorMatrix.shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorMatrix.shape_tensor(name='shape_tensor')` {#LinearOperatorMatrix.shape_tensor}
Shape of this `LinearOperator`, determined at runtime.
@@ -2469,7 +2469,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorMatrix.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorMatrix.tensor_rank_dynamic}
+#### `tf.contrib.linalg.LinearOperatorMatrix.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorMatrix.tensor_rank_tensor}
Rank (in the sense of tensors) of matrix corresponding to this operator.
@@ -2685,7 +2685,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorTriL.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorTriL.batch_shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorTriL.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorTriL.batch_shape_tensor}
Shape of batch dimensions of this operator, determined at runtime.
@@ -2735,7 +2735,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorTriL.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorTriL.domain_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorTriL.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorTriL.domain_dimension_tensor}
Dimension (in the sense of vector spaces) of the domain of this operator.
@@ -2828,7 +2828,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorTriL.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorTriL.range_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorTriL.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorTriL.range_dimension_tensor}
Dimension (in the sense of vector spaces) of the range of this operator.
@@ -2864,7 +2864,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorTriL.shape_dynamic(name='shape_dynamic')` {#LinearOperatorTriL.shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorTriL.shape_tensor(name='shape_tensor')` {#LinearOperatorTriL.shape_tensor}
Shape of this `LinearOperator`, determined at runtime.
@@ -2945,7 +2945,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorTriL.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorTriL.tensor_rank_dynamic}
+#### `tf.contrib.linalg.LinearOperatorTriL.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorTriL.tensor_rank_tensor}
Rank (in the sense of tensors) of matrix corresponding to this operator.
@@ -3172,7 +3172,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorComposition.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorComposition.batch_shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorComposition.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorComposition.batch_shape_tensor}
Shape of batch dimensions of this operator, determined at runtime.
@@ -3222,7 +3222,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorComposition.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorComposition.domain_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorComposition.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorComposition.domain_dimension_tensor}
Dimension (in the sense of vector spaces) of the domain of this operator.
@@ -3322,7 +3322,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorComposition.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorComposition.range_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorComposition.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorComposition.range_dimension_tensor}
Dimension (in the sense of vector spaces) of the range of this operator.
@@ -3358,7 +3358,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorComposition.shape_dynamic(name='shape_dynamic')` {#LinearOperatorComposition.shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorComposition.shape_tensor(name='shape_tensor')` {#LinearOperatorComposition.shape_tensor}
Shape of this `LinearOperator`, determined at runtime.
@@ -3439,7 +3439,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorComposition.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorComposition.tensor_rank_dynamic}
+#### `tf.contrib.linalg.LinearOperatorComposition.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorComposition.tensor_rank_tensor}
Rank (in the sense of tensors) of matrix corresponding to this operator.
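Illustrative sketch (not part of the patch): the hunks above rename the runtime shape accessors from `*_dynamic` to `*_tensor`. A minimal example on a `LinearOperatorDiag` with a partially unknown static shape; the placeholder and feed values are assumptions for illustration.

```python
import tensorflow as tf

# diag has unknown length, so the operator's static shape is only partially known.
diag = tf.placeholder(tf.float32, shape=[None])
operator = tf.contrib.linalg.LinearOperatorDiag(diag)

shape_t = operator.shape_tensor()              # formerly shape_dynamic()
batch_shape_t = operator.batch_shape_tensor()  # formerly batch_shape_dynamic()
rank_t = operator.tensor_rank_tensor()         # formerly tensor_rank_dynamic()

with tf.Session() as sess:
  print(sess.run(shape_t, feed_dict={diag: [1., 2., 3.]}))  # [3 3]
```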
diff --git a/tensorflow/g3doc/api_docs/python/contrib.metrics.md b/tensorflow/g3doc/api_docs/python/contrib.metrics.md
index 4d6cf8625c..1537865fc6 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.metrics.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.metrics.md
@@ -1469,10 +1469,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
##### Args:
-* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary
- dimensions.
-* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
+* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
+* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
* <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions
must be either `1`, or the same as the corresponding `labels`
@@ -1515,10 +1515,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
##### Args:
-* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary
- dimensions.
-* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
+* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
+* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
* <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions
must be either `1`, or the same as the corresponding `labels`
@@ -1562,10 +1562,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
##### Args:
-* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary
- dimensions.
-* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
+* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
+* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
* <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions
must be either `1`, or the same as the corresponding `labels`
@@ -1609,10 +1609,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
##### Args:
-* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary
- dimensions.
-* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
+* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
+* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
* <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions
must be either `1`, or the same as the corresponding `labels`
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.learn.LinearRegressor.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.learn.LinearRegressor.md
index 6009e8262e..f5b1ca422c 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.learn.LinearRegressor.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.learn.LinearRegressor.md
@@ -137,22 +137,19 @@ See BaseEstimator.export.
- - -
-#### `tf.contrib.learn.LinearRegressor.export_savedmodel(*args, **kwargs)` {#LinearRegressor.export_savedmodel}
-
-Exports inference graph as a SavedModel into given dir. (experimental)
-
-THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning.
+#### `tf.contrib.learn.LinearRegressor.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#LinearRegressor.export_savedmodel}
+Exports inference graph as a SavedModel into given dir.
##### Args:
* <b>`export_dir_base`</b>: A string containing a directory to write the exported
graph and checkpoints.
-* <b>`input_fn`</b>: A function that takes no argument and
+* <b>`serving_input_fn`</b>: A function that takes no argument and
returns an `InputFnOps`.
* <b>`default_output_alternative_key`</b>: the name of the head to serve when none is
- specified.
+ specified. Not needed for single-headed models.
* <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory
within the exported SavedModel. Each key should give the destination
path (including the filename) relative to the assets.extra directory.
@@ -161,7 +158,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with
renaming it is specified as
`{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
* <b>`as_text`</b>: whether to write the SavedModel proto in text format.
-* <b>`exports_to_keep`</b>: Number of exports to keep.
##### Returns:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.linalg.LinearOperatorDiag.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.linalg.LinearOperatorDiag.md
index a449b2f097..1900385928 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.linalg.LinearOperatorDiag.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.linalg.LinearOperatorDiag.md
@@ -189,7 +189,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorDiag.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorDiag.batch_shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorDiag.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorDiag.batch_shape_tensor}
Shape of batch dimensions of this operator, determined at runtime.
@@ -239,7 +239,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorDiag.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorDiag.domain_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorDiag.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorDiag.domain_dimension_tensor}
Dimension (in the sense of vector spaces) of the domain of this operator.
@@ -332,7 +332,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorDiag.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorDiag.range_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorDiag.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorDiag.range_dimension_tensor}
Dimension (in the sense of vector spaces) of the range of this operator.
@@ -368,7 +368,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorDiag.shape_dynamic(name='shape_dynamic')` {#LinearOperatorDiag.shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorDiag.shape_tensor(name='shape_tensor')` {#LinearOperatorDiag.shape_tensor}
Shape of this `LinearOperator`, determined at runtime.
@@ -449,7 +449,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorDiag.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorDiag.tensor_rank_dynamic}
+#### `tf.contrib.linalg.LinearOperatorDiag.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorDiag.tensor_rank_tensor}
Rank (in the sense of tensors) of matrix corresponding to this operator.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.reverse_sequence.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.reverse_sequence.md
index b950cd5fe6..c6e8c748bf 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.reverse_sequence.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.reverse_sequence.md
@@ -6,7 +6,7 @@ This op first slices `input` along the dimension `batch_axis`, and for each
slice `i`, reverses the first `seq_lengths[i]` elements along
the dimension `seq_axis`.
-The elements of `seq_lengths` must obey `seq_lengths[i] < input.dims[seq_dim]`,
+The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`,
and `seq_lengths` must be a vector of length `input.dims[batch_dim]`.
The output slice `i` along dimension `batch_axis` is then given by input
@@ -63,7 +63,7 @@ output[2:, :, 3, :, ...] = input[2:, :, 3, :, ...]
* <b>`input`</b>: A `Tensor`. The input to reverse.
* <b>`seq_lengths`</b>: A `Tensor`. Must be one of the following types: `int32`, `int64`.
1-D with length `input.dims(batch_dim)` and
- `max(seq_lengths) < input.dims(seq_dim)`
+ `max(seq_lengths) <= input.dims(seq_dim)`
* <b>`seq_axis`</b>: An `int`. The dimension which is partially reversed.
* <b>`batch_axis`</b>: An optional `int`. Defaults to `0`.
The dimension along which reversal is performed.
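Illustrative sketch (not part of the patch) of the relaxed bound: `seq_lengths[i]` may now equal the size of the reversed dimension, so an entire row can be reversed.

```python
import tensorflow as tf

x = tf.constant([[1, 2, 3],
                 [4, 5, 6]])
# seq_lengths[0] == input.dims[seq_axis] == 3 is now allowed.
y = tf.reverse_sequence(x, seq_lengths=[3, 2], seq_axis=1, batch_axis=0)

with tf.Session() as sess:
  print(sess.run(y))  # [[3 2 1]
                      #  [5 4 6]]
```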
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.LinearClassifier.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.LinearClassifier.md
index d649e42181..08de000315 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.LinearClassifier.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.LinearClassifier.md
@@ -165,22 +165,19 @@ See BaseEstimator.export.
- - -
-#### `tf.contrib.learn.LinearClassifier.export_savedmodel(*args, **kwargs)` {#LinearClassifier.export_savedmodel}
-
-Exports inference graph as a SavedModel into given dir. (experimental)
-
-THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning.
+#### `tf.contrib.learn.LinearClassifier.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#LinearClassifier.export_savedmodel}
+Exports inference graph as a SavedModel into given dir.
##### Args:
* <b>`export_dir_base`</b>: A string containing a directory to write the exported
graph and checkpoints.
-* <b>`input_fn`</b>: A function that takes no argument and
+* <b>`serving_input_fn`</b>: A function that takes no argument and
returns an `InputFnOps`.
* <b>`default_output_alternative_key`</b>: the name of the head to serve when none is
- specified.
+ specified. Not needed for single-headed models.
* <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory
within the exported SavedModel. Each key should give the destination
path (including the filename) relative to the assets.extra directory.
@@ -189,7 +186,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with
renaming it is specified as
`{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
* <b>`as_text`</b>: whether to write the SavedModel proto in text format.
-* <b>`exports_to_keep`</b>: Number of exports to keep.
##### Returns:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.monitors.ValidationMonitor.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.monitors.ValidationMonitor.md
index 2bafff8cdf..b24a86f1e1 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.monitors.ValidationMonitor.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.monitors.ValidationMonitor.md
@@ -7,7 +7,7 @@ Can do early stopping on validation metrics if `early_stopping_rounds` is
provided.
- - -
-#### `tf.contrib.learn.monitors.ValidationMonitor.__init__(x=None, y=None, input_fn=None, batch_size=None, eval_steps=None, every_n_steps=100, metrics=None, early_stopping_rounds=None, early_stopping_metric='loss', early_stopping_metric_minimize=True, name=None)` {#ValidationMonitor.__init__}
+#### `tf.contrib.learn.monitors.ValidationMonitor.__init__(x=None, y=None, input_fn=None, batch_size=None, eval_steps=None, every_n_steps=100, metrics=None, hooks=None, early_stopping_rounds=None, early_stopping_metric='loss', early_stopping_metric_minimize=True, name=None)` {#ValidationMonitor.__init__}
Initializes a ValidationMonitor.
@@ -22,6 +22,8 @@ Initializes a ValidationMonitor.
* <b>`every_n_steps`</b>: Check for new checkpoints to evaluate every N steps. If a
new checkpoint is found, it is evaluated. See `EveryN`.
* <b>`metrics`</b>: See `BaseEstimator.evaluate`.
+* <b>`hooks`</b>: A list of `SessionRunHook` hooks to pass to the
+ `Estimator`'s `evaluate` function.
* <b>`early_stopping_rounds`</b>: `int`. If the metric indicated by
`early_stopping_metric` does not change according to
`early_stopping_metric_minimize` for this many steps, then training
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorComposition.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorComposition.md
index 5e051e5ba8..ee7140922c 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorComposition.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorComposition.md
@@ -193,7 +193,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorComposition.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorComposition.batch_shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorComposition.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorComposition.batch_shape_tensor}
Shape of batch dimensions of this operator, determined at runtime.
@@ -243,7 +243,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorComposition.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorComposition.domain_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorComposition.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorComposition.domain_dimension_tensor}
Dimension (in the sense of vector spaces) of the domain of this operator.
@@ -343,7 +343,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorComposition.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorComposition.range_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorComposition.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorComposition.range_dimension_tensor}
Dimension (in the sense of vector spaces) of the range of this operator.
@@ -379,7 +379,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorComposition.shape_dynamic(name='shape_dynamic')` {#LinearOperatorComposition.shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorComposition.shape_tensor(name='shape_tensor')` {#LinearOperatorComposition.shape_tensor}
Shape of this `LinearOperator`, determined at runtime.
@@ -460,7 +460,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorComposition.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorComposition.tensor_rank_dynamic}
+#### `tf.contrib.linalg.LinearOperatorComposition.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorComposition.tensor_rank_tensor}
Rank (in the sense of tensors) of matrix corresponding to this operator.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorIdentity.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorIdentity.md
index 37e711c819..f4d68516dc 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorIdentity.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorIdentity.md
@@ -226,7 +226,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorIdentity.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorIdentity.batch_shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorIdentity.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorIdentity.batch_shape_tensor}
Shape of batch dimensions of this operator, determined at runtime.
@@ -276,7 +276,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorIdentity.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorIdentity.domain_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorIdentity.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorIdentity.domain_dimension_tensor}
Dimension (in the sense of vector spaces) of the domain of this operator.
@@ -369,7 +369,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorIdentity.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorIdentity.range_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorIdentity.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorIdentity.range_dimension_tensor}
Dimension (in the sense of vector spaces) of the range of this operator.
@@ -405,7 +405,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorIdentity.shape_dynamic(name='shape_dynamic')` {#LinearOperatorIdentity.shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorIdentity.shape_tensor(name='shape_tensor')` {#LinearOperatorIdentity.shape_tensor}
Shape of this `LinearOperator`, determined at runtime.
@@ -486,7 +486,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorIdentity.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorIdentity.tensor_rank_dynamic}
+#### `tf.contrib.linalg.LinearOperatorIdentity.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorIdentity.tensor_rank_tensor}
Rank (in the sense of tensors) of matrix corresponding to this operator.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.LoggingTensorHook.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.LoggingTensorHook.md
index 519d5f253e..e76b7838ed 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.LoggingTensorHook.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.LoggingTensorHook.md
@@ -3,7 +3,7 @@ Prints the given tensors once every N local steps or once every N seconds.
The tensors will be printed to the log, with `INFO` severity.
- - -
-#### `tf.train.LoggingTensorHook.__init__(tensors, every_n_iter=None, every_n_secs=None)` {#LoggingTensorHook.__init__}
+#### `tf.train.LoggingTensorHook.__init__(tensors, every_n_iter=None, every_n_secs=None, formatter=None)` {#LoggingTensorHook.__init__}
Initializes a LoggingHook monitor.
@@ -17,6 +17,8 @@ Initializes a LoggingHook monitor.
* <b>`every_n_secs`</b>: `int` or `float`, print the values of `tensors` once every N
seconds. Exactly one of `every_n_iter` and `every_n_secs` should be
provided.
+* <b>`formatter`</b>: function, takes dict of `tag`->`Tensor` and returns a string.
+  If `None`, the default of printing all tensors is used.
##### Raises:
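Illustrative sketch (not part of the patch) of the new `formatter` argument; the tensor name `'total_loss:0'` is hypothetical.

```python
import tensorflow as tf

def loss_formatter(tensor_values):
  # tensor_values maps each tag to its evaluated value.
  return 'step loss = {:.4f}'.format(tensor_values['loss'])

hook = tf.train.LoggingTensorHook(
    tensors={'loss': 'total_loss:0'},  # hypothetical tensor name
    every_n_iter=100,
    formatter=loss_formatter)
```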
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.MonitoredTrainingSession.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.MonitoredTrainingSession.md
index 19cec59080..254e28a70a 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.MonitoredTrainingSession.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.MonitoredTrainingSession.md
@@ -1,4 +1,4 @@
-### `tf.train.MonitoredTrainingSession(master='', is_chief=True, checkpoint_dir=None, scaffold=None, hooks=None, chief_only_hooks=None, save_checkpoint_secs=600, save_summaries_steps=100, config=None)` {#MonitoredTrainingSession}
+### `tf.train.MonitoredTrainingSession(master='', is_chief=True, checkpoint_dir=None, scaffold=None, hooks=None, chief_only_hooks=None, save_checkpoint_secs=600, save_summaries_steps=100, save_summaries_secs=None, config=None)` {#MonitoredTrainingSession}
Creates a `MonitoredSession` for training.
@@ -26,8 +26,12 @@ inialize/restore.
using a default checkpoint saver. If `save_checkpoint_secs` is set to
`None`, then the default checkpoint saver isn't used.
* <b>`save_summaries_steps`</b>: The frequency, in number of global steps, that the
- summaries are written to disk using a default summary saver. If
- `save_summaries_steps` is set to `None`, then the default summary saver
+ summaries are written to disk using a default summary saver. If both
+ `save_summaries_steps` and `save_summaries_secs` are set to `None`, then
+ the default summary saver isn't used.
+* <b>`save_summaries_secs`</b>: The frequency, in secs, that the summaries are written
+ to disk using a default summary saver. If both `save_summaries_steps` and
+ `save_summaries_secs` are set to `None`, then the default summary saver
isn't used.
* <b>`config`</b>: an instance of `tf.ConfigProto` proto used to configure the session.
It's the `config` argument of constructor of `tf.Session`.
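Illustrative sketch (not part of the patch) of time-based summary saving with the new `save_summaries_secs` argument; the checkpoint directory and the trivial training op are assumptions.

```python
import tensorflow as tf

global_step = tf.contrib.framework.get_or_create_global_step()
train_op = tf.assign_add(global_step, 1)  # stand-in for a real training op

# Save summaries every 5 minutes rather than every N global steps.
with tf.train.MonitoredTrainingSession(
    checkpoint_dir='/tmp/train_logs',   # assumed directory
    save_summaries_steps=None,
    save_summaries_secs=300) as sess:
  for _ in range(10):
    sess.run(train_op)
```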
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf_debug.LocalCLIDebugHook.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf_debug.LocalCLIDebugHook.md
index 851a1d2210..eeb4226633 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf_debug.LocalCLIDebugHook.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf_debug.LocalCLIDebugHook.md
@@ -34,12 +34,18 @@ Create a local debugger command-line interface (CLI) hook.
Add a tensor filter.
+See doc of `LocalCLIDebugWrapperSession.add_tensor_filter()` for details.
+Override default behavior to accommodate the possibility of this method being
+called prior to the initialization of the underlying
+`LocalCLIDebugWrapperSession` object.
+
##### Args:
-* <b>`filter_name`</b>: (`str`) name of the filter.
-* <b>`tensor_filter`</b>: (`callable`) the filter callable. See the doc string of
- `DebugDumpDir.find()` for more details about its signature.
+* <b>`filter_name`</b>: See doc of `LocalCLIDebugWrapperSession.add_tensor_filter()`
+ for details.
+* <b>`tensor_filter`</b>: See doc of
+ `LocalCLIDebugWrapperSession.add_tensor_filter()` for details.
- - -
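Illustrative sketch (not part of the patch): registering a filter on the hook before any session exists, using the stock `has_inf_or_nan` filter.

```python
from tensorflow.python import debug as tf_debug

hook = tf_debug.LocalCLIDebugHook()
# Safe to call before the underlying LocalCLIDebugWrapperSession is created;
# the hook stores the filter and applies it once the session exists.
hook.add_tensor_filter('has_inf_or_nan', tf_debug.has_inf_or_nan)
```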
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.neg.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.neg.md
deleted file mode 100644
index 519fd9a875..0000000000
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.neg.md
+++ /dev/null
@@ -1,16 +0,0 @@
-### `tf.neg(x, name=None)` {#neg}
-
-Computes numerical negative value element-wise.
-
-I.e., \\(y = -x\\).
-
-##### Args:
-
-
-* <b>`x`</b>: A `Tensor`. Must be one of the following types: `half`, `float32`, `float64`, `int32`, `int64`, `complex64`, `complex128`.
-* <b>`name`</b>: A name for the operation (optional).
-
-##### Returns:
-
- A `Tensor`. Has the same type as `x`.
-
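For reference (not part of the patch): `tf.negative`, which remains in the index below, is the drop-in replacement for the removed `tf.neg`.

```python
import tensorflow as tf

x = tf.constant([1.0, -2.0])
y = tf.negative(x)  # replaces the removed tf.neg(x)

with tf.Session() as sess:
  print(sess.run(y))  # [-1.  2.]
```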
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md
index e1caff4de8..9b900ac378 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md
@@ -157,22 +157,19 @@ The signature of the input_fn accepted by export is changing to be consistent wi
- - -
-#### `tf.contrib.learn.Estimator.export_savedmodel(*args, **kwargs)` {#Estimator.export_savedmodel}
-
-Exports inference graph as a SavedModel into given dir. (experimental)
-
-THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning.
+#### `tf.contrib.learn.Estimator.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#Estimator.export_savedmodel}
+Exports inference graph as a SavedModel into given dir.
##### Args:
* <b>`export_dir_base`</b>: A string containing a directory to write the exported
graph and checkpoints.
-* <b>`input_fn`</b>: A function that takes no argument and
+* <b>`serving_input_fn`</b>: A function that takes no argument and
returns an `InputFnOps`.
* <b>`default_output_alternative_key`</b>: the name of the head to serve when none is
- specified.
+ specified. Not needed for single-headed models.
* <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory
within the exported SavedModel. Each key should give the destination
path (including the filename) relative to the assets.extra directory.
@@ -181,7 +178,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with
renaming it is specified as
`{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
* <b>`as_text`</b>: whether to write the SavedModel proto in text format.
-* <b>`exports_to_keep`</b>: Number of exports to keep.
##### Returns:
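Illustrative sketch (not part of the patch) of the new signature, assuming the `InputFnOps` namedtuple from `tf.contrib.learn`'s `input_fn_utils` module and an already-trained `estimator`; the feature placeholder and export directory are hypothetical.

```python
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.utils import input_fn_utils

def serving_input_fn():
  # Placeholders for the features expected at serving time (hypothetical spec).
  features = {'x': tf.placeholder(tf.float32, shape=[None, 1], name='x')}
  return input_fn_utils.InputFnOps(
      features=features, labels=None, default_inputs=features)

# `estimator` is assumed to be a trained tf.contrib.learn estimator.
export_dir = estimator.export_savedmodel(
    export_dir_base='/tmp/exports',
    serving_input_fn=serving_input_fn)
```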
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.linalg.LinearOperatorScaledIdentity.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.linalg.LinearOperatorScaledIdentity.md
index 9cef244fe4..f37278eb55 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.linalg.LinearOperatorScaledIdentity.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.linalg.LinearOperatorScaledIdentity.md
@@ -200,7 +200,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorScaledIdentity.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorScaledIdentity.batch_shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorScaledIdentity.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorScaledIdentity.batch_shape_tensor}
Shape of batch dimensions of this operator, determined at runtime.
@@ -250,7 +250,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorScaledIdentity.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorScaledIdentity.domain_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorScaledIdentity.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorScaledIdentity.domain_dimension_tensor}
Dimension (in the sense of vector spaces) of the domain of this operator.
@@ -350,7 +350,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorScaledIdentity.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorScaledIdentity.range_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorScaledIdentity.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorScaledIdentity.range_dimension_tensor}
Dimension (in the sense of vector spaces) of the range of this operator.
@@ -386,7 +386,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorScaledIdentity.shape_dynamic(name='shape_dynamic')` {#LinearOperatorScaledIdentity.shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorScaledIdentity.shape_tensor(name='shape_tensor')` {#LinearOperatorScaledIdentity.shape_tensor}
Shape of this `LinearOperator`, determined at runtime.
@@ -467,7 +467,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorScaledIdentity.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorScaledIdentity.tensor_rank_dynamic}
+#### `tf.contrib.linalg.LinearOperatorScaledIdentity.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorScaledIdentity.tensor_rank_tensor}
Rank (in the sense of tensors) of matrix corresponding to this operator.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.DNNClassifier.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.DNNClassifier.md
index dd5d361619..b1f95ca2ae 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.DNNClassifier.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.DNNClassifier.md
@@ -165,22 +165,19 @@ See BaseEstimator.export.
- - -
-#### `tf.contrib.learn.DNNClassifier.export_savedmodel(*args, **kwargs)` {#DNNClassifier.export_savedmodel}
-
-Exports inference graph as a SavedModel into given dir. (experimental)
-
-THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning.
+#### `tf.contrib.learn.DNNClassifier.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#DNNClassifier.export_savedmodel}
+Exports inference graph as a SavedModel into given dir.
##### Args:
* <b>`export_dir_base`</b>: A string containing a directory to write the exported
graph and checkpoints.
-* <b>`input_fn`</b>: A function that takes no argument and
+* <b>`serving_input_fn`</b>: A function that takes no argument and
returns an `InputFnOps`.
* <b>`default_output_alternative_key`</b>: the name of the head to serve when none is
- specified.
+ specified. Not needed for single-headed models.
* <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory
within the exported SavedModel. Each key should give the destination
path (including the filename) relative to the assets.extra directory.
@@ -189,7 +186,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with
renaming it is specified as
`{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
* <b>`as_text`</b>: whether to write the SavedModel proto in text format.
-* <b>`exports_to_keep`</b>: Number of exports to keep.
##### Returns:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_true_positives.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_true_positives.md
index aa3019dbf4..a022639c94 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_true_positives.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_true_positives.md
@@ -7,10 +7,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
##### Args:
-* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary
- dimensions.
-* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
+* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
+* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
* <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions
must be either `1`, or the same as the corresponding `labels`
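Illustrative sketch (not part of the patch): integer `predictions` and `labels` are now accepted and cast to `bool` internally.

```python
import tensorflow as tf

predictions = tf.constant([1, 0, 1, 1])  # no longer needs to be a bool Tensor
labels = tf.constant([1, 1, 0, 1])
tp, update_op = tf.contrib.metrics.streaming_true_positives(predictions, labels)

with tf.Session() as sess:
  sess.run(tf.local_variables_initializer())
  sess.run(update_op)
  print(sess.run(tp))  # 2.0
```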
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.sparse_softmax_cross_entropy_with_logits.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.sparse_softmax_cross_entropy_with_logits.md
index bcf0156924..0aa696ba2f 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.sparse_softmax_cross_entropy_with_logits.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.sparse_softmax_cross_entropy_with_logits.md
@@ -28,13 +28,13 @@ this function.**
_sentinel: Used to prevent positional parameters. Internal, do not use.
-* <b>`labels`</b>: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or
- `int64`. Each entry in `labels` must be an index in `[0, num_classes)`.
- Other values will raise an exception when this op is run on CPU, and
- return `NaN` for corresponding corresponding loss and gradient rows
- on GPU.
-* <b>`logits`</b>: Unscaled log probabilities of rank `r` and shape
- `[d_0, d_1, ..., d_{r-2}, num_classes]` and dtype `float32` or `float64`.
+* <b>`labels`</b>: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
+ `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
+ must be an index in `[0, num_classes)`. Other values will raise an
+ exception when this op is run on CPU, and return `NaN` for corresponding
+ loss and gradient rows on GPU.
+* <b>`logits`</b>: Unscaled log probabilities of shape
+ `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or `float64`.
* <b>`name`</b>: A name for the operation (optional).
##### Returns:
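Illustrative sketch (not part of the patch) of the documented shapes: `labels` has shape `[d_0, ..., d_{r-1}]`, `logits` has one extra `num_classes` dimension, and the returned loss matches the `labels` shape.

```python
import tensorflow as tf

labels = tf.constant([1, 0])                    # shape [2], dtype int32
logits = tf.constant([[2.0, 0.5, 0.3],
                      [0.1, 1.2, 3.4]])         # shape [2, num_classes=3]
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=labels, logits=logits)               # shape [2], one loss per entry

with tf.Session() as sess:
  print(sess.run(loss))
```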
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.train.write_graph.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.train.write_graph.md
index 872705a482..33e1f1c591 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.train.write_graph.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.train.write_graph.md
@@ -27,3 +27,7 @@ tf.train.write_graph(sess.graph, '/tmp/my-model', 'train.pbtxt')
* <b>`name`</b>: Filename for the graph.
* <b>`as_text`</b>: If `True`, writes the graph as an ASCII proto.
+##### Returns:
+
+ The path of the output proto file.
+
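Illustrative sketch (not part of the patch) of the newly documented return value; the output directory is hypothetical.

```python
import tensorflow as tf

with tf.Session() as sess:
  path = tf.train.write_graph(sess.graph, '/tmp/my-model', 'train.pbtxt')
  print(path)  # e.g. /tmp/my-model/train.pbtxt
```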
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_false_positives.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_false_positives.md
index c31a7c68dc..d3f748fec7 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_false_positives.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_false_positives.md
@@ -7,10 +7,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
##### Args:
-* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary
- dimensions.
-* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
+* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
+* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
* <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions
must be either `1`, or the same as the corresponding `labels`
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.graph_editor.assign_renamed_collections_handler.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.graph_editor.assign_renamed_collections_handler.md
index 05b2eba532..153da470ea 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.graph_editor.assign_renamed_collections_handler.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.graph_editor.assign_renamed_collections_handler.md
@@ -2,6 +2,9 @@
Add the transformed elem to the (renamed) collections of elem.
+A collection is renamed only if it is not a known key, as described in
+`tf.GraphKeys`.
+
##### Args:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.linalg.LinearOperatorMatrix.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.linalg.LinearOperatorMatrix.md
index 40bb846034..af1ab47660 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.linalg.LinearOperatorMatrix.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.linalg.LinearOperatorMatrix.md
@@ -183,7 +183,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorMatrix.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorMatrix.batch_shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorMatrix.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorMatrix.batch_shape_tensor}
Shape of batch dimensions of this operator, determined at runtime.
@@ -233,7 +233,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorMatrix.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorMatrix.domain_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorMatrix.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorMatrix.domain_dimension_tensor}
Dimension (in the sense of vector spaces) of the domain of this operator.
@@ -326,7 +326,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorMatrix.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorMatrix.range_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorMatrix.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorMatrix.range_dimension_tensor}
Dimension (in the sense of vector spaces) of the range of this operator.
@@ -362,7 +362,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorMatrix.shape_dynamic(name='shape_dynamic')` {#LinearOperatorMatrix.shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorMatrix.shape_tensor(name='shape_tensor')` {#LinearOperatorMatrix.shape_tensor}
Shape of this `LinearOperator`, determined at runtime.
@@ -443,7 +443,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorMatrix.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorMatrix.tensor_rank_dynamic}
+#### `tf.contrib.linalg.LinearOperatorMatrix.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorMatrix.tensor_rank_tensor}
Rank (in the sense of tensors) of matrix corresponding to this operator.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.linalg.LinearOperatorTriL.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.linalg.LinearOperatorTriL.md
index 5454b65f26..13e8d3395a 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.linalg.LinearOperatorTriL.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.linalg.LinearOperatorTriL.md
@@ -185,7 +185,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorTriL.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorTriL.batch_shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorTriL.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorTriL.batch_shape_tensor}
Shape of batch dimensions of this operator, determined at runtime.
@@ -235,7 +235,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorTriL.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorTriL.domain_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorTriL.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorTriL.domain_dimension_tensor}
Dimension (in the sense of vector spaces) of the domain of this operator.
@@ -328,7 +328,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorTriL.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorTriL.range_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperatorTriL.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorTriL.range_dimension_tensor}
Dimension (in the sense of vector spaces) of the range of this operator.
@@ -364,7 +364,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorTriL.shape_dynamic(name='shape_dynamic')` {#LinearOperatorTriL.shape_dynamic}
+#### `tf.contrib.linalg.LinearOperatorTriL.shape_tensor(name='shape_tensor')` {#LinearOperatorTriL.shape_tensor}
Shape of this `LinearOperator`, determined at runtime.
@@ -445,7 +445,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperatorTriL.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorTriL.tensor_rank_dynamic}
+#### `tf.contrib.linalg.LinearOperatorTriL.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorTriL.tensor_rank_tensor}
Rank (in the sense of tensors) of matrix corresponding to this operator.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_true_negatives.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_true_negatives.md
index d8f12ab9eb..5b9dfd33f4 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_true_negatives.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_true_negatives.md
@@ -7,10 +7,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
##### Args:
-* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary
- dimensions.
-* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
+* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
+* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
* <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions
must be either `1`, or the same as the corresponding `labels`
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.DNNRegressor.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.DNNRegressor.md
index 5934a587fe..22e7531e78 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.DNNRegressor.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.DNNRegressor.md
@@ -129,22 +129,19 @@ See BaseEstimator.export.
- - -
-#### `tf.contrib.learn.DNNRegressor.export_savedmodel(*args, **kwargs)` {#DNNRegressor.export_savedmodel}
-
-Exports inference graph as a SavedModel into given dir. (experimental)
-
-THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning.
+#### `tf.contrib.learn.DNNRegressor.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#DNNRegressor.export_savedmodel}
+Exports inference graph as a SavedModel into given dir.
##### Args:
* <b>`export_dir_base`</b>: A string containing a directory to write the exported
graph and checkpoints.
-* <b>`input_fn`</b>: A function that takes no argument and
+* <b>`serving_input_fn`</b>: A function that takes no argument and
returns an `InputFnOps`.
* <b>`default_output_alternative_key`</b>: the name of the head to serve when none is
- specified.
+ specified. Not needed for single-headed models.
* <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory
within the exported SavedModel. Each key should give the destination
path (including the filename) relative to the assets.extra directory.
@@ -153,7 +150,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with
renaming it is specified as
`{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
* <b>`as_text`</b>: whether to write the SavedModel proto in text format.
-* <b>`exports_to_keep`</b>: Number of exports to keep.
##### Returns:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.linalg.LinearOperator.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.linalg.LinearOperator.md
index a07c373774..41a5a1cb74 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.linalg.LinearOperator.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.linalg.LinearOperator.md
@@ -215,7 +215,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperator.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperator.batch_shape_dynamic}
+#### `tf.contrib.linalg.LinearOperator.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperator.batch_shape_tensor}
Shape of batch dimensions of this operator, determined at runtime.
@@ -265,7 +265,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperator.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperator.domain_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperator.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperator.domain_dimension_tensor}
Dimension (in the sense of vector spaces) of the domain of this operator.
@@ -358,7 +358,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperator.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperator.range_dimension_dynamic}
+#### `tf.contrib.linalg.LinearOperator.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperator.range_dimension_tensor}
Dimension (in the sense of vector spaces) of the range of this operator.
@@ -394,7 +394,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperator.shape_dynamic(name='shape_dynamic')` {#LinearOperator.shape_dynamic}
+#### `tf.contrib.linalg.LinearOperator.shape_tensor(name='shape_tensor')` {#LinearOperator.shape_tensor}
Shape of this `LinearOperator`, determined at runtime.
@@ -475,7 +475,7 @@ If this operator acts like the batch matrix `A` with
- - -
-#### `tf.contrib.linalg.LinearOperator.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperator.tensor_rank_dynamic}
+#### `tf.contrib.linalg.LinearOperator.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperator.tensor_rank_tensor}
Rank (in the sense of tensors) of matrix corresponding to this operator.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.metrics.streaming_false_negatives.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.metrics.streaming_false_negatives.md
index 878ba46941..1464305257 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.metrics.streaming_false_negatives.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.metrics.streaming_false_negatives.md
@@ -7,10 +7,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
##### Args:
-* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary
- dimensions.
-* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
+* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
+* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
* <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions
must be either `1`, or the same as the corresponding `labels`
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_local_variable.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_local_variable.md
index 9026066f66..c425a3e64b 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_local_variable.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_local_variable.md
@@ -14,7 +14,7 @@ for an extensive description of how reusing works. Here is a basic example:
with tf.variable_scope("foo"):
v = tf.get_variable("v", [1]) # v.name == "foo/v:0"
w = tf.get_variable("w", [1]) # w.name == "foo/w:0"
-with tf.variable_scope("foo", reuse=True)
+with tf.variable_scope("foo", reuse=True):
v1 = tf.get_variable("v") # The same as v above.
```
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_variable.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_variable.md
index c7040d28da..f09098eb51 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_variable.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_variable.md
@@ -11,7 +11,7 @@ for an extensive description of how reusing works. Here is a basic example:
with tf.variable_scope("foo"):
v = tf.get_variable("v", [1]) # v.name == "foo/v:0"
w = tf.get_variable("w", [1]) # w.name == "foo/w:0"
-with tf.variable_scope("foo", reuse=True)
+with tf.variable_scope("foo", reuse=True):
v1 = tf.get_variable("v") # The same as v above.
```
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.FeedFnHook.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.FeedFnHook.md
new file mode 100644
index 0000000000..1797a0d3b5
--- /dev/null
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.FeedFnHook.md
@@ -0,0 +1,88 @@
+Runs `feed_fn` and sets the `feed_dict` accordingly.
+- - -
+
+#### `tf.train.FeedFnHook.__init__(feed_fn)` {#FeedFnHook.__init__}
+
+Constructs the FeedFnHook with given `feed_fn`.
+
+##### Args:
+
+
+* <b>`feed_fn`</b>: function that takes no arguments and returns the `dict` to feed.
+
+
+- - -
+
+#### `tf.train.FeedFnHook.after_create_session(session, coord)` {#FeedFnHook.after_create_session}
+
+Called when new TensorFlow session is created.
+
+This is called to signal the hooks that a new session has been created. This
+has two essential differences with the situation in which `begin` is called:
+
+* When this is called, the graph is finalized and ops can no longer be added
+ to the graph.
+* This method will also be called as a result of recovering a wrapped
+ session, not only at the beginning of the overall session.
+
+##### Args:
+
+
+* <b>`session`</b>: A TensorFlow Session that has been created.
+* <b>`coord`</b>: A Coordinator object which keeps track of all threads.
+
+
+- - -
+
+#### `tf.train.FeedFnHook.after_run(run_context, run_values)` {#FeedFnHook.after_run}
+
+Called after each call to run().
+
+The `run_values` argument contains results of requested ops/tensors by
+`before_run()`.
+
+The `run_context` argument is the same one sent to the `before_run` call.
+`run_context.request_stop()` can be called to stop the iteration.
+
+##### Args:
+
+
+* <b>`run_context`</b>: A `SessionRunContext` object.
+* <b>`run_values`</b>: A SessionRunValues object.
+
+
+- - -
+
+#### `tf.train.FeedFnHook.before_run(run_context)` {#FeedFnHook.before_run}
+
+
+
+
+- - -
+
+#### `tf.train.FeedFnHook.begin()` {#FeedFnHook.begin}
+
+Called once before using the session.
+
+When called, the default graph is the one that will be launched in the
+session. The hook can modify the graph by adding new operations to it.
+After the `begin()` call the graph will be finalized and the other callbacks
+can no longer modify the graph. A second call of `begin()` on the same
+graph should not change the graph.
+
+
+- - -
+
+#### `tf.train.FeedFnHook.end(session)` {#FeedFnHook.end}
+
+Called at the end of session.
+
+The `session` argument can be used in case the hook wants to run final ops,
+such as saving a last checkpoint.
+
+##### Args:
+
+
+* <b>`session`</b>: A TensorFlow Session that will be soon closed.
+
+
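Illustrative sketch (not part of the patch) of the new hook: `feed_fn` is called for every `run()` and its dictionary is used as the feeds.

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[], name='x')
doubled = x * 2.0

def feed_fn():
  # Returns the feed_dict to use for the next run() call.
  return {x: 3.0}

with tf.train.MonitoredSession(hooks=[tf.train.FeedFnHook(feed_fn)]) as sess:
  print(sess.run(doubled))  # 6.0
```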
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.FinalOpsHook.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.FinalOpsHook.md
new file mode 100644
index 0000000000..bf8e7184b6
--- /dev/null
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.FinalOpsHook.md
@@ -0,0 +1,111 @@
+A run hook which evaluates `Tensors` at the end of a session.
+- - -
+
+#### `tf.train.FinalOpsHook.__init__(final_ops, final_ops_feed_dict=None)` {#FinalOpsHook.__init__}
+
+Constructs the FinalOpHook with ops to run at the end of the session.
+
+##### Args:
+
+
+* <b>`final_ops`</b>: A single `Tensor`, a list of `Tensors` or a dictionary of
+ names to `Tensors`.
+* <b>`final_ops_feed_dict`</b>: A feed dictionary to use when running
+  `final_ops`.
+
+
+- - -
+
+#### `tf.train.FinalOpsHook.after_create_session(session, coord)` {#FinalOpsHook.after_create_session}
+
+Called when new TensorFlow session is created.
+
+This is called to signal the hooks that a new session has been created. This
+has two essential differences from the situation in which `begin` is called:
+
+* When this is called, the graph is finalized and ops can no longer be added
+ to the graph.
+* This method will also be called as a result of recovering a wrapped
+ session, not only at the beginning of the overall session.
+
+##### Args:
+
+
+* <b>`session`</b>: A TensorFlow Session that has been created.
+* <b>`coord`</b>: A Coordinator object which keeps track of all threads.
+
+
+- - -
+
+#### `tf.train.FinalOpsHook.after_run(run_context, run_values)` {#FinalOpsHook.after_run}
+
+Called after each call to run().
+
+The `run_values` argument contains the results of the ops/tensors that were
+requested by `before_run()`.
+
+The `run_context` argument is the same one sent to the `before_run` call.
+`run_context.request_stop()` can be called to stop the iteration.
+
+##### Args:
+
+
+* <b>`run_context`</b>: A `SessionRunContext` object.
+* <b>`run_values`</b>: A SessionRunValues object.
+
+
+- - -
+
+#### `tf.train.FinalOpsHook.before_run(run_context)` {#FinalOpsHook.before_run}
+
+Called before each call to run().
+
+You can return from this call a `SessionRunArgs` object indicating ops or
+tensors to add to the upcoming `run()` call. These ops/tensors will be run
+together with the ops/tensors passed to the original run() call.
+The run args you return can also contain feeds to be added to the run()
+call.
+
+The `run_context` argument is a `SessionRunContext` that provides
+information about the upcoming `run()` call: the originally requested
+op/tensors, the TensorFlow Session.
+
+At this point the graph is finalized and you cannot add ops.
+
+##### Args:
+
+
+* <b>`run_context`</b>: A `SessionRunContext` object.
+
+##### Returns:
+
+ None or a `SessionRunArgs` object.
+
+
+- - -
+
+#### `tf.train.FinalOpsHook.begin()` {#FinalOpsHook.begin}
+
+Called once before using the session.
+
+When called, the default graph is the one that will be launched in the
+session. The hook can modify the graph by adding new operations to it.
+After the `begin()` call the graph will be finalized and the other callbacks
+can no longer modify the graph. A second call of `begin()` on the same graph
+should not change the graph.
+
+
+- - -
+
+#### `tf.train.FinalOpsHook.end(session)` {#FinalOpsHook.end}
+
+
+
+
+- - -
+
+#### `tf.train.FinalOpsHook.final_ops_values` {#FinalOpsHook.final_ops_values}
+
+
+
+
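+- - -
+
+A rough usage sketch (the `counter` variable and the short loop are
+illustrative): `final_ops` are evaluated exactly once, just before the session
+closes, and the results are then available through `final_ops_values`.
+
+```python
+import tensorflow as tf
+
+counter = tf.Variable(0, trainable=False, name="counter")
+increment = tf.assign_add(counter, 1)
+
+# Evaluate the counter once, when the monitored session ends.
+final_hook = tf.train.FinalOpsHook(final_ops={"counter": counter})
+with tf.train.MonitoredSession(hooks=[final_hook]) as sess:
+  for _ in range(3):
+    sess.run(increment)
+print(final_hook.final_ops_values)  # {'counter': 3}
+```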
diff --git a/tensorflow/g3doc/api_docs/python/index.md b/tensorflow/g3doc/api_docs/python/index.md
index cc3dc0a0e5..424448acfd 100644
--- a/tensorflow/g3doc/api_docs/python/index.md
+++ b/tensorflow/g3doc/api_docs/python/index.md
@@ -260,7 +260,6 @@
* [`minimum`](../../api_docs/python/math_ops.md#minimum)
* [`mod`](../../api_docs/python/math_ops.md#mod)
* [`multiply`](../../api_docs/python/math_ops.md#multiply)
- * [`neg`](../../api_docs/python/math_ops.md#neg)
* [`negative`](../../api_docs/python/math_ops.md#negative)
* [`norm`](../../api_docs/python/math_ops.md#norm)
* [`polygamma`](../../api_docs/python/math_ops.md#polygamma)
@@ -619,6 +618,8 @@
* [`do_quantize_training_on_graphdef`](../../api_docs/python/train.md#do_quantize_training_on_graphdef)
* [`exponential_decay`](../../api_docs/python/train.md#exponential_decay)
* [`ExponentialMovingAverage`](../../api_docs/python/train.md#ExponentialMovingAverage)
+ * [`FeedFnHook`](../../api_docs/python/train.md#FeedFnHook)
+ * [`FinalOpsHook`](../../api_docs/python/train.md#FinalOpsHook)
* [`FtrlOptimizer`](../../api_docs/python/train.md#FtrlOptimizer)
* [`generate_checkpoint_state_proto`](../../api_docs/python/train.md#generate_checkpoint_state_proto)
* [`get_checkpoint_mtimes`](../../api_docs/python/train.md#get_checkpoint_mtimes)
diff --git a/tensorflow/g3doc/api_docs/python/math_ops.md b/tensorflow/g3doc/api_docs/python/math_ops.md
index 92b001f898..76636dc6f0 100644
--- a/tensorflow/g3doc/api_docs/python/math_ops.md
+++ b/tensorflow/g3doc/api_docs/python/math_ops.md
@@ -3720,24 +3720,3 @@ invert_permutation(x) ==> [2, 4, 3, 0, 1]
A `Tensor`. Has the same type as `x`. 1-D.
-
-## Other Functions and Classes
-- - -
-
-### `tf.neg(x, name=None)` {#neg}
-
-Computes numerical negative value element-wise.
-
-I.e., \\(y = -x\\).
-
-##### Args:
-
-
-* <b>`x`</b>: A `Tensor`. Must be one of the following types: `half`, `float32`, `float64`, `int32`, `int64`, `complex64`, `complex128`.
-* <b>`name`</b>: A name for the operation (optional).
-
-##### Returns:
-
- A `Tensor`. Has the same type as `x`.
-
-
diff --git a/tensorflow/g3doc/api_docs/python/nn.md b/tensorflow/g3doc/api_docs/python/nn.md
index 84aaa5c5c9..5d64aaf072 100644
--- a/tensorflow/g3doc/api_docs/python/nn.md
+++ b/tensorflow/g3doc/api_docs/python/nn.md
@@ -2370,13 +2370,13 @@ this function.**
_sentinel: Used to prevent positional parameters. Internal, do not use.
-* <b>`labels`</b>: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or
- `int64`. Each entry in `labels` must be an index in `[0, num_classes)`.
- Other values will raise an exception when this op is run on CPU, and
- return `NaN` for corresponding corresponding loss and gradient rows
- on GPU.
-* <b>`logits`</b>: Unscaled log probabilities of rank `r` and shape
- `[d_0, d_1, ..., d_{r-2}, num_classes]` and dtype `float32` or `float64`.
+* <b>`labels`</b>: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
+ `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
+ must be an index in `[0, num_classes)`. Other values will raise an
+ exception when this op is run on CPU, and return `NaN` for corresponding
+ loss and gradient rows on GPU.
+* <b>`logits`</b>: Unscaled log probabilities of shape
+ `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or `float64`.
* <b>`name`</b>: A name for the operation (optional).
##### Returns:
diff --git a/tensorflow/g3doc/api_docs/python/state_ops.md b/tensorflow/g3doc/api_docs/python/state_ops.md
index 2db192fddd..9890892b0f 100644
--- a/tensorflow/g3doc/api_docs/python/state_ops.md
+++ b/tensorflow/g3doc/api_docs/python/state_ops.md
@@ -1943,7 +1943,7 @@ for an extensive description of how reusing works. Here is a basic example:
with tf.variable_scope("foo"):
v = tf.get_variable("v", [1]) # v.name == "foo/v:0"
w = tf.get_variable("w", [1]) # w.name == "foo/w:0"
-with tf.variable_scope("foo", reuse=True)
+with tf.variable_scope("foo", reuse=True):
v1 = tf.get_variable("v") # The same as v above.
```
@@ -2032,7 +2032,7 @@ for an extensive description of how reusing works. Here is a basic example:
with tf.variable_scope("foo"):
v = tf.get_variable("v", [1]) # v.name == "foo/v:0"
w = tf.get_variable("w", [1]) # w.name == "foo/w:0"
-with tf.variable_scope("foo", reuse=True)
+with tf.variable_scope("foo", reuse=True):
v1 = tf.get_variable("v") # The same as v above.
```
diff --git a/tensorflow/g3doc/api_docs/python/tf_debug.md b/tensorflow/g3doc/api_docs/python/tf_debug.md
index 28fc9ec502..9dc35ac82e 100644
--- a/tensorflow/g3doc/api_docs/python/tf_debug.md
+++ b/tensorflow/g3doc/api_docs/python/tf_debug.md
@@ -1216,12 +1216,18 @@ Create a local debugger command-line interface (CLI) hook.
Add a tensor filter.
+See doc of `LocalCLIDebugWrapperSession.add_tensor_filter()` for details.
+Override default behavior to accommodate the possibility of this method being
+called prior to the initialization of the underlying
+`LocalCLIDebugWrapperSession` object.
+
##### Args:
-* <b>`filter_name`</b>: (`str`) name of the filter.
-* <b>`tensor_filter`</b>: (`callable`) the filter callable. See the doc string of
- `DebugDumpDir.find()` for more details about its signature.
+* <b>`filter_name`</b>: See doc of `LocalCLIDebugWrapperSession.add_tensor_filter()`
+ for details.
+* <b>`tensor_filter`</b>: See doc of
+ `LocalCLIDebugWrapperSession.add_tensor_filter()` for details.
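+
+A minimal sketch of registering the bundled `has_inf_or_nan` filter on the
+hook; because of the override described above, this can be done before the
+wrapped session exists:
+
+```python
+from tensorflow.python import debug as tf_debug
+
+hook = tf_debug.LocalCLIDebugHook()
+hook.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
+```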
- - -
diff --git a/tensorflow/g3doc/api_docs/python/train.md b/tensorflow/g3doc/api_docs/python/train.md
index 098418f7a6..ac5ef5fc9f 100644
--- a/tensorflow/g3doc/api_docs/python/train.md
+++ b/tensorflow/g3doc/api_docs/python/train.md
@@ -1533,6 +1533,7 @@ See [Threading and Queues](../../how_tos/threading_and_queues/index.md)
for how to use threads and queues. For documentation on the Queue API,
see [Queues](../../api_docs/python/io_ops.md#queues).
+
- - -
### `class tf.train.Coordinator` {#Coordinator}
@@ -1984,6 +1985,233 @@ Converts this `QueueRunner` to a `QueueRunnerDef` protocol buffer.
- - -
+### `class tf.train.LooperThread` {#LooperThread}
+
+A thread that runs code repeatedly, optionally on a timer.
+
+This thread class is intended to be used with a `Coordinator`. It repeatedly
+runs code specified either as `target` and `args` or by the `run_loop()`
+method.
+
+Before each run the thread checks whether the coordinator has requested a
+stop. In that case the looper thread terminates immediately.
+
+If the code being run raises an exception, that exception is reported to the
+coordinator and the thread terminates. The coordinator will then request all
+the other threads it coordinates to stop.
+
+You typically pass looper threads to the supervisor `Join()` method.
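+
+A minimal sketch (the `work` function is illustrative): `LooperThread.loop()`
+starts a thread that keeps calling `work()` until the coordinator requests a
+stop.
+
+```python
+import tensorflow as tf
+
+coord = tf.train.Coordinator()
+
+def work():
+  # One unit of work per invocation.
+  pass
+
+# Call work() roughly every 0.5 seconds until coord.request_stop().
+thread = tf.train.LooperThread.loop(coord, 0.5, target=work)
+coord.request_stop()
+coord.join([thread])
+```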
+- - -
+
+#### `tf.train.LooperThread.__init__(coord, timer_interval_secs, target=None, args=None, kwargs=None)` {#LooperThread.__init__}
+
+Create a LooperThread.
+
+##### Args:
+
+
+* <b>`coord`</b>: A Coordinator.
+* <b>`timer_interval_secs`</b>: Time boundaries at which to call Run(), or None
+ if it should be called back to back.
+* <b>`target`</b>: Optional callable object that will be executed in the thread.
+* <b>`args`</b>: Optional arguments to pass to `target` when calling it.
+* <b>`kwargs`</b>: Optional keyword arguments to pass to `target` when calling it.
+
+##### Raises:
+
+
+* <b>`ValueError`</b>: If one of the arguments is invalid.
+
+
+- - -
+
+#### `tf.train.LooperThread.__repr__()` {#LooperThread.__repr__}
+
+
+
+
+- - -
+
+#### `tf.train.LooperThread.daemon` {#LooperThread.daemon}
+
+A boolean value indicating whether this thread is a daemon thread (True) or not (False).
+
+This must be set before start() is called, otherwise RuntimeError is
+raised. Its initial value is inherited from the creating thread; the
+main thread is not a daemon thread and therefore all threads created in
+the main thread default to daemon = False.
+
+The entire Python program exits when no alive non-daemon threads are
+left.
+
+
+- - -
+
+#### `tf.train.LooperThread.getName()` {#LooperThread.getName}
+
+
+
+
+- - -
+
+#### `tf.train.LooperThread.ident` {#LooperThread.ident}
+
+Thread identifier of this thread or None if it has not been started.
+
+This is a nonzero integer. See the thread.get_ident() function. Thread
+identifiers may be recycled when a thread exits and another thread is
+created. The identifier is available even after the thread has exited.
+
+
+- - -
+
+#### `tf.train.LooperThread.isAlive()` {#LooperThread.isAlive}
+
+Return whether the thread is alive.
+
+This method returns True just before the run() method starts until just
+after the run() method terminates. The module function enumerate()
+returns a list of all alive threads.
+
+
+- - -
+
+#### `tf.train.LooperThread.isDaemon()` {#LooperThread.isDaemon}
+
+
+
+
+- - -
+
+#### `tf.train.LooperThread.is_alive()` {#LooperThread.is_alive}
+
+Return whether the thread is alive.
+
+This method returns True just before the run() method starts until just
+after the run() method terminates. The module function enumerate()
+returns a list of all alive threads.
+
+
+- - -
+
+#### `tf.train.LooperThread.join(timeout=None)` {#LooperThread.join}
+
+Wait until the thread terminates.
+
+This blocks the calling thread until the thread whose join() method is
+called terminates -- either normally or through an unhandled exception
+or until the optional timeout occurs.
+
+When the timeout argument is present and not None, it should be a
+floating point number specifying a timeout for the operation in seconds
+(or fractions thereof). As join() always returns None, you must call
+isAlive() after join() to decide whether a timeout happened -- if the
+thread is still alive, the join() call timed out.
+
+When the timeout argument is not present or None, the operation will
+block until the thread terminates.
+
+A thread can be join()ed many times.
+
+join() raises a RuntimeError if an attempt is made to join the current
+thread as that would cause a deadlock. It is also an error to join() a
+thread before it has been started and attempts to do so raises the same
+exception.
+
+
+- - -
+
+#### `tf.train.LooperThread.loop(coord, timer_interval_secs, target, args=None, kwargs=None)` {#LooperThread.loop}
+
+Start a LooperThread that calls a function periodically.
+
+If `timer_interval_secs` is None the thread calls `target(args)`
+repeatedly. Otherwise `target(args)` is called every `timer_interval_secs`
+seconds. The thread terminates when the coordinator requests a stop.
+
+##### Args:
+
+
+* <b>`coord`</b>: A Coordinator.
+* <b>`timer_interval_secs`</b>: Number. Time boundaries at which to call `target`.
+* <b>`target`</b>: A callable object.
+* <b>`args`</b>: Optional arguments to pass to `target` when calling it.
+* <b>`kwargs`</b>: Optional keyword arguments to pass to `target` when calling it.
+
+##### Returns:
+
+ The started thread.
+
+
+- - -
+
+#### `tf.train.LooperThread.name` {#LooperThread.name}
+
+A string used for identification purposes only.
+
+It has no semantics. Multiple threads may be given the same name. The
+initial name is set by the constructor.
+
+
+- - -
+
+#### `tf.train.LooperThread.run()` {#LooperThread.run}
+
+
+
+
+- - -
+
+#### `tf.train.LooperThread.run_loop()` {#LooperThread.run_loop}
+
+Called at `timer_interval_secs` boundaries.
+
+
+- - -
+
+#### `tf.train.LooperThread.setDaemon(daemonic)` {#LooperThread.setDaemon}
+
+
+
+
+- - -
+
+#### `tf.train.LooperThread.setName(name)` {#LooperThread.setName}
+
+
+
+
+- - -
+
+#### `tf.train.LooperThread.start()` {#LooperThread.start}
+
+Start the thread's activity.
+
+It must be called at most once per thread object. It arranges for the
+object's run() method to be invoked in a separate thread of control.
+
+This method will raise a RuntimeError if called more than once on the
+same thread object.
+
+
+- - -
+
+#### `tf.train.LooperThread.start_loop()` {#LooperThread.start_loop}
+
+Called when the thread starts.
+
+
+- - -
+
+#### `tf.train.LooperThread.stop_loop()` {#LooperThread.stop_loop}
+
+Called when the thread stops.
+
+
+
+- - -
+
### `tf.train.add_queue_runner(qr, collection='queue_runners')` {#add_queue_runner}
Adds a `QueueRunner` to a collection in the graph.
@@ -3531,7 +3759,7 @@ with tf.device(tf.train.replica_device_setter(cluster=cluster_spec)):
- - -
-### `tf.train.MonitoredTrainingSession(master='', is_chief=True, checkpoint_dir=None, scaffold=None, hooks=None, chief_only_hooks=None, save_checkpoint_secs=600, save_summaries_steps=100, config=None)` {#MonitoredTrainingSession}
+### `tf.train.MonitoredTrainingSession(master='', is_chief=True, checkpoint_dir=None, scaffold=None, hooks=None, chief_only_hooks=None, save_checkpoint_secs=600, save_summaries_steps=100, save_summaries_secs=None, config=None)` {#MonitoredTrainingSession}
Creates a `MonitoredSession` for training.
@@ -3559,8 +3787,12 @@ inialize/restore.
using a default checkpoint saver. If `save_checkpoint_secs` is set to
`None`, then the default checkpoint saver isn't used.
* <b>`save_summaries_steps`</b>: The frequency, in number of global steps, that the
- summaries are written to disk using a default summary saver. If
- `save_summaries_steps` is set to `None`, then the default summary saver
+ summaries are written to disk using a default summary saver. If both
+ `save_summaries_steps` and `save_summaries_secs` are set to `None`, then
+ the default summary saver isn't used.
+* <b>`save_summaries_secs`</b>: The frequency, in secs, that the summaries are written
+ to disk using a default summary saver. If both `save_summaries_steps` and
+ `save_summaries_secs` are set to `None`, then the default summary saver
isn't used.
* <b>`config`</b>: an instance of `tf.ConfigProto` proto used to configure the session.
It's the `config` argument of constructor of `tf.Session`.
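+
+A rough sketch of the time-based summary schedule (the checkpoint directory,
+step limit and trivial `train_op` are illustrative):
+
+```python
+import tensorflow as tf
+
+# One way to create the global step expected by the default hooks.
+global_step = tf.contrib.framework.get_or_create_global_step()
+train_op = tf.assign_add(global_step, 1)
+tf.summary.scalar("global_step", global_step)
+
+# Write summaries every 60 seconds instead of every N steps.
+with tf.train.MonitoredTrainingSession(
+    checkpoint_dir="/tmp/train_logs",
+    save_summaries_steps=None,
+    save_summaries_secs=60,
+    hooks=[tf.train.StopAtStepHook(last_step=1000)]) as sess:
+  while not sess.should_stop():
+    sess.run(train_op)
+```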
@@ -4111,232 +4343,312 @@ for more information about their attributes.
-## Training Utilities
+## Training Hooks
+
+Hooks are tools that run during training or evaluation of the model.
- - -
-### `tf.train.global_step(sess, global_step_tensor)` {#global_step}
+### `class tf.train.SessionRunHook` {#SessionRunHook}
-Small helper to get the global step.
+Hook to extend calls to MonitoredSession.run().
+- - -
-```python
-# Creates a variable to hold the global_step.
-global_step_tensor = tf.Variable(10, trainable=False, name='global_step')
-# Creates a session.
-sess = tf.Session()
-# Initializes the variable.
-print('global_step: %s' % tf.train.global_step(sess, global_step_tensor))
+#### `tf.train.SessionRunHook.after_create_session(session, coord)` {#SessionRunHook.after_create_session}
-global_step: 10
-```
+Called when a new TensorFlow session is created.
-##### Args:
+This is called to signal the hooks that a new session has been created. This
+differs from the situation in which `begin` is called in two essential ways:
+* When this is called, the graph is finalized and ops can no longer be added
+ to the graph.
+* This method will also be called as a result of recovering a wrapped
+ session, not only at the beginning of the overall session.
-* <b>`sess`</b>: A TensorFlow `Session` object.
-* <b>`global_step_tensor`</b>: `Tensor` or the `name` of the operation that contains
- the global step.
+##### Args:
-##### Returns:
- The global step value.
+* <b>`session`</b>: A TensorFlow Session that has been created.
+* <b>`coord`</b>: A Coordinator object which keeps track of all threads.
- - -
-### `tf.train.basic_train_loop(supervisor, train_step_fn, args=None, kwargs=None, master='')` {#basic_train_loop}
-
-Basic loop to train a model.
+#### `tf.train.SessionRunHook.after_run(run_context, run_values)` {#SessionRunHook.after_run}
-Calls `train_step_fn` in a loop to train a model. The function is called as:
+Called after each call to run().
-```python
-train_step_fn(session, *args, **kwargs)
-```
+The `run_values` argument contains the results of the ops/tensors that were
+requested by `before_run()`.
-It is passed a `tf.Session` in addition to `args` and `kwargs`. The function
-typically runs one training step in the session.
+The `run_context` argument is the same one sent to the `before_run` call.
+`run_context.request_stop()` can be called to stop the iteration.
##### Args:
-* <b>`supervisor`</b>: `tf.Supervisor` to run the training services.
-* <b>`train_step_fn`</b>: Callable to execute one training step. Called
- repeatedly as `train_step_fn(session, *args **kwargs)`.
-* <b>`args`</b>: Optional positional arguments passed to `train_step_fn`.
-* <b>`kwargs`</b>: Optional keyword arguments passed to `train_step_fn`.
-* <b>`master`</b>: Master to use to create the training session. Defaults to
- `""` which causes the session to be created in the local process.
+* <b>`run_context`</b>: A `SessionRunContext` object.
+* <b>`run_values`</b>: A SessionRunValues object.
- - -
-### `tf.train.get_global_step(graph=None)` {#get_global_step}
+#### `tf.train.SessionRunHook.before_run(run_context)` {#SessionRunHook.before_run}
-Get the global step tensor.
+Called before each call to run().
-The global step tensor must be an integer variable. We first try to find it
-in the collection `GLOBAL_STEP`, or by name `global_step:0`.
+You can return from this call a `SessionRunArgs` object indicating ops or
+tensors to add to the upcoming `run()` call. These ops/tensors will be run
+together with the ops/tensors passed to the original run() call.
+The run args you return can also contain feeds to be added to the run()
+call.
+
+The `run_context` argument is a `SessionRunContext` that provides
+information about the upcoming `run()` call: the originally requested
+op/tensors, the TensorFlow Session.
+
+At this point the graph is finalized and you cannot add ops.
##### Args:
-* <b>`graph`</b>: The graph to find the global step in. If missing, use default graph.
+* <b>`run_context`</b>: A `SessionRunContext` object.
##### Returns:
- The global step variable, or `None` if none was found.
+ None or a `SessionRunArgs` object.
-##### Raises:
+- - -
-* <b>`TypeError`</b>: If the global step tensor has a non-integer type, or if it is not
- a `Variable`.
+#### `tf.train.SessionRunHook.begin()` {#SessionRunHook.begin}
+
+Called once before using the session.
+
+When called, the default graph is the one that will be launched in the
+session. The hook can modify the graph by adding new operations to it.
+After the `begin()` call the graph will be finalized and the other callbacks
+can no longer modify the graph. A second call of `begin()` on the same graph
+should not change the graph.
- - -
-### `tf.train.assert_global_step(global_step_tensor)` {#assert_global_step}
+#### `tf.train.SessionRunHook.end(session)` {#SessionRunHook.end}
-Asserts `global_step_tensor` is a scalar int `Variable` or `Tensor`.
+Called at the end of the session.
+
+The `session` argument can be used in case the hook wants to run final ops,
+such as saving a last checkpoint.
##### Args:
-* <b>`global_step_tensor`</b>: `Tensor` to test.
+* <b>`session`</b>: A TensorFlow Session that will soon be closed.
+
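+A minimal sketch of a custom hook (the class name `StepLoggerHook` is
+illustrative): `before_run` requests the global step as an extra fetch, and
+`after_run` reads it back from `run_values.results`.
+
+```python
+import tensorflow as tf
+
+class StepLoggerHook(tf.train.SessionRunHook):
+
+  def begin(self):
+    # The graph can still be modified here; just look up the step tensor.
+    self._step_tensor = tf.train.get_global_step()
+
+  def before_run(self, run_context):
+    # Ask the upcoming run() to also fetch the global step.
+    return tf.train.SessionRunArgs(self._step_tensor)
+
+  def after_run(self, run_context, run_values):
+    step = run_values.results
+    if step is not None and step % 100 == 0:
+      print("global step: %d" % step)
+```
+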
- - -
-### `tf.train.write_graph(graph_or_graph_def, logdir, name, as_text=True)` {#write_graph}
+### `class tf.train.SessionRunArgs` {#SessionRunArgs}
-Writes a graph proto to a file.
+Represents arguments to be added to a `Session.run()` call.
-The graph is written as a binary proto unless `as_text` is `True`.
+Args:
+ fetches: Exactly like the 'fetches' argument to Session.Run().
+ Can be a single tensor or op, a list of 'fetches' or a dictionary
+ of fetches. For example:
+ fetches = global_step_tensor
+ fetches = [train_op, summary_op, global_step_tensor]
+ fetches = {'step': global_step_tensor, 'summ': summary_op}
+ Note that this can recurse as expected:
+ fetches = {'step': global_step_tensor,
+ 'ops': [train_op, check_nan_op]}
+ feed_dict: Exactly like the `feed_dict` argument to `Session.Run()`
+ options: Exactly like the `options` argument to `Session.run()`, i.e., a
+ config_pb2.RunOptions proto.
+- - -
-```python
-v = tf.Variable(0, name='my_variable')
-sess = tf.Session()
-tf.train.write_graph(sess.graph_def, '/tmp/my-model', 'train.pbtxt')
-```
+#### `tf.train.SessionRunArgs.__getnewargs__()` {#SessionRunArgs.__getnewargs__}
-or
+Return self as a plain tuple. Used by copy and pickle.
-```python
-v = tf.Variable(0, name='my_variable')
-sess = tf.Session()
-tf.train.write_graph(sess.graph, '/tmp/my-model', 'train.pbtxt')
-```
-##### Args:
+- - -
+#### `tf.train.SessionRunArgs.__getstate__()` {#SessionRunArgs.__getstate__}
-* <b>`graph_or_graph_def`</b>: A `Graph` or a `GraphDef` protocol buffer.
-* <b>`logdir`</b>: Directory where to write the graph. This can refer to remote
- filesystems, such as Google Cloud Storage (GCS).
-* <b>`name`</b>: Filename for the graph.
-* <b>`as_text`</b>: If `True`, writes the graph as an ASCII proto.
+Exclude the OrderedDict from pickling
- - -
-### `class tf.train.SessionRunHook` {#SessionRunHook}
+#### `tf.train.SessionRunArgs.__new__(cls, fetches, feed_dict=None, options=None)` {#SessionRunArgs.__new__}
+
+
+
-Hook to extend calls to MonitoredSession.run().
- - -
-#### `tf.train.SessionRunHook.after_create_session(session, coord)` {#SessionRunHook.after_create_session}
+#### `tf.train.SessionRunArgs.__repr__()` {#SessionRunArgs.__repr__}
-Called when new TensorFlow session is created.
+Return a nicely formatted representation string
-This is called to signal the hooks that a new session has been created. This
-has two essential differences with the situation in which `begin` is called:
-* When this is called, the graph is finalized and ops can no longer be added
- to the graph.
-* This method will also be called as a result of recovering a wrapped
- session, not only at the beginning of the overall session.
+- - -
-##### Args:
+#### `tf.train.SessionRunArgs.feed_dict` {#SessionRunArgs.feed_dict}
+Alias for field number 1
-* <b>`session`</b>: A TensorFlow Session that has been created.
-* <b>`coord`</b>: A Coordinator object which keeps track of all threads.
+
+- - -
+
+#### `tf.train.SessionRunArgs.fetches` {#SessionRunArgs.fetches}
+
+Alias for field number 0
- - -
-#### `tf.train.SessionRunHook.after_run(run_context, run_values)` {#SessionRunHook.after_run}
+#### `tf.train.SessionRunArgs.options` {#SessionRunArgs.options}
-Called after each call to run().
+Alias for field number 2
-The `run_values` argument contains results of requested ops/tensors by
-`before_run()`.
-The `run_context` argument is the same one send to `before_run` call.
-`run_context.request_stop()` can be called to stop the iteration.
-##### Args:
+- - -
+### `class tf.train.SessionRunContext` {#SessionRunContext}
-* <b>`run_context`</b>: A `SessionRunContext` object.
-* <b>`run_values`</b>: A SessionRunValues object.
+Provides information about the `session.run()` call being made.
+
+Provides information about the original request to the `Session.run()` function.
+SessionRunHook objects can stop the loop by calling `request_stop()` of
+`run_context`. In the future we may use this object to add more information
+about the run without changing the Hook API.
+- - -
+
+#### `tf.train.SessionRunContext.__init__(original_args, session)` {#SessionRunContext.__init__}
+
+Initializes SessionRunContext.
- - -
-#### `tf.train.SessionRunHook.before_run(run_context)` {#SessionRunHook.before_run}
+#### `tf.train.SessionRunContext.original_args` {#SessionRunContext.original_args}
-Called before each call to run().
+A `SessionRunArgs` object holding the original arguments of `run()`.
-You can return from this call a `SessionRunArgs` object indicating ops or
-tensors to add to the upcoming `run()` call. These ops/tensors will be run
-together with the ops/tensors originally passed to the original run() call.
-The run args you return can also contain feeds to be added to the run()
-call.
+If the user called `MonitoredSession.run(fetches=a, feed_dict=b)`, then this
+field is equal to `SessionRunArgs(a, b)`.
-The `run_context` argument is a `SessionRunContext` that provides
-information about the upcoming `run()` call: the originally requested
-op/tensors, the TensorFlow Session.
+##### Returns:
-At this point graph is finalized and you can not add ops.
+ A `SessionRunArgs` object
-##### Args:
+- - -
+
+#### `tf.train.SessionRunContext.request_stop()` {#SessionRunContext.request_stop}
-* <b>`run_context`</b>: A `SessionRunContext` object.
+Sets the stop-requested field.
+
+Hooks can use this function to request that iteration stop.
+`MonitoredSession` checks whether this is called or not.
+
+
+- - -
+
+#### `tf.train.SessionRunContext.session` {#SessionRunContext.session}
+
+A TensorFlow session object which will execute the `run`.
+
+
+- - -
+
+#### `tf.train.SessionRunContext.stop_requested` {#SessionRunContext.stop_requested}
+
+Returns whether a stop is requested or not.
+
+If true, `MonitoredSession` stops iterations.
##### Returns:
- None or a `SessionRunArgs` object.
+ A `bool`
+
- - -
-#### `tf.train.SessionRunHook.begin()` {#SessionRunHook.begin}
+### `class tf.train.SessionRunValues` {#SessionRunValues}
-Called once before using the session.
+Contains the results of `Session.run()`.
-When called, the default graph is the one that will be launched in the
-session. The hook can modify the graph by adding new operations to it.
-After the `begin()` call the graph will be finalized and the other callbacks
-can not modify the graph anymore. Second call of `begin()` on the same
-graph, should not change the graph.
+In the future we may use this object to add more information about the result
+of the run without changing the Hook API.
+
+Args:
+ results: The return values from `Session.run()` corresponding to the fetches
+ attribute returned in the RunArgs. Note that this has the same shape as
+ the RunArgs fetches. For example:
+ fetches = global_step_tensor
+ => results = nparray(int)
+ fetches = [train_op, summary_op, global_step_tensor]
+ => results = [None, nparray(string), nparray(int)]
+ fetches = {'step': global_step_tensor, 'summ': summary_op}
+ => results = {'step': nparray(int), 'summ': nparray(string)}
+ options: `RunOptions` from the `Session.run()` call.
+ run_metadata: `RunMetadata` from the `Session.run()` call.
+- - -
+
+#### `tf.train.SessionRunValues.__getnewargs__()` {#SessionRunValues.__getnewargs__}
+
+Return self as a plain tuple. Used by copy and pickle.
- - -
-#### `tf.train.SessionRunHook.end(session)` {#SessionRunHook.end}
+#### `tf.train.SessionRunValues.__getstate__()` {#SessionRunValues.__getstate__}
-Called at the end of session.
+Exclude the OrderedDict from pickling
-The `session` argument can be used in case the hook wants to run final ops,
-such as saving a last checkpoint.
-##### Args:
+- - -
+#### `tf.train.SessionRunValues.__new__(_cls, results, options, run_metadata)` {#SessionRunValues.__new__}
+
+Create new instance of SessionRunValues(results, options, run_metadata)
+
+
+- - -
+
+#### `tf.train.SessionRunValues.__repr__()` {#SessionRunValues.__repr__}
+
+Return a nicely formatted representation string
+
+
+- - -
+
+#### `tf.train.SessionRunValues.options` {#SessionRunValues.options}
+
+Alias for field number 1
+
+
+- - -
+
+#### `tf.train.SessionRunValues.results` {#SessionRunValues.results}
+
+Alias for field number 0
+
+
+- - -
+
+#### `tf.train.SessionRunValues.run_metadata` {#SessionRunValues.run_metadata}
+
+Alias for field number 2
-* <b>`session`</b>: A TensorFlow Session that will be soon closed.
@@ -4349,7 +4661,7 @@ Prints the given tensors once every N local steps or once every N seconds.
The tensors will be printed to the log, with `INFO` severity.
- - -
-#### `tf.train.LoggingTensorHook.__init__(tensors, every_n_iter=None, every_n_secs=None)` {#LoggingTensorHook.__init__}
+#### `tf.train.LoggingTensorHook.__init__(tensors, every_n_iter=None, every_n_secs=None, formatter=None)` {#LoggingTensorHook.__init__}
Initializes a LoggingHook monitor.
@@ -4363,6 +4675,8 @@ Initializes a LoggingHook monitor.
* <b>`every_n_secs`</b>: `int` or `float`, print the values of `tensors` once every N
seconds. Exactly one of `every_n_iter` and `every_n_secs` should be
provided.
+* <b>`formatter`</b>: function that takes a dict of `tag`->`Tensor` and returns
+  a string. If `None`, the default of printing all tensors is used.
##### Raises:
@@ -4953,431 +5267,343 @@ such as saving a last checkpoint.
- - -
-### `class tf.train.SessionRunArgs` {#SessionRunArgs}
-
-Represents arguments to be added to a `Session.run()` call.
+### `class tf.train.FinalOpsHook` {#FinalOpsHook}
-Args:
- fetches: Exactly like the 'fetches' argument to Session.Run().
- Can be a single tensor or op, a list of 'fetches' or a dictionary
- of fetches. For example:
- fetches = global_step_tensor
- fetches = [train_op, summary_op, global_step_tensor]
- fetches = {'step': global_step_tensor, 'summ': summary_op}
- Note that this can recurse as expected:
- fetches = {'step': global_step_tensor,
- 'ops': [train_op, check_nan_op]}
- feed_dict: Exactly like the `feed_dict` argument to `Session.Run()`
- options: Exactly like the `options` argument to `Session.run()`, i.e., a
- config_pb2.RunOptions proto.
+A run hook which evaluates `Tensors` at the end of a session.
- - -
-#### `tf.train.SessionRunArgs.__getnewargs__()` {#SessionRunArgs.__getnewargs__}
+#### `tf.train.FinalOpsHook.__init__(final_ops, final_ops_feed_dict=None)` {#FinalOpsHook.__init__}
-Return self as a plain tuple. Used by copy and pickle.
+Constructs the FinalOpsHook with ops to run at the end of the session.
+##### Args:
-- - -
-#### `tf.train.SessionRunArgs.__getstate__()` {#SessionRunArgs.__getstate__}
-
-Exclude the OrderedDict from pickling
+* <b>`final_ops`</b>: A single `Tensor`, a list of `Tensors` or a dictionary of
+ names to `Tensors`.
+* <b>`final_ops_feed_dict`</b>: A feed dictionary to use when running
+  `final_ops`.
- - -
-#### `tf.train.SessionRunArgs.__new__(cls, fetches, feed_dict=None, options=None)` {#SessionRunArgs.__new__}
-
+#### `tf.train.FinalOpsHook.after_create_session(session, coord)` {#FinalOpsHook.after_create_session}
+Called when a new TensorFlow session is created.
+This is called to signal the hooks that a new session has been created. This
+differs from the situation in which `begin` is called in two essential ways:
-- - -
-
-#### `tf.train.SessionRunArgs.__repr__()` {#SessionRunArgs.__repr__}
-
-Return a nicely formatted representation string
-
+* When this is called, the graph is finalized and ops can no longer be added
+ to the graph.
+* This method will also be called as a result of recovering a wrapped
+ session, not only at the beginning of the overall session.
-- - -
+##### Args:
-#### `tf.train.SessionRunArgs.feed_dict` {#SessionRunArgs.feed_dict}
-Alias for field number 1
+* <b>`session`</b>: A TensorFlow Session that has been created.
+* <b>`coord`</b>: A Coordinator object which keeps track of all threads.
- - -
-#### `tf.train.SessionRunArgs.fetches` {#SessionRunArgs.fetches}
+#### `tf.train.FinalOpsHook.after_run(run_context, run_values)` {#FinalOpsHook.after_run}
-Alias for field number 0
+Called after each call to run().
+The `run_values` argument contains the results of the ops/tensors that were
+requested by `before_run()`.
-- - -
+The `run_context` argument is the same one sent to the `before_run` call.
+`run_context.request_stop()` can be called to stop the iteration.
-#### `tf.train.SessionRunArgs.options` {#SessionRunArgs.options}
+##### Args:
-Alias for field number 2
+* <b>`run_context`</b>: A `SessionRunContext` object.
+* <b>`run_values`</b>: A SessionRunValues object.
- - -
-### `class tf.train.SessionRunContext` {#SessionRunContext}
-
-Provides information about the `session.run()` call being made.
-
-Provides information about original request to `Session.Run()` function.
-SessionRunHook objects can stop the loop by calling `request_stop()` of
-`run_context`. In the future we may use this object to add more information
-about run without changing the Hook API.
-- - -
+#### `tf.train.FinalOpsHook.before_run(run_context)` {#FinalOpsHook.before_run}
-#### `tf.train.SessionRunContext.__init__(original_args, session)` {#SessionRunContext.__init__}
+Called before each call to run().
-Initializes SessionRunContext.
+You can return from this call a `SessionRunArgs` object indicating ops or
+tensors to add to the upcoming `run()` call. These ops/tensors will be run
+together with the ops/tensors passed to the original run() call.
+The run args you return can also contain feeds to be added to the run()
+call.
+The `run_context` argument is a `SessionRunContext` that provides
+information about the upcoming `run()` call: the originally requested
+op/tensors, the TensorFlow Session.
-- - -
+At this point the graph is finalized and you cannot add ops.
-#### `tf.train.SessionRunContext.original_args` {#SessionRunContext.original_args}
+##### Args:
-A `SessionRunArgs` object holding the original arguments of `run()`.
-If user called `MonitoredSession.run(fetches=a, feed_dict=b)`, then this
-field is equal to SessionRunArgs(a, b).
+* <b>`run_context`</b>: A `SessionRunContext` object.
##### Returns:
- A `SessionRunArgs` object
+ None or a `SessionRunArgs` object.
- - -
-#### `tf.train.SessionRunContext.request_stop()` {#SessionRunContext.request_stop}
-
-Sets stop requested field.
-
-Hooks can use this function to request stop of iterations.
-`MonitoredSession` checks whether this is called or not.
-
-
-- - -
+#### `tf.train.FinalOpsHook.begin()` {#FinalOpsHook.begin}
-#### `tf.train.SessionRunContext.session` {#SessionRunContext.session}
+Called once before using the session.
-A TensorFlow session object which will execute the `run`.
+When called, the default graph is the one that will be launched in the
+session. The hook can modify the graph by adding new operations to it.
+After the `begin()` call the graph will be finalized and the other callbacks
+can no longer modify the graph. A second call of `begin()` on the same graph
+should not change the graph.
- - -
-#### `tf.train.SessionRunContext.stop_requested` {#SessionRunContext.stop_requested}
+#### `tf.train.FinalOpsHook.end(session)` {#FinalOpsHook.end}
-Returns whether a stop is requested or not.
-
-If true, `MonitoredSession` stops iterations.
-
-##### Returns:
-
- A `bool`
- - -
-### `class tf.train.SessionRunValues` {#SessionRunValues}
+#### `tf.train.FinalOpsHook.final_ops_values` {#FinalOpsHook.final_ops_values}
-Contains the results of `Session.run()`.
-In the future we may use this object to add more information about result of
-run without changing the Hook API.
-Args:
- results: The return values from `Session.run()` corresponding to the fetches
- attribute returned in the RunArgs. Note that this has the same shape as
- the RunArgs fetches. For example:
- fetches = global_step_tensor
- => results = nparray(int)
- fetches = [train_op, summary_op, global_step_tensor]
- => results = [None, nparray(string), nparray(int)]
- fetches = {'step': global_step_tensor, 'summ': summary_op}
- => results = {'step': nparray(int), 'summ': nparray(string)}
- options: `RunOptions` from the `Session.run()` call.
- run_metadata: `RunMetadata` from the `Session.run()` call.
-- - -
-
-#### `tf.train.SessionRunValues.__getnewargs__()` {#SessionRunValues.__getnewargs__}
-
-Return self as a plain tuple. Used by copy and pickle.
-
-
-- - -
-
-#### `tf.train.SessionRunValues.__getstate__()` {#SessionRunValues.__getstate__}
-
-Exclude the OrderedDict from pickling
- - -
-#### `tf.train.SessionRunValues.__new__(_cls, results, options, run_metadata)` {#SessionRunValues.__new__}
-
-Create new instance of SessionRunValues(results, options, run_metadata)
-
+### `class tf.train.FeedFnHook` {#FeedFnHook}
+Runs `feed_fn` and sets the `feed_dict` accordingly.
- - -
-#### `tf.train.SessionRunValues.__repr__()` {#SessionRunValues.__repr__}
-
-Return a nicely formatted representation string
+#### `tf.train.FeedFnHook.__init__(feed_fn)` {#FeedFnHook.__init__}
+Constructs the FeedFnHook with the given `feed_fn`.
-- - -
+##### Args:
-#### `tf.train.SessionRunValues.options` {#SessionRunValues.options}
-Alias for field number 1
+* <b>`feed_fn`</b>: function that takes no arguments and returns a `dict` of
+  values to feed.
- - -
-#### `tf.train.SessionRunValues.results` {#SessionRunValues.results}
+#### `tf.train.FeedFnHook.after_create_session(session, coord)` {#FeedFnHook.after_create_session}
-Alias for field number 0
+Called when a new TensorFlow session is created.
+This is called to signal the hooks that a new session has been created. This
+differs from the situation in which `begin` is called in two essential ways:
-- - -
+* When this is called, the graph is finalized and ops can no longer be added
+ to the graph.
+* This method will also be called as a result of recovering a wrapped
+ session, not only at the beginning of the overall session.
-#### `tf.train.SessionRunValues.run_metadata` {#SessionRunValues.run_metadata}
+##### Args:
-Alias for field number 2
+* <b>`session`</b>: A TensorFlow Session that has been created.
+* <b>`coord`</b>: A Coordinator object which keeps track of all threads.
- - -
-### `class tf.train.LooperThread` {#LooperThread}
-
-A thread that runs code repeatedly, optionally on a timer.
+#### `tf.train.FeedFnHook.after_run(run_context, run_values)` {#FeedFnHook.after_run}
-This thread class is intended to be used with a `Coordinator`. It repeatedly
-runs code specified either as `target` and `args` or by the `run_loop()`
-method.
-
-Before each run the thread checks if the coordinator has requested stop. In
-that case the looper thread terminates immediately.
-
-If the code being run raises an exception, that exception is reported to the
-coordinator and the thread terminates. The coordinator will then request all
-the other threads it coordinates to stop.
-
-You typically pass looper threads to the supervisor `Join()` method.
-- - -
+Called after each call to run().
-#### `tf.train.LooperThread.__init__(coord, timer_interval_secs, target=None, args=None, kwargs=None)` {#LooperThread.__init__}
+The `run_values` argument contains the results of the ops/tensors that were
+requested by `before_run()`.
-Create a LooperThread.
+The `run_context` argument is the same one sent to the `before_run` call.
+`run_context.request_stop()` can be called to stop the iteration.
##### Args:
-* <b>`coord`</b>: A Coordinator.
-* <b>`timer_interval_secs`</b>: Time boundaries at which to call Run(), or None
- if it should be called back to back.
-* <b>`target`</b>: Optional callable object that will be executed in the thread.
-* <b>`args`</b>: Optional arguments to pass to `target` when calling it.
-* <b>`kwargs`</b>: Optional keyword arguments to pass to `target` when calling it.
-
-##### Raises:
-
-
-* <b>`ValueError`</b>: If one of the arguments is invalid.
+* <b>`run_context`</b>: A `SessionRunContext` object.
+* <b>`run_values`</b>: A SessionRunValues object.
- - -
-#### `tf.train.LooperThread.__repr__()` {#LooperThread.__repr__}
+#### `tf.train.FeedFnHook.before_run(run_context)` {#FeedFnHook.before_run}
- - -
-#### `tf.train.LooperThread.daemon` {#LooperThread.daemon}
-
-A boolean value indicating whether this thread is a daemon thread (True) or not (False).
-
-This must be set before start() is called, otherwise RuntimeError is
-raised. Its initial value is inherited from the creating thread; the
-main thread is not a daemon thread and therefore all threads created in
-the main thread default to daemon = False.
-
-The entire Python program exits when no alive non-daemon threads are
-left.
-
-
-- - -
-
-#### `tf.train.LooperThread.getName()` {#LooperThread.getName}
+#### `tf.train.FeedFnHook.begin()` {#FeedFnHook.begin}
+Called once before using the session.
+When called, the default graph is the one that will be launched in the
+session. The hook can modify the graph by adding new operations to it.
+After the `begin()` call the graph will be finalized and the other callbacks
+can no longer modify the graph. A second call of `begin()` on the same graph
+should not change the graph.
- - -
-#### `tf.train.LooperThread.ident` {#LooperThread.ident}
+#### `tf.train.FeedFnHook.end(session)` {#FeedFnHook.end}
-Thread identifier of this thread or None if it has not been started.
-
-This is a nonzero integer. See the thread.get_ident() function. Thread
-identifiers may be recycled when a thread exits and another thread is
-created. The identifier is available even after the thread has exited.
-
-
-- - -
-
-#### `tf.train.LooperThread.isAlive()` {#LooperThread.isAlive}
+Called at the end of the session.
-Return whether the thread is alive.
+The `session` argument can be used in case the hook wants to run final ops,
+such as saving a last checkpoint.
-This method returns True just before the run() method starts until just
-after the run() method terminates. The module function enumerate()
-returns a list of all alive threads.
+##### Args:
-- - -
+* <b>`session`</b>: A TensorFlow Session that will soon be closed.
-#### `tf.train.LooperThread.isDaemon()` {#LooperThread.isDaemon}
+## Training Utilities
- - -
-#### `tf.train.LooperThread.is_alive()` {#LooperThread.is_alive}
-
-Return whether the thread is alive.
-
-This method returns True just before the run() method starts until just
-after the run() method terminates. The module function enumerate()
-returns a list of all alive threads.
-
+### `tf.train.global_step(sess, global_step_tensor)` {#global_step}
-- - -
+Small helper to get the global step.
-#### `tf.train.LooperThread.join(timeout=None)` {#LooperThread.join}
+```python
+# Creates a variable to hold the global_step.
+global_step_tensor = tf.Variable(10, trainable=False, name='global_step')
+# Creates a session.
+sess = tf.Session()
+# Initializes the variable.
+sess.run(global_step_tensor.initializer)
+print('global_step: %s' % tf.train.global_step(sess, global_step_tensor))
-Wait until the thread terminates.
+global_step: 10
+```
-This blocks the calling thread until the thread whose join() method is
-called terminates -- either normally or through an unhandled exception
-or until the optional timeout occurs.
+##### Args:
-When the timeout argument is present and not None, it should be a
-floating point number specifying a timeout for the operation in seconds
-(or fractions thereof). As join() always returns None, you must call
-isAlive() after join() to decide whether a timeout happened -- if the
-thread is still alive, the join() call timed out.
-When the timeout argument is not present or None, the operation will
-block until the thread terminates.
+* <b>`sess`</b>: A TensorFlow `Session` object.
+* <b>`global_step_tensor`</b>: `Tensor` or the `name` of the operation that contains
+ the global step.
-A thread can be join()ed many times.
+##### Returns:
-join() raises a RuntimeError if an attempt is made to join the current
-thread as that would cause a deadlock. It is also an error to join() a
-thread before it has been started and attempts to do so raises the same
-exception.
+ The global step value.
- - -
-#### `tf.train.LooperThread.loop(coord, timer_interval_secs, target, args=None, kwargs=None)` {#LooperThread.loop}
+### `tf.train.basic_train_loop(supervisor, train_step_fn, args=None, kwargs=None, master='')` {#basic_train_loop}
-Start a LooperThread that calls a function periodically.
+Basic loop to train a model.
-If `timer_interval_secs` is None the thread calls `target(args)`
-repeatedly. Otherwise `target(args)` is called every `timer_interval_secs`
-seconds. The thread terminates when a stop of the coordinator is
-requested.
+Calls `train_step_fn` in a loop to train a model. The function is called as:
-##### Args:
+```python
+train_step_fn(session, *args, **kwargs)
+```
+It is passed a `tf.Session` in addition to `args` and `kwargs`. The function
+typically runs one training step in the session.
-* <b>`coord`</b>: A Coordinator.
-* <b>`timer_interval_secs`</b>: Number. Time boundaries at which to call `target`.
-* <b>`target`</b>: A callable object.
-* <b>`args`</b>: Optional arguments to pass to `target` when calling it.
-* <b>`kwargs`</b>: Optional keyword arguments to pass to `target` when calling it.
+##### Args:
-##### Returns:
- The started thread.
+* <b>`supervisor`</b>: `tf.Supervisor` to run the training services.
+* <b>`train_step_fn`</b>: Callable to execute one training step. Called
+ repeatedly as `train_step_fn(session, *args, **kwargs)`.
+* <b>`args`</b>: Optional positional arguments passed to `train_step_fn`.
+* <b>`kwargs`</b>: Optional keyword arguments passed to `train_step_fn`.
+* <b>`master`</b>: Master to use to create the training session. Defaults to
+ `""` which causes the session to be created in the local process.
- - -
-#### `tf.train.LooperThread.name` {#LooperThread.name}
-
-A string used for identification purposes only.
+### `tf.train.get_global_step(graph=None)` {#get_global_step}
-It has no semantics. Multiple threads may be given the same name. The
-initial name is set by the constructor.
+Get the global step tensor.
+The global step tensor must be an integer variable. We first try to find it
+in the collection `GLOBAL_STEP`, or by name `global_step:0`.
-- - -
+##### Args:
-#### `tf.train.LooperThread.run()` {#LooperThread.run}
+* <b>`graph`</b>: The graph to find the global step in. If missing, use default graph.
+##### Returns:
+ The global step variable, or `None` if none was found.
-- - -
+##### Raises:
-#### `tf.train.LooperThread.run_loop()` {#LooperThread.run_loop}
-Called at 'timer_interval_secs' boundaries.
+* <b>`TypeError`</b>: If the global step tensor has a non-integer type, or if it is not
+ a `Variable`.
- - -
-#### `tf.train.LooperThread.setDaemon(daemonic)` {#LooperThread.setDaemon}
-
-
-
+### `tf.train.assert_global_step(global_step_tensor)` {#assert_global_step}
-- - -
+Asserts `global_step_tensor` is a scalar int `Variable` or `Tensor`.
-#### `tf.train.LooperThread.setName(name)` {#LooperThread.setName}
+##### Args:
+* <b>`global_step_tensor`</b>: `Tensor` to test.
- - -
-#### `tf.train.LooperThread.start()` {#LooperThread.start}
-
-Start the thread's activity.
-
-It must be called at most once per thread object. It arranges for the
-object's run() method to be invoked in a separate thread of control.
+### `tf.train.write_graph(graph_or_graph_def, logdir, name, as_text=True)` {#write_graph}
-This method will raise a RuntimeError if called more than once on the
-same thread object.
+Writes a graph proto to a file.
+The graph is written as a binary proto unless `as_text` is `True`.
-- - -
+```python
+v = tf.Variable(0, name='my_variable')
+sess = tf.Session()
+tf.train.write_graph(sess.graph_def, '/tmp/my-model', 'train.pbtxt')
+```
-#### `tf.train.LooperThread.start_loop()` {#LooperThread.start_loop}
+or
-Called when the thread starts.
+```python
+v = tf.Variable(0, name='my_variable')
+sess = tf.Session()
+tf.train.write_graph(sess.graph, '/tmp/my-model', 'train.pbtxt')
+```
+##### Args:
-- - -
-#### `tf.train.LooperThread.stop_loop()` {#LooperThread.stop_loop}
+* <b>`graph_or_graph_def`</b>: A `Graph` or a `GraphDef` protocol buffer.
+* <b>`logdir`</b>: Directory where to write the graph. This can refer to remote
+ filesystems, such as Google Cloud Storage (GCS).
+* <b>`name`</b>: Filename for the graph.
+* <b>`as_text`</b>: If `True`, writes the graph as an ASCII proto.
-Called when the thread stops.
+##### Returns:
+ The path of the output proto file.
diff --git a/tensorflow/g3doc/tutorials/tflearn/index.md b/tensorflow/g3doc/tutorials/tflearn/index.md
index b6e26ee351..9f6485e30b 100644
--- a/tensorflow/g3doc/tutorials/tflearn/index.md
+++ b/tensorflow/g3doc/tutorials/tflearn/index.md
@@ -202,8 +202,8 @@ The code above first defines the model's feature columns, which specify the data
type for the features in the data set. All the feature data is continuous, so
`tf.contrib.layers.real_valued_column` is the appropriate function to use to
construct the feature columns. There are four features in the data set (sepal
-width, sepal height, petal width, and petal height), so `dimensions` must be set
-accordingly to `4` to hold all the data.
+width, sepal length, petal width, and petal length), so accordingly `dimension`
+must be set to `4` to hold all the data.
Then, the code creates a `DNNClassifier` model using the following arguments:
diff --git a/tensorflow/go/genop/internal/genop.go b/tensorflow/go/genop/internal/genop.go
index 75c111e957..d9ebec0f8c 100644
--- a/tensorflow/go/genop/internal/genop.go
+++ b/tensorflow/go/genop/internal/genop.go
@@ -395,7 +395,7 @@ func goType(tfType string) (string, error) {
case "type":
gotype = "tf.DataType"
case "shape":
- gotype = "[]int64"
+ gotype = "tf.Shape"
case "tensor":
gotype = "tf.Tensor"
case "string":
diff --git a/tensorflow/go/graph.go b/tensorflow/go/graph.go
index 2eb1194610..c0f91ffb30 100644
--- a/tensorflow/go/graph.go
+++ b/tensorflow/go/graph.go
@@ -259,13 +259,38 @@ func setAttr(cdesc *C.TF_OperationDescription, status *status, name string, valu
if err := status.Err(); err != nil {
return fmt.Errorf("bad value for attribute %q: %v", name, err)
}
+ case Shape:
+ ndims, dims := cshape(value)
+ var dimsp *C.int64_t
+ if ndims > 0 {
+ dimsp = &dims[0]
+ }
+ C.TF_SetAttrShape(cdesc, cAttrName, dimsp, ndims)
+ case []Shape:
+ ndims := make([]C.int, len(value))
+ dims := make([][]C.int64_t, len(value))
+ dimsp := make([]*C.int64_t, len(value))
+ for i, s := range value {
+ ndims[i], dims[i] = cshape(s)
+ if ndims[i] > 0 {
+ dimsp[i] = &dims[i][0]
+ }
+ }
+ C.TF_SetAttrShapeList(cdesc, cAttrName, &dimsp[0], &ndims[0], C.int(len(value)))
default:
- // Shapes can be done, but will require that it be
- // distinguishable from []int64. Which is fine, it
- // probably makes sense to define a Shape type anyway,
- // since that should handle partially known shapes as
- // well and hide the special meaning of -1?
return fmt.Errorf("attribute %q has a type (%T) which is not valid for operation attributes", name, value)
}
return nil
}
+
+func cshape(s Shape) (C.int, []C.int64_t) {
+ ndims := C.int(s.NumDimensions())
+ if ndims < 0 {
+ return -1, nil
+ }
+ dims := make([]C.int64_t, ndims)
+	for i, dim := range s.dims {
+		dims[i] = C.int64_t(dim)
+ }
+ return ndims, dims
+}
diff --git a/tensorflow/go/op/op_test.go b/tensorflow/go/op/op_test.go
new file mode 100644
index 0000000000..eaa27bfcd0
--- /dev/null
+++ b/tensorflow/go/op/op_test.go
@@ -0,0 +1,33 @@
+// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Tests for the generated code of some operations.
+
+package op
+
+import (
+ "testing"
+
+ tf "github.com/tensorflow/tensorflow/tensorflow/go"
+)
+
+func TestPlaceholder(t *testing.T) {
+ s := NewScope()
+ Placeholder(s.SubScope("x"), tf.Float, PlaceholderShape(tf.MakeShape(-1, 10)))
+ Placeholder(s.SubScope("y"), tf.Float, PlaceholderShape(tf.ScalarShape()))
+ Placeholder(s.SubScope("z"), tf.Float, PlaceholderShape(tf.Shape{}))
+ if _, err := s.Finalize(); err != nil {
+ t.Fatal(err)
+ }
+}
diff --git a/tensorflow/go/operation_test.go b/tensorflow/go/operation_test.go
index 8080515ee9..4c4c960448 100644
--- a/tensorflow/go/operation_test.go
+++ b/tensorflow/go/operation_test.go
@@ -81,6 +81,21 @@ func TestOperationOutputListSize(t *testing.T) {
}
}
+func TestOperationShapeAttribute(t *testing.T) {
+ g := NewGraph()
+ _, err := g.AddOperation(OpSpec{
+ Type: "Placeholder",
+ Attrs: map[string]interface{}{
+ "dtype": Float,
+ "shape": MakeShape(-1, 3),
+ },
+ })
+ if err != nil {
+ t.Fatal(err)
+ }
+ // If and when the API to get attributes is added, check that here.
+}
+
func TestOutputShape(t *testing.T) {
graph := NewGraph()
testdata := []struct {
diff --git a/tensorflow/go/shape.go b/tensorflow/go/shape.go
new file mode 100644
index 0000000000..c48bbf29a3
--- /dev/null
+++ b/tensorflow/go/shape.go
@@ -0,0 +1,102 @@
+// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tensorflow
+
+import (
+ "fmt"
+ "strings"
+)
+
+// Shape represents the (possibly partially known) shape of a tensor that will
+// be produced by an operation.
+//
+// The zero-value of a Shape represents a shape with an unknown number of
+// dimensions.
+type Shape struct {
+ dims []int64
+}
+
+// ScalarShape returns a Shape representing a scalar.
+func ScalarShape() Shape {
+ return Shape{dims: make([]int64, 0)}
+}
+
+// MakeShape returns a Shape with the provided size of each dimension.
+//
+// A value of -1 implies that the size of the corresponding dimension is not
+// known.
+func MakeShape(shape ...int64) Shape {
+ cpy := make([]int64, len(shape))
+ copy(cpy, shape)
+ return Shape{dims: cpy}
+}
+
+// NumDimensions returns the number of dimensions represented by s, or -1 if
+// unknown.
+func (s Shape) NumDimensions() int {
+ if s.dims == nil {
+ return -1
+ }
+ return len(s.dims)
+}
+
+// Size returns the size of the dim-th dimension of the shape, or -1 if it
+// is unknown.
+//
+// REQUIRES: 0 <= dim < s.NumDimensions()
+func (s Shape) Size(dim int) int64 {
+	if dim < 0 || dim >= s.NumDimensions() {
+ return -1
+ }
+ return s.dims[dim]
+}
+
+// IsFullySpecified returns true iff the size of all the dimensions of s are
+// known.
+func (s Shape) IsFullySpecified() bool {
+ if s.dims == nil {
+ return false
+ }
+ for _, size := range s.dims {
+		// A size of -1 marks an unknown dimension; sizes 0 and 1 are known.
+		if size < 0 {
+ return false
+ }
+ }
+ return true
+}
+
+// ToSlice returns the (possibly partially known) shape represented by s as a
+// slice, or an error if the number of dimensions is not known.
+func (s Shape) ToSlice() ([]int64, error) {
+ if s.dims == nil {
+ return nil, fmt.Errorf("cannot create a slice for a Shape with an unknown number of dimensions")
+ }
+ cpy := make([]int64, len(s.dims))
+ copy(cpy, s.dims)
+ return cpy, nil
+}
+
+func (s Shape) String() string {
+ if s.dims == nil {
+ return "?"
+ }
+ ret := fmt.Sprint(s.dims)
+ for _, size := range s.dims {
+ if size < 0 {
+ ret = strings.Replace(ret, fmt.Sprint(size), "?", 1)
+ }
+ }
+ return strings.Replace(ret, " ", ", ", -1)
+}
diff --git a/tensorflow/go/shape_test.go b/tensorflow/go/shape_test.go
new file mode 100644
index 0000000000..f8f3d4e94b
--- /dev/null
+++ b/tensorflow/go/shape_test.go
@@ -0,0 +1,83 @@
+// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tensorflow
+
+import (
+ "fmt"
+ "reflect"
+ "testing"
+)
+
+func TestShape(t *testing.T) {
+ tests := []struct {
+ shape Shape
+ slice []int64
+ full bool
+ str string
+ }{
+ {
+ shape: ScalarShape(),
+ slice: make([]int64, 0),
+ full: true,
+ str: "[]",
+ },
+ {
+ shape: MakeShape(-1, 2, -1, 4),
+ slice: []int64{-1, 2, -1, 4},
+ full: false,
+ str: "[?, 2, ?, 4]",
+ },
+ {
+ shape: MakeShape(2, 3),
+ slice: []int64{2, 3},
+ full: true,
+ str: "[2, 3]",
+ },
+ }
+ for _, test := range tests {
+ t.Run(fmt.Sprintf("%#v", test.shape), func(t *testing.T) {
+ if got, want := test.shape.NumDimensions(), len(test.slice); got != want {
+ t.Errorf("Got %v, want %v", got, want)
+ }
+ if gotSlice, err := test.shape.ToSlice(); err != nil || !reflect.DeepEqual(gotSlice, test.slice) {
+ t.Errorf("Got (%#v, %v), want (%#v, nil)", gotSlice, err, test.slice)
+ }
+ if got, want := test.shape.IsFullySpecified(), test.full; got != want {
+ t.Errorf("Got %v, want %v", got, want)
+ }
+ if got, want := test.shape.String(), test.str; got != want {
+ t.Errorf("Got %v, want %v", got, want)
+ }
+ })
+ }
+
+}
+
+func TestZeroShape(t *testing.T) {
+ var s Shape
+ if s.NumDimensions() != -1 {
+ t.Error(s.NumDimensions())
+ }
+ if _, err := s.ToSlice(); err == nil {
+ t.Error("ToSlice() on a Shape of unknown number of dimensions should fail")
+ }
+ if s.IsFullySpecified() {
+ t.Error("Shape of unknown number of dimensions should not be fully specified")
+ }
+ if got, want := s.String(), "?"; got != want {
+ t.Errorf("Got %q, want %q", got, want)
+ }
+
+}
diff --git a/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java b/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java
index b13f830631..cb3de5f744 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java
@@ -17,6 +17,7 @@ package org.tensorflow;
import static org.junit.Assert.fail;
+import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@@ -26,7 +27,8 @@ import org.junit.runners.JUnit4;
public class OperationBuilderTest {
// TODO(ashankar): Restore this test once the C API gracefully handles mixing graphs and
// operations instead of segfaulting.
- // @Test
+ @Test
+ @Ignore
public void failWhenMixingOperationsOnDifferentGraphs() {
try (Graph g1 = new Graph();
Graph g2 = new Graph()) {
diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py
index c1a8191def..248d4c9b81 100644
--- a/tensorflow/python/client/session.py
+++ b/tensorflow/python/client/session.py
@@ -1308,7 +1308,12 @@ class InteractiveSession(BaseSession):
config: (Optional) `ConfigProto` proto used to configure the session.
"""
if not config:
- config = config_pb2.ConfigProto()
+ # If config is not provided, choose some reasonable defaults for
+ # interactive use:
+ #
+ # - Grow GPU memory as needed at the cost of fragmentation.
+ gpu_options = config_pb2.GPUOptions(allow_growth=True)
+ config = config_pb2.ConfigProto(gpu_options=gpu_options)
# Interactive sessions always place pruned graphs.
config.graph_options.place_pruned_graph = True
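The new default above is just a pre-populated ConfigProto. A minimal sketch of the equivalent explicit construction, assuming the TF 1.x public aliases (tf.GPUOptions, tf.ConfigProto, tf.InteractiveSession):

    import tensorflow as tf

    # Equivalent to the new no-config default: let the GPU allocator grow its
    # memory region on demand instead of reserving all GPU memory up front.
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))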
diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index 0aa5ce0a60..9ad8a1121f 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -592,6 +592,18 @@ py_test(
],
)
+sh_test(
+ name = "examples_test",
+ size = "small",
+ srcs = ["examples/examples_test.sh"],
+ data = [
+ ":debug_errors",
+ ":debug_fibonacci",
+ ":debug_mnist",
+ ":debug_tflearn_iris",
+ ],
+)
+
filegroup(
name = "all_files",
srcs = glob(
diff --git a/tensorflow/python/debug/examples/debug_fibonacci.py b/tensorflow/python/debug/examples/debug_fibonacci.py
index 14722ecd08..6fdc78b605 100644
--- a/tensorflow/python/debug/examples/debug_fibonacci.py
+++ b/tensorflow/python/debug/examples/debug_fibonacci.py
@@ -45,7 +45,7 @@ def main(_):
sess.run(tf.global_variables_initializer())
# Wrap the TensorFlow Session object for debugging.
- sess = tf_debug.LocalCLIDebugWrapperSession(sess)
+ sess = tf_debug.LocalCLIDebugWrapperSession(sess, ui_type=FLAGS.ui_type)
sess.run(n1)
@@ -66,5 +66,10 @@ if __name__ == "__main__":
type=int,
default=20,
help="Length of the fibonacci sequence to compute.")
+ parser.add_argument(
+ "--ui_type",
+ type=str,
+ default="curses",
+ help="Command-line user interface type (curses | readline)")
FLAGS, unparsed = parser.parse_known_args()
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/python/debug/examples/debug_mnist.py b/tensorflow/python/debug/examples/debug_mnist.py
index d8195a6847..73d398c086 100644
--- a/tensorflow/python/debug/examples/debug_mnist.py
+++ b/tensorflow/python/debug/examples/debug_mnist.py
@@ -41,11 +41,14 @@ RAND_SEED = 42
def main(_):
# Import data
- mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
+ mnist = input_data.read_data_sets(FLAGS.data_dir,
+ one_hot=True,
+ fake_data=FLAGS.fake_data)
def feed_dict(train):
- if train:
- xs, ys = mnist.train.next_batch(FLAGS.train_batch_size, fake_data=False)
+ if train or FLAGS.fake_data:
+ xs, ys = mnist.train.next_batch(FLAGS.train_batch_size,
+ fake_data=FLAGS.fake_data)
else:
xs, ys = mnist.test.images, mnist.test.labels
@@ -157,6 +160,13 @@ if __name__ == "__main__":
default="curses",
help="Command-line user interface type (curses | readline)")
parser.add_argument(
+ "--fake_data",
+ type="bool",
+ nargs="?",
+ const=True,
+ default=False,
+ help="Use fake MNIST data for unit testing")
+ parser.add_argument(
"--debug",
type="bool",
nargs="?",
diff --git a/tensorflow/python/debug/examples/debug_tflearn_iris.py b/tensorflow/python/debug/examples/debug_tflearn_iris.py
index 009885b9ea..57ebba689d 100644
--- a/tensorflow/python/debug/examples/debug_tflearn_iris.py
+++ b/tensorflow/python/debug/examples/debug_tflearn_iris.py
@@ -80,15 +80,22 @@ def iris_input_fn():
def main(_):
- training_data_path, test_data_path = maybe_download_data(FLAGS.data_dir)
-
# Load datasets.
- training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
- filename=training_data_path,
- target_dtype=np.int,
- features_dtype=np.float32)
- test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
- filename=test_data_path, target_dtype=np.int, features_dtype=np.float32)
+ if FLAGS.fake_data:
+ training_set = tf.contrib.learn.datasets.base.Dataset(
+ np.random.random([120, 4]),
+ np.random.random_integers(3, size=[120]) - 1)
+ test_set = tf.contrib.learn.datasets.base.Dataset(
+ np.random.random([30, 4]),
+ np.random.random_integers(3, size=[30]) - 1)
+ else:
+ training_data_path, test_data_path = maybe_download_data(FLAGS.data_dir)
+ training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
+ filename=training_data_path,
+ target_dtype=np.int,
+ features_dtype=np.float32)
+ test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
+ filename=test_data_path, target_dtype=np.int, features_dtype=np.float32)
# Specify that all features have real-value data
feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)]
@@ -102,8 +109,11 @@ def main(_):
n_classes=3,
model_dir=model_dir)
- hooks = ([tf_debug.LocalCLIDebugHook(ui_type=FLAGS.ui_type)] if FLAGS.debug
- else None)
+ hooks = None
+ if FLAGS.debug:
+ debug_hook = tf_debug.LocalCLIDebugHook(ui_type=FLAGS.ui_type)
+ debug_hook.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
+ hooks = [debug_hook]
if not FLAGS.use_experiment:
# Fit model.
@@ -163,6 +173,13 @@ if __name__ == "__main__":
default="curses",
help="Command-line user interface type (curses | readline)")
parser.add_argument(
+ "--fake_data",
+ type="bool",
+ nargs="?",
+ const=True,
+ default=False,
+ help="Use fake MNIST data for unit testing")
+ parser.add_argument(
"--debug",
type="bool",
nargs="?",
diff --git a/tensorflow/python/debug/examples/examples_test.sh b/tensorflow/python/debug/examples/examples_test.sh
new file mode 100755
index 0000000000..397078b91d
--- /dev/null
+++ b/tensorflow/python/debug/examples/examples_test.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Bash unit tests for TensorFlow Debugger (tfdbg) Python examples that do not
+# involve downloading data.
+
+set -e
+
+
+DEBUG_FIBONACCI_BIN="$TEST_SRCDIR/org_tensorflow/tensorflow/python/debug/debug_fibonacci"
+
+# Override the default ui_type=curses to allow the test to pass in a tty-less
+# test environment.
+cat << EOF | "${DEBUG_FIBONACCI_BIN}" --ui_type=readline
+run
+exit
+EOF
+
+
+DEBUG_ERRORS_BIN="$TEST_SRCDIR/org_tensorflow/tensorflow/python/debug/debug_errors"
+
+cat << EOF | "${DEBUG_ERRORS_BIN}" --error=no_error --ui_type=readline
+run
+exit
+EOF
+
+
+DEBUG_MNIST_BIN="$TEST_SRCDIR/org_tensorflow/tensorflow/python/debug/debug_mnist"
+
+# Use a large enough "run -t" number to let the process end properly.
+cat << EOF | "${DEBUG_MNIST_BIN}" --debug --fake_data --ui_type=readline
+run -f has_inf_or_nan
+run -t 1000
+EOF
+
+
+DEBUG_TFLEARN_IRIS_BIN="$TEST_SRCDIR/org_tensorflow/tensorflow/python/debug/debug_tflearn_iris"
+
+cat << EOF | "${DEBUG_TFLEARN_IRIS_BIN}" --debug --fake_data --train_steps=2 --ui_type=readline
+run -f has_inf_or_nan
+EOF
diff --git a/tensorflow/python/debug/wrappers/hooks.py b/tensorflow/python/debug/wrappers/hooks.py
index cda2becc6e..30f0e117e6 100644
--- a/tensorflow/python/debug/wrappers/hooks.py
+++ b/tensorflow/python/debug/wrappers/hooks.py
@@ -44,6 +44,28 @@ class LocalCLIDebugHook(session_run_hook.SessionRunHook,
self._ui_type = ui_type
self._wrapper_initialized = False
+ self._pending_tensor_filters = {}
+
+ def add_tensor_filter(self, filter_name, tensor_filter):
+ """Add a tensor filter.
+
+ See doc of `LocalCLIDebugWrapperSession.add_tensor_filter()` for details.
+ Override the default behavior to accommodate the possibility of this method being
+ called prior to the initialization of the underlying
+ `LocalCLIDebugWrapperSession` object.
+
+ Args:
+ filter_name: See doc of `LocalCLIDebugWrapperSession.add_tensor_filter()`
+ for details.
+ tensor_filter: See doc of
+ `LocalCLIDebugWrapperSession.add_tensor_filter()` for details.
+ """
+
+ if self._wrapper_initialized:
+ local_cli_wrapper.LocalCLIDebugWrapperSession.add_tensor_filter(
+ self, filter_name, tensor_filter)
+ else:
+ self._pending_tensor_filters[filter_name] = tensor_filter
def begin(self):
pass
@@ -52,6 +74,13 @@ class LocalCLIDebugHook(session_run_hook.SessionRunHook,
if not self._wrapper_initialized:
local_cli_wrapper.LocalCLIDebugWrapperSession.__init__(
self, run_context.session, ui_type=self._ui_type)
+
+ # Register any tensor filters that were added before the underlying
+ # LocalCLIDebugWrapperSession object was constructed.
+ for filter_name in self._pending_tensor_filters:
+ local_cli_wrapper.LocalCLIDebugWrapperSession.add_tensor_filter(
+ self, filter_name, self._pending_tensor_filters[filter_name])
+
self._wrapper_initialized = True
# Increment run call counter.
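With the deferred registration above, the pattern used earlier in this diff in debug_tflearn_iris.py is safe even though the hook is constructed long before any session exists. A minimal sketch, assuming the tfdbg import used by those examples; the commented-out fit() call stands in for any tf.contrib.learn estimator:

    from tensorflow.python import debug as tf_debug

    hook = tf_debug.LocalCLIDebugHook(ui_type="readline")
    # Safe to call before the first run: the filter is held in
    # _pending_tensor_filters and registered once the wrapped session exists.
    hook.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
    # classifier.fit(input_fn=iris_input_fn, steps=10, monitors=[hook])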
diff --git a/tensorflow/python/framework/meta_graph.py b/tensorflow/python/framework/meta_graph.py
index 49406eedf3..cc33c20f87 100644
--- a/tensorflow/python/framework/meta_graph.py
+++ b/tensorflow/python/framework/meta_graph.py
@@ -476,7 +476,8 @@ def import_scoped_meta_graph(meta_graph_or_file,
sorted(input_map)):
raise ValueError("Graph contains unbound inputs: %s. Must "
"provide these inputs through input_map." %
- ",".join([compat.as_str(v) for v in field.value]))
+ ",".join([compat.as_str(v) for v in field.value
+ if not input_map or v not in input_map]))
break
# Sets graph to default graph if it's not passed in.
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index f4c3dcf99f..13b6923c3c 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -342,6 +342,18 @@ tf_py_test(
)
tf_py_test(
+ name = "record_input_test",
+ size = "small",
+ srcs = ["record_input_test.py"],
+ additional_deps = [
+ "//tensorflow/python:client_testlib",
+ "//tensorflow/python:data_flow_ops",
+ "//tensorflow/python:io_ops",
+ "//tensorflow/python:util",
+ ],
+)
+
+tf_py_test(
name = "io_ops_test",
size = "small",
srcs = ["io_ops_test.py"],
diff --git a/tensorflow/python/kernel_tests/argmax_op_test.py b/tensorflow/python/kernel_tests/argmax_op_test.py
index ac9a78d0fa..a5352561aa 100644
--- a/tensorflow/python/kernel_tests/argmax_op_test.py
+++ b/tensorflow/python/kernel_tests/argmax_op_test.py
@@ -90,6 +90,12 @@ class ArgMaxTest(test.TestCase):
r"Reduction axis 0 is empty in shape \[0\]"):
op([], 0).eval()
+ def testDefaultAxis(self):
+ with self.test_session():
+ for op in math_ops.argmin, math_ops.argmax:
+ ans = op([1]).eval()
+ self.assertAllEqual(ans, 0)
+
if __name__ == "__main__":
test.main()
diff --git a/tensorflow/python/kernel_tests/confusion_matrix_test.py b/tensorflow/python/kernel_tests/confusion_matrix_test.py
index cf88209148..2d116df2ff 100644
--- a/tensorflow/python/kernel_tests/confusion_matrix_test.py
+++ b/tensorflow/python/kernel_tests/confusion_matrix_test.py
@@ -22,6 +22,7 @@ import numpy as np
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors_impl
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import confusion_matrix
from tensorflow.python.ops import math_ops
@@ -215,5 +216,239 @@ class ConfusionMatrixTest(test.TestCase):
self.assertEqual(tf_cm.dtype, np.int64)
+class RemoveSqueezableDimensionsTest(test.TestCase):
+
+ def testBothScalarShape(self):
+ label_values = 1.0
+ prediction_values = 0.0
+ static_labels, static_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ label_values, prediction_values))
+
+ labels_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+ predictions_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+ dynamic_labels, dynamic_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ labels_placeholder, predictions_placeholder))
+
+ with self.test_session():
+ self.assertAllEqual(label_values, static_labels.eval())
+ self.assertAllEqual(prediction_values, static_predictions.eval())
+ feed_dict = {
+ labels_placeholder: label_values,
+ predictions_placeholder: prediction_values
+ }
+ self.assertAllEqual(
+ label_values, dynamic_labels.eval(feed_dict=feed_dict))
+ self.assertAllEqual(
+ prediction_values, dynamic_predictions.eval(feed_dict=feed_dict))
+
+ def testSameShape(self):
+ label_values = np.ones(shape=(2, 3, 1))
+ prediction_values = np.zeros_like(label_values)
+ static_labels, static_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ label_values, prediction_values))
+
+ labels_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ dynamic_labels, dynamic_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ labels_placeholder, predictions_placeholder))
+
+ with self.test_session():
+ self.assertAllEqual(label_values, static_labels.eval())
+ self.assertAllEqual(prediction_values, static_predictions.eval())
+ feed_dict = {
+ labels_placeholder: label_values,
+ predictions_placeholder: prediction_values
+ }
+ self.assertAllEqual(
+ label_values, dynamic_labels.eval(feed_dict=feed_dict))
+ self.assertAllEqual(
+ prediction_values, dynamic_predictions.eval(feed_dict=feed_dict))
+
+ def testSameShapeExpectedRankDiff0(self):
+ label_values = np.ones(shape=(2, 3, 1))
+ prediction_values = np.zeros_like(label_values)
+ static_labels, static_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ label_values, prediction_values, expected_rank_diff=0))
+
+ labels_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ dynamic_labels, dynamic_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ labels_placeholder, predictions_placeholder, expected_rank_diff=0))
+
+ with self.test_session():
+ self.assertAllEqual(label_values, static_labels.eval())
+ self.assertAllEqual(prediction_values, static_predictions.eval())
+ feed_dict = {
+ labels_placeholder: label_values,
+ predictions_placeholder: prediction_values
+ }
+ self.assertAllEqual(
+ label_values, dynamic_labels.eval(feed_dict=feed_dict))
+ self.assertAllEqual(
+ prediction_values, dynamic_predictions.eval(feed_dict=feed_dict))
+
+ def testSqueezableLabels(self):
+ label_values = np.ones(shape=(2, 3, 1))
+ prediction_values = np.zeros(shape=(2, 3))
+ static_labels, static_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ label_values, prediction_values))
+
+ labels_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ dynamic_labels, dynamic_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ labels_placeholder, predictions_placeholder))
+
+ expected_label_values = np.reshape(label_values, newshape=(2, 3))
+ with self.test_session():
+ self.assertAllEqual(expected_label_values, static_labels.eval())
+ self.assertAllEqual(prediction_values, static_predictions.eval())
+ feed_dict = {
+ labels_placeholder: label_values,
+ predictions_placeholder: prediction_values
+ }
+ self.assertAllEqual(
+ expected_label_values, dynamic_labels.eval(feed_dict=feed_dict))
+ self.assertAllEqual(
+ prediction_values, dynamic_predictions.eval(feed_dict=feed_dict))
+
+ def testSqueezableLabelsExpectedRankDiffPlus1(self):
+ label_values = np.ones(shape=(2, 3, 1))
+ prediction_values = np.zeros(shape=(2, 3, 5))
+ static_labels, static_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ label_values, prediction_values, expected_rank_diff=1))
+
+ labels_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ dynamic_labels, dynamic_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ labels_placeholder, predictions_placeholder, expected_rank_diff=1))
+
+ expected_label_values = np.reshape(label_values, newshape=(2, 3))
+ with self.test_session():
+ self.assertAllEqual(expected_label_values, static_labels.eval())
+ self.assertAllEqual(prediction_values, static_predictions.eval())
+ feed_dict = {
+ labels_placeholder: label_values,
+ predictions_placeholder: prediction_values
+ }
+ self.assertAllEqual(
+ expected_label_values, dynamic_labels.eval(feed_dict=feed_dict))
+ self.assertAllEqual(
+ prediction_values, dynamic_predictions.eval(feed_dict=feed_dict))
+
+ def testSqueezablePredictions(self):
+ label_values = np.ones(shape=(2, 3))
+ prediction_values = np.zeros(shape=(2, 3, 1))
+ static_labels, static_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ label_values, prediction_values))
+
+ labels_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ dynamic_labels, dynamic_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ labels_placeholder, predictions_placeholder))
+
+ expected_prediction_values = np.reshape(prediction_values, newshape=(2, 3))
+ with self.test_session():
+ self.assertAllEqual(label_values, static_labels.eval())
+ self.assertAllEqual(expected_prediction_values, static_predictions.eval())
+ feed_dict = {
+ labels_placeholder: label_values,
+ predictions_placeholder: prediction_values
+ }
+ self.assertAllEqual(
+ label_values, dynamic_labels.eval(feed_dict=feed_dict))
+ self.assertAllEqual(
+ expected_prediction_values,
+ dynamic_predictions.eval(feed_dict=feed_dict))
+
+ def testSqueezablePredictionsExpectedRankDiffMinus1(self):
+ label_values = np.ones(shape=(2, 3, 5))
+ prediction_values = np.zeros(shape=(2, 3, 1))
+ static_labels, static_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ label_values, prediction_values, expected_rank_diff=-1))
+
+ labels_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ dynamic_labels, dynamic_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ labels_placeholder, predictions_placeholder, expected_rank_diff=-1))
+
+ expected_prediction_values = np.reshape(prediction_values, newshape=(2, 3))
+ with self.test_session():
+ self.assertAllEqual(label_values, static_labels.eval())
+ self.assertAllEqual(expected_prediction_values, static_predictions.eval())
+ feed_dict = {
+ labels_placeholder: label_values,
+ predictions_placeholder: prediction_values
+ }
+ self.assertAllEqual(
+ label_values, dynamic_labels.eval(feed_dict=feed_dict))
+ self.assertAllEqual(
+ expected_prediction_values,
+ dynamic_predictions.eval(feed_dict=feed_dict))
+
+ def testUnsqueezableLabels(self):
+ label_values = np.ones(shape=(2, 3, 2))
+ prediction_values = np.zeros(shape=(2, 3))
+ with self.assertRaisesRegexp(ValueError, r"Can not squeeze dim\[2\]"):
+ confusion_matrix.remove_squeezable_dimensions(
+ label_values, prediction_values)
+
+ labels_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ dynamic_labels, dynamic_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ labels_placeholder, predictions_placeholder))
+
+ with self.test_session():
+ feed_dict = {
+ labels_placeholder: label_values,
+ predictions_placeholder: prediction_values
+ }
+ with self.assertRaisesRegexp(
+ errors_impl.InvalidArgumentError,
+ "Tried to explicitly squeeze dimension 2"):
+ dynamic_labels.eval(feed_dict=feed_dict)
+ self.assertAllEqual(
+ prediction_values, dynamic_predictions.eval(feed_dict=feed_dict))
+
+ def testUnsqueezablePredictions(self):
+ label_values = np.ones(shape=(2, 3))
+ prediction_values = np.zeros(shape=(2, 3, 2))
+ with self.assertRaisesRegexp(ValueError, r"Can not squeeze dim\[2\]"):
+ confusion_matrix.remove_squeezable_dimensions(
+ label_values, prediction_values)
+
+ labels_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32)
+ dynamic_labels, dynamic_predictions = (
+ confusion_matrix.remove_squeezable_dimensions(
+ labels_placeholder, predictions_placeholder))
+
+ with self.test_session():
+ feed_dict = {
+ labels_placeholder: label_values,
+ predictions_placeholder: prediction_values
+ }
+ self.assertAllEqual(
+ label_values, dynamic_labels.eval(feed_dict=feed_dict))
+ with self.assertRaisesRegexp(
+ errors_impl.InvalidArgumentError,
+ "Tried to explicitly squeeze dimension 2"):
+ dynamic_predictions.eval(feed_dict=feed_dict)
+
+
if __name__ == "__main__":
test.main()
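Taken together, these cases pin down the contract exercised here: with rank_diff defined as rank(predictions) - rank(labels), a trailing size-1 dimension is squeezed from predictions when rank_diff is one more than expected_rank_diff, and from labels when it is one less; matching ranks come back unchanged, and a trailing dimension larger than 1 in the argument that would be squeezed is an error. A minimal sketch of the common case, assuming the module path used in this test file:

    import numpy as np
    from tensorflow.python.ops import confusion_matrix

    labels = np.ones((2, 3, 1))     # one rank higher, trailing dim of size 1
    predictions = np.zeros((2, 3))
    squeezed_labels, unchanged_predictions = (
        confusion_matrix.remove_squeezable_dimensions(labels, predictions))
    # squeezed_labels now has static shape (2, 3); predictions are unchanged.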
diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py
index 125d353df3..f3ae092b6f 100644
--- a/tensorflow/python/kernel_tests/losses_test.py
+++ b/tensorflow/python/kernel_tests/losses_test.py
@@ -72,7 +72,7 @@ class AbsoluteDifferenceLossTest(test.TestCase):
self.assertAlmostEqual(5.5 * weights, loss.eval(), 3)
def testNonZeroLossWithOneDimBatchSpecificWeights(self):
- weights = constant_op.constant([1.2, 0.0], shape=[2,])
+ weights = constant_op.constant((1.2, 0.0), shape=(2, 1))
loss = losses.absolute_difference(self._labels, self._predictions, weights)
with self.test_session():
self.assertAlmostEqual(5.6, loss.eval(), 3)
@@ -154,7 +154,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase):
logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
[0.0, 0.0, 10.0]])
labels = constant_op.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0]])
- weights = constant_op.constant([1.2, 3.4, 5.6], shape=[3])
+ weights = constant_op.constant((1.2, 3.4, 5.6))
with self.test_session():
loss = losses.softmax_cross_entropy(labels, logits, weights)
self.assertAlmostEqual((1.2 + 3.4 + 5.6) * 10.0 / 3.0, loss.eval(), 3)
@@ -296,8 +296,6 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
constant_op.constant(weights))
self.assertAlmostEqual(weights * 10.0, loss.eval(), 3)
- # TODO(b/33556118): Bug: this should be averaged across all dimensions, not
- # summed across dim 0.
def testNonZeroLossWith1DTensorWeight(self):
logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
[0.0, 0.0, 10.0]])
@@ -305,25 +303,25 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
weights = 2.3
with self.test_session():
loss = losses.sparse_softmax_cross_entropy(
- labels, logits, constant_op.constant(weights, shape=(1,)))
- self.assertAlmostEqual(weights * 3.0 * 10.0, loss.eval(), 2)
+ labels, logits, constant_op.constant((weights,)))
+ self.assertAlmostEqual(weights * 10.0, loss.eval(), 3)
def testNonZeroLossWithPlaceholderForWeights(self):
logits = constant_op.constant([[10.0, 0.0, 0.0],
[0.0, 10.0, 0.0],
[0.0, 0.0, 10.0]])
labels = constant_op.constant([[2], [0], [1]])
- weights = array_ops.placeholder(dtypes.float32, shape=(None,))
+ weights = array_ops.placeholder(dtypes.float32)
with self.test_session() as sess:
loss = losses.sparse_softmax_cross_entropy(labels, logits, weights)
loss_val = sess.run(loss,
- feed_dict={weights: [1.2, 3.4, 5.6]})
+ feed_dict={weights: ((1.2,), (3.4,), (5.6,))})
self.assertAlmostEqual((1.2 + 3.4 + 5.6) * 10.0 / 3.0, loss_val, 3)
def testNonZeroLossWithPlaceholderForLogitsLabelsAndWeights(self):
logits = array_ops.placeholder(dtypes.float32, shape=(None, 3))
labels = array_ops.placeholder(dtypes.int32, shape=(None, 1))
- weights = array_ops.placeholder(dtypes.float32, shape=(None,))
+ weights = array_ops.placeholder(dtypes.float32)
with self.test_session() as sess:
loss = losses.sparse_softmax_cross_entropy(labels, logits, weights)
loss_val = sess.run(loss,
@@ -332,7 +330,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
[0.0, 10.0, 0.0],
[0.0, 0.0, 10.0]],
labels: [[2], [0], [1]],
- weights: [1.2, 3.4, 5.6],
+ weights: ((1.2,), (3.4,), (5.6,)),
})
self.assertAlmostEqual((1.2 + 3.4 + 5.6) * 10.0 / 3.0, loss_val, 3)
@@ -340,7 +338,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
[0.0, 0.0, 10.0]])
labels = constant_op.constant([[2], [0], [1]])
- weights = constant_op.constant([1.2, 3.4, 5.6], shape=[3])
+ weights = constant_op.constant([1.2, 3.4, 5.6], shape=(3, 1))
with self.test_session():
loss = losses.sparse_softmax_cross_entropy(labels, logits, weights)
self.assertAlmostEqual((1.2 + 3.4 + 5.6) * 10.0 / 3.0, loss.eval(), 3)
@@ -358,7 +356,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
[0.0, 0.0, 10.0]])
labels = constant_op.constant([[2], [0], [1]])
- weights = constant_op.constant([0, 0, 0], shape=[3])
+ weights = constant_op.constant([0, 0, 0], shape=(3, 1))
with self.test_session():
loss = losses.sparse_softmax_cross_entropy(labels, logits, weights)
self.assertAlmostEqual(0.0, loss.eval(), 3)
@@ -367,7 +365,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
[0.0, 0.0, 10.0]])
labels = constant_op.constant([[2], [0], [1]])
- weights = constant_op.constant([1.2, 0, 0], shape=[3])
+ weights = constant_op.constant([1.2, 0, 0], shape=(3, 1))
with self.test_session():
loss = losses.sparse_softmax_cross_entropy(labels, logits, weights)
self.assertAlmostEqual(12.0, loss.eval(), 3)
@@ -432,9 +430,9 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
[-100.0, -100.0, 100.0, -100.0],
[-100.0, -100.0, -100.0, 100.0]])
labels = constant_op.constant([[0, 1], [2, 3]])
- weights = constant_op.constant([1.2, 3.4, 5.6, 7.8])
+ weights = constant_op.constant(1.2)
- with self.assertRaises(errors_impl.InvalidArgumentError):
+ with self.assertRaisesRegexp(ValueError, 'dimension'):
losses.sparse_softmax_cross_entropy(
labels, logits, weights=weights).eval()
@@ -629,7 +627,7 @@ class LogLossTest(test.TestCase):
loss, 3)
def testNonZeroLossWithOneDimBatchSpecificWeights(self):
- weights = constant_op.constant([1.2, 3.4], shape=[2])
+ weights = constant_op.constant((1.2, 3.4), shape=(2, 1))
expected_losses = np.multiply(
self._expected_losses,
np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape((2, 3)))
@@ -638,7 +636,7 @@ class LogLossTest(test.TestCase):
self.assertAlmostEqual(-np.sum(expected_losses) / 6.0, loss.eval(), 3)
def testNonZeroLossWithOneDimBatchSpecificWeightsSomeZero(self):
- weights = constant_op.constant([1.2, 0], shape=[2])
+ weights = constant_op.constant((1.2, 0), shape=(2, 1))
expected_losses = np.multiply(self._expected_losses,
np.asarray([1.2, 1.2, 1.2, 0, 0, 0]).reshape(
(2, 3)))
@@ -797,7 +795,7 @@ class MeanSquaredErrorTest(test.TestCase):
self.assertAlmostEqual(49.5 * weights, loss.eval(), 3)
def testNonZeroLossWithOneDimBatchSpecificWeights(self):
- weights = constant_op.constant([1.2, 3.4], shape=[2,])
+ weights = constant_op.constant([1.2, 3.4], shape=(2, 1))
loss = losses.mean_squared_error(self._labels, self._predictions, weights)
with self.test_session():
self.assertAlmostEqual(767.8 / 6.0, loss.eval(), 3)
@@ -855,7 +853,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
labels=constant_op.constant(self._labels),
weights=None)
- def _test_mean_pairwise_squared_error(
+ def _test_valid_weights(
self, labels, predictions, expected_loss, weights=1.0):
with self.test_session():
static_inputs_op = losses.mean_pairwise_squared_error(
@@ -881,11 +879,11 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
expected_loss, dynamic_inputs_op.eval(feed_dict=feed_dict), places=3)
def testAllCorrectNoLossWeight(self):
- self._test_mean_pairwise_squared_error(
+ self._test_valid_weights(
self._labels, self._labels, expected_loss=0.0)
def testNonZeroLoss(self):
- self._test_mean_pairwise_squared_error(
+ self._test_valid_weights(
self._labels, self._predictions,
expected_loss=np.sum(self._expected_losses))
@@ -916,7 +914,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
def testNonZeroLossWithPythonScalarWeight(self):
weight = 2.3
- self._test_mean_pairwise_squared_error(
+ self._test_valid_weights(
self._labels, self._predictions,
expected_loss=weight * np.sum(self._expected_losses),
weights=weight)
@@ -932,16 +930,9 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
loss.eval(), 3)
def testNonZeroLossWithScalarZeroWeight(self):
- self._test_mean_pairwise_squared_error(
+ self._test_valid_weights(
self._labels, self._predictions, expected_loss=0.0, weights=0.0)
- def testNonZeroLossWithOneDimBatchSpecificWeights(self):
- weights = np.asarray((1.2, 3.4))
- self._test_mean_pairwise_squared_error(
- self._labels, self._predictions,
- expected_loss=np.sum(np.multiply(weights, self._expected_losses)),
- weights=weights)
-
def test3d(self):
labels = np.array([
[[1, 9, 2], [12, 11, 10], [9, 8, 7]],
@@ -951,7 +942,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
[[4, 8, 12], [1, 2, 3], [4, 5, 6]],
[[8, 1, 3], [7, 8, 9], [10, 11, 12]],
])
- self._test_mean_pairwise_squared_error(
+ self._test_valid_weights(
labels, predictions, expected_loss=122.22222)
def test3dWeightedScalar(self):
@@ -964,11 +955,36 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
[[8, 1, 3], [7, 8, 9], [10, 11, 12]],
])
weight = 3.0
- self._test_mean_pairwise_squared_error(
+ self._test_valid_weights(
labels, predictions, expected_loss=weight * 122.22222,
weights=weight)
- def test3dWeighted2x0(self):
+ def _test_invalid_weights(
+ self, labels, predictions, weights=1.0):
+ expected_error_msg = 'weights can not be broadcast to values'
+
+ # Static check.
+ with self.assertRaisesRegexp(ValueError, expected_error_msg):
+ losses.mean_pairwise_squared_error(
+ predictions=predictions, labels=labels, weights=weights)
+
+ # Dynamic check.
+ predictions_placeholder = array_ops.placeholder(dtypes.float32)
+ labels_placeholder = array_ops.placeholder(dtypes.int32)
+ weights_placeholder = array_ops.placeholder(dtypes.float32)
+ dynamic_inputs_op = losses.mean_pairwise_squared_error(
+ predictions=predictions_placeholder,
+ labels=labels_placeholder,
+ weights=weights_placeholder)
+ with self.test_session():
+ with self.assertRaisesRegexp(errors_impl.OpError, expected_error_msg):
+ dynamic_inputs_op.eval(feed_dict={
+ predictions_placeholder: predictions,
+ labels_placeholder: labels,
+ weights_placeholder: weights,
+ })
+
+ def testInvalid3dWeighted2x0(self):
labels = np.array([
[[1, 9, 2], [12, 11, 10], [9, 8, 7]],
[[-5, -5, 7], [6, 5, 4], [3, 2, 1]],
@@ -977,11 +993,9 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
[[4, 8, 12], [1, 2, 3], [4, 5, 6]],
[[8, 1, 3], [7, 8, 9], [10, 11, 12]],
])
- self._test_mean_pairwise_squared_error(
- labels, predictions, expected_loss=253.24445,
- weights=np.asarray((1.2, 3.4)))
+ self._test_invalid_weights(
+ labels, predictions, weights=np.asarray((1.2, 3.4)))
- # TODO(ptucker): According to the pydoc, this should work.
def test3dWeighted2x3x3(self):
labels = np.array([
[[1, 9, 2], [12, 11, 10], [9, 8, 7]],
@@ -991,19 +1005,13 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
[[4, 8, 12], [1, 2, 3], [4, 5, 6]],
[[8, 1, 3], [7, 8, 9], [10, 11, 12]],
])
- with self.assertRaisesRegexp(
- ValueError, 'Dimensions must be equal, but are 2 and 3'):
- losses.mean_pairwise_squared_error(
- predictions=predictions, labels=labels,
- weights=np.ones((2, 3, 3)))
-
- def testZeroLossWithOneDimBatchZeroWeights(self):
- self._test_mean_pairwise_squared_error(
- self._labels, self._predictions, expected_loss=0.0,
- weights=np.zeros((2,)))
+ self._test_valid_weights(
+ # TODO(ptucker): This doesn't look right.
+ labels, predictions, expected_loss=9 * 122.22222,
+ weights=np.ones((2, 3, 3)))
def testLossWithAllZeroBatchSpecificWeights(self):
- self._test_mean_pairwise_squared_error(
+ self._test_valid_weights(
self._labels, self._predictions, expected_loss=0.0,
weights=np.zeros((2, 1)))
@@ -1071,7 +1079,7 @@ class CosineDistanceLossTest(test.TestCase):
predictions=constant_op.constant(self._predictions),
labels=constant_op.constant(self._labels),
dim=2,
- weights=constant_op.constant([1, 0, 0]))
+ weights=np.asarray((1, 0, 0)).reshape((3, 1, 1)))
with self.test_session():
self.assertEqual(1.0, loss.eval())
@@ -1081,21 +1089,10 @@ class CosineDistanceLossTest(test.TestCase):
labels=constant_op.constant(self._labels),
dim=2,
weights=constant_op.constant(
- [1, 0, 0, 1, 1, 1], shape=(3, 2)))
+ [1, 0, 0, 1, 1, 1], shape=(3, 2, 1)))
with self.test_session():
self.assertEqual(3.0 / 4.0, loss.eval())
- def testValueErrorThrownWithShapelessPlaceholder(self):
- tf_predictions = array_ops.placeholder(dtypes.float32)
- with self.test_session():
- with self.assertRaises(ValueError):
- losses.cosine_distance(
- predictions=tf_predictions,
- labels=constant_op.constant(self._labels),
- dim=2,
- weights=constant_op.constant(
- [1, 0, 0, 1, 1, 1], shape=(3, 2)))
-
def testMeasurementSpecificWeightsWithPlaceholderWithShape(self):
tf_predictions = array_ops.placeholder(
dtypes.float32, shape=self._labels.shape)
@@ -1104,7 +1101,7 @@ class CosineDistanceLossTest(test.TestCase):
labels=constant_op.constant(self._labels),
dim=2,
weights=constant_op.constant(
- [1, 0, 0, 1, 1, 1], shape=(3, 2)))
+ [1, 0, 0, 1, 1, 1], shape=(3, 2, 1)))
with self.test_session() as sess:
loss = sess.run(loss, feed_dict={tf_predictions: self._predictions})
self.assertEqual(3.0 / 4.0, loss)
@@ -1114,7 +1111,7 @@ class CosineDistanceLossTest(test.TestCase):
predictions=constant_op.constant(self._predictions),
labels=constant_op.constant(self._labels),
dim=2,
- weights=array_ops.zeros((3,)))
+ weights=array_ops.zeros((3, 1, 1)))
with self.test_session():
self.assertEqual(0, loss.eval())
@@ -1123,7 +1120,7 @@ class CosineDistanceLossTest(test.TestCase):
predictions=constant_op.constant(self._predictions),
labels=constant_op.constant(self._labels),
dim=2,
- weights=array_ops.zeros((3, 2)))
+ weights=array_ops.zeros((3, 2, 1)))
with self.test_session():
self.assertEqual(0, loss.eval())
@@ -1161,17 +1158,18 @@ class ComputeWeightedLossTest(test.TestCase):
with ops.Graph().as_default():
self.assertEqual(0, len(util.get_losses()))
raw_losses = self._raw_losses
- shape = self._shape
- unweighted_losses = (losses.compute_weighted_loss(raw_losses),
- losses.compute_weighted_loss(
- raw_losses, weights=1.0),
- losses.compute_weighted_loss(
- raw_losses, weights=np.ones(shape=shape[0:1])),
- losses.compute_weighted_loss(
- raw_losses, weights=np.ones(shape=shape[0:2])),
- losses.compute_weighted_loss(
- raw_losses, weights=np.ones(shape=shape)))
- self.assertEqual(5, len(util.get_losses()))
+ unweighted_losses = (
+ losses.compute_weighted_loss(raw_losses),
+ losses.compute_weighted_loss(raw_losses, weights=np.ones((1, 1, 1))),
+ losses.compute_weighted_loss(raw_losses, weights=np.ones((1, 1, 4))),
+ losses.compute_weighted_loss(raw_losses, weights=np.ones((1, 2, 1))),
+ losses.compute_weighted_loss(raw_losses, weights=np.ones((1, 2, 4))),
+ losses.compute_weighted_loss(raw_losses, weights=np.ones((3, 1, 1))),
+ losses.compute_weighted_loss(raw_losses, weights=np.ones((3, 1, 4))),
+ losses.compute_weighted_loss(raw_losses, weights=np.ones((3, 2, 1))),
+ losses.compute_weighted_loss(raw_losses, weights=np.ones(self._shape))
+ )
+ self.assertEqual(9, len(util.get_losses()))
with self.test_session():
for unweighted_loss in unweighted_losses:
self.assertAllClose(self._unweighted_loss, unweighted_loss.eval())
@@ -1187,215 +1185,114 @@ class ComputeWeightedLossTest(test.TestCase):
self.assertAllClose(
np.mean(weight * self._raw_losses), weighted_loss.eval())
- # TODO(b/33556118): Bug: `loss1` should be the same as `testUnweighted`, and
- # `loss17` should be the same as `testScalarWeight`.
- def testScalar1DWeight(self):
+ def _test_invalid_weights(self, weights):
with ops.Graph().as_default():
self.assertEqual(0, len(util.get_losses()))
- loss1 = losses.compute_weighted_loss(self._raw_losses, weights=(1.0,))
+ expected_error_msg = 'weights can not be broadcast to values'
+
+ # Static check.
+ with self.assertRaisesRegexp(ValueError, expected_error_msg):
+ losses.compute_weighted_loss(self._raw_losses, weights=weights)
+
+ # Dynamic check.
+ weights_placeholder = array_ops.placeholder(dtypes.float32)
+ weighted_loss = losses.compute_weighted_loss(
+ self._raw_losses, weights=weights_placeholder)
self.assertEqual(1, len(util.get_losses()))
- weight = 17.0
- loss17 = losses.compute_weighted_loss(self._raw_losses, weights=(weight,))
- self.assertEqual(2, len(util.get_losses()))
with self.test_session():
- self.assertAllClose(self._unweighted_loss * self._shape[0],
- loss1.eval())
- self.assertAllClose(
- np.mean(weight * self._raw_losses) * self._shape[0], loss17.eval())
+ with self.assertRaisesRegexp(errors_impl.OpError, expected_error_msg):
+ weighted_loss.eval(feed_dict={weights_placeholder: weights})
- def testInvalid1DWeight(self):
- with ops.Graph().as_default():
- with self.assertRaisesRegexp(ValueError, 'Dimensions must be equal'):
- losses.compute_weighted_loss(self._raw_losses, weights=(17.0, 31.0))
-
- def testInvalid4DWeight(self):
- with ops.Graph().as_default():
- with self.assertRaisesRegexp(ValueError, 'Invalid weights shape'):
- losses.compute_weighted_loss(
- self._raw_losses, weights=np.zeros(shape=(2, 2, 2, 2)))
+ def testInvalidWeightTooManyDims(self):
+ self._test_invalid_weights(np.zeros(shape=(2, 2, 2, 2)))
- def testInvalid4DWeight2(self):
+ def testInvalidWeightMismatchedDim(self):
with ops.Graph().as_default():
raw_losses = array_ops.reshape(self._raw_losses, shape=(3, 2, 4, 1))
weights = np.ones(shape=(3, 2, 4, 2))
- with self.assertRaisesRegexp(ValueError, 'Invalid weights shape'):
+ expected_error_msg = 'weights can not be broadcast to values'
+ self.assertEqual(0, len(util.get_losses()))
+
+ # Static check.
+ with self.assertRaisesRegexp(ValueError, expected_error_msg):
losses.compute_weighted_loss(raw_losses, weights=weights)
- def test3Weight(self):
- with ops.Graph().as_default():
- self.assertEqual(0, len(util.get_losses()))
- weights3 = (17.0, 5.0, 2.0)
+ # Dynamic check.
+ weights_placeholder = array_ops.placeholder(dtypes.float32)
weighted_loss = losses.compute_weighted_loss(
- self._raw_losses, weights=weights3)
+ raw_losses, weights=weights_placeholder)
self.assertEqual(1, len(util.get_losses()))
with self.test_session():
- weights3x1x1 = np.reshape(weights3, (3, 1, 1))
- self.assertAllClose(
- np.mean(weights3x1x1 * self._raw_losses), weighted_loss.eval())
+ with self.assertRaisesRegexp(errors_impl.OpError, expected_error_msg):
+ weighted_loss.eval(feed_dict={weights_placeholder: weights})
- def test3x1Weight(self):
- with ops.Graph().as_default():
- self.assertEqual(0, len(util.get_losses()))
- weights3x1 = (
- (17.0,),
- (5.0,),
- (2.0,),)
- weighted_loss = losses.compute_weighted_loss(
- self._raw_losses, weights=weights3x1)
- self.assertEqual(1, len(util.get_losses()))
- with self.test_session():
- weights3x1x1 = np.reshape(weights3x1, (3, 1, 1))
- self.assertAllClose(
- np.mean(weights3x1x1 * self._raw_losses), weighted_loss.eval())
+ def testInvalid3Weight(self):
+ self._test_invalid_weights((17.0, 5.0, 2.0))
- # TODO(ptucker): Bug: this should be the same as `test3x1Weight`.
- def test3x1x1Weight(self):
- with ops.Graph().as_default():
- self.assertEqual(0, len(util.get_losses()))
- weights3x1x1 = (
- ((17.0,),),
- ((5.0,),),
- ((2.0,),),)
- weighted_loss = losses.compute_weighted_loss(
- self._raw_losses, weights=weights3x1x1)
- self.assertEqual(1, len(util.get_losses()))
- with self.test_session():
- self.assertAllClose(
- np.mean(weights3x1x1 * self._raw_losses) * self._shape[1],
- weighted_loss.eval())
+ def testInvalid3x1Weight(self):
+ self._test_invalid_weights(((17.0,), (5.0,), (2.0,),))
- def test3x2Weight(self):
- with ops.Graph().as_default():
- self.assertEqual(0, len(util.get_losses()))
- weights3x2 = (
- (17.0, 3.0),
- (5.0, 31.0),
- (2.0, 7.0),)
- weighted_loss = losses.compute_weighted_loss(
- self._raw_losses, weights=weights3x2)
- self.assertEqual(1, len(util.get_losses()))
- with self.test_session():
- weights3x2x1 = np.reshape(weights3x2, (3, 2, 1))
- self.assertAllClose(
- np.mean(weights3x2x1 * self._raw_losses), weighted_loss.eval())
+ def testInvalid3x2Weight(self):
+ self._test_invalid_weights((
+ (17.0, 3.0),
+ (5.0, 31.0),
+ (2.0, 7.0),))
+
+ def testInvalid1x2Weight(self):
+ self._test_invalid_weights((17.0, 3.0,),)
- # TODO(b/33556118): Bug: this should be averaged across all dimensions, not
- # summed across dim 0.
- def test1x2Weight(self):
+ def testInvalidScalar1DWeight(self):
+ self._test_invalid_weights((17.0,),)
+
+ def _test_valid_weights(self, weights):
with ops.Graph().as_default():
self.assertEqual(0, len(util.get_losses()))
- weights1x2 = ((
- 17.0,
- 3.0,),)
weighted_loss = losses.compute_weighted_loss(
- self._raw_losses, weights=weights1x2)
+ self._raw_losses, weights=weights)
self.assertEqual(1, len(util.get_losses()))
with self.test_session():
- weights1x2x1 = np.reshape(weights1x2, (1, 2, 1))
self.assertAllClose(
- np.mean(weights1x2x1 * self._raw_losses) * self._shape[0],
+ np.mean(weights * self._raw_losses),
weighted_loss.eval())
- # TODO(b/33556118): Bug: this should be averaged across all dimensions, not
- # summed across dim 0.
+ def test1x1x1Weight(self):
+ self._test_valid_weights((((17.0,),),))
+
def test1x2x1Weight(self):
- with ops.Graph().as_default():
- self.assertEqual(0, len(util.get_losses()))
- weights1x2x1 = ((
- (17.0,),
- (3.0,),),)
- weighted_loss = losses.compute_weighted_loss(
- self._raw_losses, weights=weights1x2x1)
- self.assertEqual(1, len(util.get_losses()))
- with self.test_session():
- self.assertAllClose(
- np.mean(weights1x2x1 * self._raw_losses) * self._shape[0],
- weighted_loss.eval())
+ self._test_valid_weights((((17.0,), (3.0,),),))
- # TODO(b/33556118): Bug: this should be averaged across all dimensions, not
- # summed across dims 0 & 1.
def test1x1x4Weight(self):
- with ops.Graph().as_default():
- self.assertEqual(0, len(util.get_losses()))
- weights1x1x4 = (((17.0, 13.0, 2.0, 5.0),),)
- weighted_loss = losses.compute_weighted_loss(
- self._raw_losses, weights=weights1x1x4)
- self.assertEqual(1, len(util.get_losses()))
- shape = self._shape
- with self.test_session():
- self.assertAllClose(
- np.mean(weights1x1x4 * self._raw_losses) * shape[0] * shape[1],
- weighted_loss.eval())
+ self._test_valid_weights((((17.0, 13.0, 2.0, 5.0),),))
+
+ def test3x1x1Weight(self):
+ self._test_valid_weights((((17.0,),), ((5.0,),), ((2.0,),),))
def test3x2x1Weight(self):
- with ops.Graph().as_default():
- self.assertEqual(0, len(util.get_losses()))
- weights3x2x1 = (
- ((17.0,), (3.0,)),
- ((5.0,), (31.0,)),
- ((2.0,), (7.0,)),
- )
- weighted_loss = losses.compute_weighted_loss(
- self._raw_losses, weights=weights3x2x1)
- self.assertEqual(1, len(util.get_losses()))
- with self.test_session():
- self.assertAllClose(
- np.mean(weights3x2x1 * self._raw_losses),
- weighted_loss.eval())
+ self._test_valid_weights((
+ ((17.0,), (3.0,)),
+ ((5.0,), (31.0,)),
+ ((2.0,), (7.0,)),
+ ))
- # TODO(b/33556118): Bug: this should be averaged across all dimensions, not
- # summed across dim 1.
def test3x1x4Weight(self):
- with ops.Graph().as_default():
- self.assertEqual(0, len(util.get_losses()))
- weights3x1x4 = (
- ((17.0, 13.0, 2.0, 5.0),),
- ((5.0, 31.0, 17.0, 5.0),),
- ((7.0, 3.0, 11.0, 5.0),),
- )
- weighted_loss = losses.compute_weighted_loss(
- self._raw_losses, weights=weights3x1x4)
- self.assertEqual(1, len(util.get_losses()))
- with self.test_session():
- self.assertAllClose(
- np.mean(weights3x1x4 * self._raw_losses) * self._shape[1],
- weighted_loss.eval())
+ self._test_valid_weights((
+ ((17.0, 13.0, 2.0, 5.0),),
+ ((5.0, 31.0, 17.0, 5.0),),
+ ((7.0, 3.0, 11.0, 5.0),),
+ ))
- # TODO(b/33556118): Bug: this should be averaged across all dimensions, not
- # summed across dim 0.
def test1x2x4Weight(self):
- with ops.Graph().as_default():
- self.assertEqual(0, len(util.get_losses()))
- weights1x2x4 = ((
- (17.0, 13.0, 2.0, 5.0),
- (3.0, 13.0, 11.0, 2.0),),)
- weighted_loss = losses.compute_weighted_loss(
- self._raw_losses, weights=weights1x2x4)
- self.assertEqual(1, len(util.get_losses()))
- with self.test_session():
- self.assertAllClose(
- np.mean(weights1x2x4 * self._raw_losses) * self._shape[0],
- weighted_loss.eval())
+ self._test_valid_weights(((
+ (17.0, 13.0, 2.0, 5.0),
+ (3.0, 13.0, 11.0, 2.0),
+ ),))
def test3x2x4Weight(self):
- with ops.Graph().as_default():
- self.assertEqual(0, len(util.get_losses()))
- weights3x2x4 = (
- (
- (17.0, 13.0, 2.0, 5.0),
- (3.0, 13.0, 11.0, 2.0),),
- (
- (5.0, 31.0, 17.0, 5.0),
- (13.0, 3.0, 1.0, 11.0),),
- (
- (7.0, 3.0, 11.0, 5.0),
- (13.0, 11.0, 1.0, 7.0),),)
- weighted_loss = losses.compute_weighted_loss(
- self._raw_losses, weights=weights3x2x4)
- self.assertEqual(1, len(util.get_losses()))
- with self.test_session():
- self.assertAllClose(
- np.mean(weights3x2x4 * self._raw_losses), weighted_loss.eval())
+ self._test_valid_weights((
+ ((17.0, 13.0, 2.0, 5.0), (3.0, 13.0, 11.0, 2.0),),
+ ((5.0, 31.0, 17.0, 5.0), (13.0, 3.0, 1.0, 11.0),),
+ ((7.0, 3.0, 11.0, 5.0), (13.0, 11.0, 1.0, 7.0),),
+ ))
if __name__ == '__main__':
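The valid/invalid split above follows one rule for weights passed to compute_weighted_loss on raw losses of shape (3, 2, 4): scalar weights are accepted, and otherwise weights must have the same rank with every dimension equal to the corresponding loss dimension or to 1; lower-rank weights such as (3,) or (3, 1) are now rejected both at graph-construction time and at run time. A small standalone sketch of that condition (the helper is hypothetical, written only to make the rule explicit; it is not part of the losses API):

    def weights_broadcastable(weights_shape, values_shape):
        """True if weights_shape is scalar, or has the same rank as values_shape
        with each weight dimension equal to the value dimension or to 1."""
        if len(weights_shape) == 0:  # scalar weights are always accepted
            return True
        if len(weights_shape) != len(values_shape):
            return False
        return all(w in (1, v) for w, v in zip(weights_shape, values_shape))

    assert weights_broadcastable((), (3, 2, 4))
    assert weights_broadcastable((3, 2, 1), (3, 2, 4))
    assert not weights_broadcastable((3,), (3, 2, 4))          # testInvalid3Weight
    assert not weights_broadcastable((2, 2, 2, 2), (3, 2, 4))  # too many dims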
diff --git a/tensorflow/python/kernel_tests/metrics_test.py b/tensorflow/python/kernel_tests/metrics_test.py
index fc021c897a..4fbde86aec 100644
--- a/tensorflow/python/kernel_tests/metrics_test.py
+++ b/tensorflow/python/kernel_tests/metrics_test.py
@@ -31,6 +31,7 @@ from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import math_ops
from tensorflow.python.ops import metrics
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variables
@@ -721,15 +722,18 @@ class PrecisionTest(test.TestCase):
self.assertAlmostEqual(1, sess.run(update_op))
self.assertAlmostEqual(1, precision.eval())
- def testSomeCorrect(self):
- predictions = constant_op.constant([1, 0, 1, 0], shape=(1, 4))
- labels = constant_op.constant([0, 1, 1, 0], shape=(1, 4))
- precision, update_op = metrics.precision(labels, predictions)
+ def testSomeCorrect_multipleInputDtypes(self):
+ for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
+ predictions = math_ops.cast(
+ constant_op.constant([1, 0, 1, 0], shape=(1, 4)), dtype=dtype)
+ labels = math_ops.cast(
+ constant_op.constant([0, 1, 1, 0], shape=(1, 4)), dtype=dtype)
+ precision, update_op = metrics.precision(labels, predictions)
- with self.test_session() as sess:
- sess.run(variables.local_variables_initializer())
- self.assertAlmostEqual(0.5, update_op.eval())
- self.assertAlmostEqual(0.5, precision.eval())
+ with self.test_session() as sess:
+ sess.run(variables.local_variables_initializer())
+ self.assertAlmostEqual(0.5, update_op.eval())
+ self.assertAlmostEqual(0.5, precision.eval())
def testWeighted1d(self):
predictions = constant_op.constant([[1, 0, 1, 0], [1, 0, 1, 0]])
@@ -885,15 +889,18 @@ class RecallTest(test.TestCase):
sess.run(update_op)
self.assertEqual(1, recall.eval())
- def testSomeCorrect(self):
- predictions = constant_op.constant([1, 0, 1, 0], shape=(1, 4))
- labels = constant_op.constant([0, 1, 1, 0], shape=(1, 4))
- recall, update_op = metrics.recall(labels, predictions)
+ def testSomeCorrect_multipleInputDtypes(self):
+ for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
+ predictions = math_ops.cast(
+ constant_op.constant([1, 0, 1, 0], shape=(1, 4)), dtype=dtype)
+ labels = math_ops.cast(
+ constant_op.constant([0, 1, 1, 0], shape=(1, 4)), dtype=dtype)
+ recall, update_op = metrics.recall(labels, predictions)
- with self.test_session() as sess:
- sess.run(variables.local_variables_initializer())
- self.assertAlmostEqual(0.5, update_op.eval())
- self.assertAlmostEqual(0.5, recall.eval())
+ with self.test_session() as sess:
+ sess.run(variables.local_variables_initializer())
+ self.assertAlmostEqual(0.5, update_op.eval())
+ self.assertAlmostEqual(0.5, recall.eval())
def testWeighted1d(self):
predictions = constant_op.constant([[1, 0, 1, 0], [0, 1, 0, 1]])
@@ -1008,17 +1015,20 @@ class AUCTest(test.TestCase):
self.assertEqual(1, auc.eval())
- def testSomeCorrect(self):
+ def testSomeCorrect_multipleLabelDtypes(self):
with self.test_session() as sess:
- predictions = constant_op.constant(
- [1, 0, 1, 0], shape=(1, 4), dtype=dtypes_lib.float32)
- labels = constant_op.constant([0, 1, 1, 0], shape=(1, 4))
- auc, update_op = metrics.auc(labels, predictions)
+ for label_dtype in (
+ dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
+ predictions = constant_op.constant(
+ [1, 0, 1, 0], shape=(1, 4), dtype=dtypes_lib.float32)
+ labels = math_ops.cast(
+ constant_op.constant([0, 1, 1, 0], shape=(1, 4)), dtype=label_dtype)
+ auc, update_op = metrics.auc(labels, predictions)
- sess.run(variables.local_variables_initializer())
- self.assertAlmostEqual(0.5, sess.run(update_op))
+ sess.run(variables.local_variables_initializer())
+ self.assertAlmostEqual(0.5, sess.run(update_op))
- self.assertAlmostEqual(0.5, auc.eval())
+ self.assertAlmostEqual(0.5, auc.eval())
def testWeighted1d(self):
with self.test_session() as sess:
@@ -1297,23 +1307,24 @@ class SpecificityAtSensitivityTest(test.TestCase):
self.assertAlmostEqual(0.6, sess.run(update_op))
self.assertAlmostEqual(0.6, specificity.eval())
- def testWeighted1d(self):
- predictions_values = [0.1, 0.2, 0.4, 0.3, 0.0, 0.1, 0.2, 0.2, 0.26, 0.26]
- labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
- weights_values = [3]
+ def testWeighted1d_multipleLabelDtypes(self):
+ for label_dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
+ predictions_values = [0.1, 0.2, 0.4, 0.3, 0.0, 0.1, 0.2, 0.2, 0.26, 0.26]
+ labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+ weights_values = [3]
- predictions = constant_op.constant(
- predictions_values, dtype=dtypes_lib.float32)
- labels = constant_op.constant(labels_values)
- weights = constant_op.constant(weights_values)
- specificity, update_op = metrics.specificity_at_sensitivity(
- labels, predictions, weights=weights, sensitivity=0.4)
+ predictions = constant_op.constant(
+ predictions_values, dtype=dtypes_lib.float32)
+ labels = math_ops.cast(labels_values, dtype=label_dtype)
+ weights = constant_op.constant(weights_values)
+ specificity, update_op = metrics.specificity_at_sensitivity(
+ labels, predictions, weights=weights, sensitivity=0.4)
- with self.test_session() as sess:
- sess.run(variables.local_variables_initializer())
+ with self.test_session() as sess:
+ sess.run(variables.local_variables_initializer())
- self.assertAlmostEqual(0.6, sess.run(update_op))
- self.assertAlmostEqual(0.6, specificity.eval())
+ self.assertAlmostEqual(0.6, sess.run(update_op))
+ self.assertAlmostEqual(0.6, specificity.eval())
def testWeighted2d(self):
predictions_values = [0.1, 0.2, 0.4, 0.3, 0.0, 0.1, 0.2, 0.2, 0.26, 0.26]
@@ -1432,22 +1443,24 @@ class SensitivityAtSpecificityTest(test.TestCase):
self.assertAlmostEqual(0.6, sess.run(update_op))
self.assertAlmostEqual(0.6, specificity.eval())
- def testWeighted(self):
- predictions_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26]
- labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
- weights_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ def testWeighted_multipleLabelDtypes(self):
+ for label_dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
+ predictions_values = [
+ 0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26]
+ labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+ weights_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- predictions = constant_op.constant(
- predictions_values, dtype=dtypes_lib.float32)
- labels = constant_op.constant(labels_values)
- weights = constant_op.constant(weights_values)
- specificity, update_op = metrics.sensitivity_at_specificity(
- labels, predictions, weights=weights, specificity=0.4)
+ predictions = constant_op.constant(
+ predictions_values, dtype=dtypes_lib.float32)
+ labels = math_ops.cast(labels_values, dtype=label_dtype)
+ weights = constant_op.constant(weights_values)
+ specificity, update_op = metrics.sensitivity_at_specificity(
+ labels, predictions, weights=weights, specificity=0.4)
- with self.test_session() as sess:
- sess.run(variables.local_variables_initializer())
- self.assertAlmostEqual(0.675, sess.run(update_op))
- self.assertAlmostEqual(0.675, specificity.eval())
+ with self.test_session() as sess:
+ sess.run(variables.local_variables_initializer())
+ self.assertAlmostEqual(0.675, sess.run(update_op))
+ self.assertAlmostEqual(0.675, specificity.eval())
# TODO(nsilberman): Break this up into two sets of tests.
@@ -1536,22 +1549,25 @@ class PrecisionRecallThresholdsTest(test.TestCase):
self.assertEqual(1, prec.eval())
self.assertEqual(1, rec.eval())
- def testSomeCorrect(self):
+ def testSomeCorrect_multipleLabelDtypes(self):
with self.test_session() as sess:
- predictions = constant_op.constant(
- [1, 0, 1, 0], shape=(1, 4), dtype=dtypes_lib.float32)
- labels = constant_op.constant([0, 1, 1, 0], shape=(1, 4))
- thresholds = [0.5]
- prec, prec_op = metrics.precision_at_thresholds(labels, predictions,
- thresholds)
- rec, rec_op = metrics.recall_at_thresholds(labels, predictions,
- thresholds)
+ for label_dtype in (
+ dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32):
+ predictions = constant_op.constant(
+ [1, 0, 1, 0], shape=(1, 4), dtype=dtypes_lib.float32)
+ labels = math_ops.cast(
+ constant_op.constant([0, 1, 1, 0], shape=(1, 4)), dtype=label_dtype)
+ thresholds = [0.5]
+ prec, prec_op = metrics.precision_at_thresholds(labels, predictions,
+ thresholds)
+ rec, rec_op = metrics.recall_at_thresholds(labels, predictions,
+ thresholds)
- sess.run(variables.local_variables_initializer())
- sess.run([prec_op, rec_op])
+ sess.run(variables.local_variables_initializer())
+ sess.run([prec_op, rec_op])
- self.assertAlmostEqual(0.5, prec.eval())
- self.assertAlmostEqual(0.5, rec.eval())
+ self.assertAlmostEqual(0.5, prec.eval())
+ self.assertAlmostEqual(0.5, rec.eval())
def testAllIncorrect(self):
inputs = np.random.randint(0, 2, size=(100, 1))
diff --git a/tensorflow/python/kernel_tests/record_input_test.py b/tensorflow/python/kernel_tests/record_input_test.py
new file mode 100644
index 0000000000..9b5de4fcdb
--- /dev/null
+++ b/tensorflow/python/kernel_tests/record_input_test.py
@@ -0,0 +1,80 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for record_input_op."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from tensorflow.python.lib.io import tf_record
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.platform import test
+
+
+class RecordInputOpTest(test.TestCase):
+
+ def generateTestData(self, prefix, n, m):
+ for i in range(n):
+ f = os.path.join(self.get_temp_dir(), prefix + "." + str(i))
+ w = tf_record.TFRecordWriter(f)
+
+ for j in range(m):
+ w.write("{0:0{width}}".format(i * m + j, width=10).encode("utf-8"))
+
+ w.close()
+
+ def testRecordInputSimple(self):
+ with self.test_session() as sess:
+ self.generateTestData("basic", 1, 1)
+
+ yield_op = data_flow_ops.RecordInput(
+ file_pattern=os.path.join(self.get_temp_dir(), "basic.*"),
+ parallelism=1,
+ buffer_size=1,
+ batch_size=1,
+ name="record_input").get_yield_op()
+
+ self.assertEqual(sess.run(yield_op), b"0000000000")
+
+ def testRecordInputEpochs(self):
+ files = 100
+ records_per_file = 100
+ with self.test_session() as sess:
+ self.generateTestData("basic", files, records_per_file)
+
+ records = data_flow_ops.RecordInput(
+ file_pattern=os.path.join(self.get_temp_dir(), "basic.*"),
+ parallelism=2,
+ buffer_size=2000,
+ batch_size=1,
+ shift_ratio=0.33,
+ seed=10,
+ name="record_input")
+
+ yield_op = records.get_yield_op()
+
+      # Cycle over 3 epochs and make sure records never repeat within an epoch.
+ for _ in range(3):
+ epoch_set = set()
+ for _ in range(files * records_per_file):
+ r = sess.run(yield_op)
+ self.assertTrue(r[0] not in epoch_set)
+ epoch_set.add(r[0])
+
+
+if __name__ == "__main__":
+ test.main()
diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py
index 74a6052ff6..853b08b2a5 100644
--- a/tensorflow/python/layers/base.py
+++ b/tensorflow/python/layers/base.py
@@ -121,6 +121,14 @@ class _Layer(object):
return self._non_trainable_variables if self.trainable else self.variables
@property
+ def trainable_weights(self):
+ return self.trainable_variables
+
+ @property
+ def non_trainable_weights(self):
+ return self.non_trainable_variables
+
+ @property
def variables(self):
"""Returns the list of all layer variables/weights.
diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py
index a476b0f72a..3b96d4362f 100644
--- a/tensorflow/python/layers/convolutional.py
+++ b/tensorflow/python/layers/convolutional.py
@@ -268,7 +268,7 @@ def conv1d(inputs,
activity_regularizer=None,
trainable=True,
name=None,
- reuse=False):
+ reuse=None):
"""Functional interface for 1D convolution layer (e.g. temporal convolution).
This layer creates a convolution kernel that is convolved
@@ -435,7 +435,7 @@ def conv2d(inputs,
activity_regularizer=None,
trainable=True,
name=None,
- reuse=False):
+ reuse=None):
"""Functional interface for the 2D convolution layer.
This layer creates a convolution kernel that is convolved
@@ -608,7 +608,7 @@ def conv3d(inputs,
activity_regularizer=None,
trainable=True,
name=None,
- reuse=False):
+ reuse=None):
"""Functional interface for the 3D convolution layer.
This layer creates a convolution kernel that is convolved
@@ -867,7 +867,7 @@ def separable_conv2d(inputs,
activity_regularizer=None,
trainable=True,
name=None,
- reuse=False):
+ reuse=None):
"""Functional interface for the depthwise separable 2D convolution layer.
This layer performs a depthwise convolution that acts separately on
@@ -1128,7 +1128,7 @@ def conv2d_transpose(inputs,
activity_regularizer=None,
trainable=True,
name=None,
- reuse=False):
+ reuse=None):
"""Transposed convolution layer (sometimes called Deconvolution).
The need for transposed convolutions generally arises
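With the default flipped from `reuse=False` to `reuse=None`, the functional convolution wrappers now inherit reuse from an enclosing `variable_scope` instead of always trying to create fresh variables. A minimal sketch of the intended semantics, mirroring the new `testFunctionalConv2DReuseFromScope` tests below and assuming the internal `tensorflow.python` module layout used throughout this change (scope and layer names are illustrative):

    from tensorflow.python.layers import convolutional as conv_layers
    from tensorflow.python.ops import random_ops
    from tensorflow.python.ops import variable_scope
    from tensorflow.python.ops import variables

    images = random_ops.random_uniform((5, 7, 9, 3), seed=1)

    with variable_scope.variable_scope('scope'):
      conv_layers.conv2d(images, 32, [3, 3], name='conv1')  # creates kernel + bias
    with variable_scope.variable_scope('scope', reuse=True):
      # reuse=None (the new default) inherits the scope's reuse flag,
      # so this call picks up the existing variables instead of failing.
      conv_layers.conv2d(images, 32, [3, 3], name='conv1')

    assert len(variables.trainable_variables()) == 2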
diff --git a/tensorflow/python/layers/convolutional_test.py b/tensorflow/python/layers/convolutional_test.py
index c47e92c582..1a5fe5c9b7 100644
--- a/tensorflow/python/layers/convolutional_test.py
+++ b/tensorflow/python/layers/convolutional_test.py
@@ -18,11 +18,16 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import numpy as np
+
from tensorflow.python.framework import ops
from tensorflow.python.layers import convolutional as conv_layers
+from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
from tensorflow.python.platform import test
@@ -191,21 +196,45 @@ class ConvTest(test.TestCase):
height, width = 7, 9
images = random_ops.random_uniform((5, height, width, 3), seed=1)
conv_layers.conv2d(images, 32, [3, 3], name='conv1')
- self.assertEqual(
- len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2)
+ self.assertEqual(len(variables.trainable_variables()), 2)
conv_layers.conv2d(images, 32, [3, 3], name='conv1', reuse=True)
- self.assertEqual(
- len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2)
+ self.assertEqual(len(variables.trainable_variables()), 2)
+
+ def testFunctionalConv2DReuseFromScope(self):
+ with variable_scope.variable_scope('scope'):
+ height, width = 7, 9
+ images = random_ops.random_uniform((5, height, width, 3), seed=1)
+ conv_layers.conv2d(images, 32, [3, 3], name='conv1')
+ self.assertEqual(len(variables.trainable_variables()), 2)
+ with variable_scope.variable_scope('scope', reuse=True):
+ conv_layers.conv2d(images, 32, [3, 3], name='conv1')
+ self.assertEqual(len(variables.trainable_variables()), 2)
+
+ def testFunctionalConv2DInitializerFromScope(self):
+ with self.test_session() as sess:
+ with variable_scope.variable_scope(
+ 'scope', initializer=init_ops.ones_initializer()):
+ height, width = 7, 9
+ images = random_ops.random_uniform((5, height, width, 3), seed=1)
+ conv_layers.conv2d(images, 32, [3, 3], name='conv1')
+ weights = variables.trainable_variables()
+ # Check the names of weights in order.
+ self.assertTrue('kernel' in weights[0].name)
+ self.assertTrue('bias' in weights[1].name)
+ sess.run(variables.global_variables_initializer())
+ weights = sess.run(weights)
+ # Check that the kernel weights got initialized to ones (from scope)
+ self.assertAllClose(weights[0], np.ones((3, 3, 3, 32)))
+ # Check that the bias still got initialized to zeros.
+ self.assertAllClose(weights[1], np.zeros((32)))
def testFunctionalConv2DNoReuse(self):
height, width = 7, 9
images = random_ops.random_uniform((5, height, width, 3), seed=1)
conv_layers.conv2d(images, 32, [3, 3])
- self.assertEqual(
- len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2)
+ self.assertEqual(len(variables.trainable_variables()), 2)
conv_layers.conv2d(images, 32, [3, 3])
- self.assertEqual(
- len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 4)
+ self.assertEqual(len(variables.trainable_variables()), 4)
class SeparableConv2DTest(test.TestCase):
@@ -323,22 +352,48 @@ class SeparableConv2DTest(test.TestCase):
height, width = 7, 9
images = random_ops.random_uniform((5, height, width, 3), seed=1)
conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1')
- self.assertEqual(
- len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 3)
+ self.assertEqual(len(variables.trainable_variables()), 3)
conv_layers.separable_conv2d(
images, 32, [3, 3], name='sepconv1', reuse=True)
- self.assertEqual(
- len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 3)
+ self.assertEqual(len(variables.trainable_variables()), 3)
+
+ def testFunctionalConv2DReuseFromScope(self):
+ with variable_scope.variable_scope('scope'):
+ height, width = 7, 9
+ images = random_ops.random_uniform((5, height, width, 3), seed=1)
+ conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1')
+ self.assertEqual(len(variables.trainable_variables()), 3)
+ with variable_scope.variable_scope('scope', reuse=True):
+ conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1')
+ self.assertEqual(len(variables.trainable_variables()), 3)
+
+ def testFunctionalConv2DInitializerFromScope(self):
+ with self.test_session() as sess:
+ with variable_scope.variable_scope(
+ 'scope', initializer=init_ops.ones_initializer()):
+ height, width = 7, 9
+ images = random_ops.random_uniform((5, height, width, 3), seed=1)
+ conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1')
+ weights = variables.trainable_variables()
+ # Check the names of weights in order.
+ self.assertTrue('depthwise_kernel' in weights[0].name)
+ self.assertTrue('pointwise_kernel' in weights[1].name)
+ self.assertTrue('bias' in weights[2].name)
+ sess.run(variables.global_variables_initializer())
+ weights = sess.run(weights)
+ # Check that the kernel weights got initialized to ones (from scope)
+ self.assertAllClose(weights[0], np.ones((3, 3, 3, 1)))
+ self.assertAllClose(weights[1], np.ones((1, 1, 3, 32)))
+ # Check that the bias still got initialized to zeros.
+ self.assertAllClose(weights[2], np.zeros((32)))
def testFunctionalConv2DNoReuse(self):
height, width = 7, 9
images = random_ops.random_uniform((5, height, width, 3), seed=1)
conv_layers.separable_conv2d(images, 32, [3, 3])
- self.assertEqual(
- len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 3)
+ self.assertEqual(len(variables.trainable_variables()), 3)
conv_layers.separable_conv2d(images, 32, [3, 3])
- self.assertEqual(
- len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 6)
+ self.assertEqual(len(variables.trainable_variables()), 6)
def testSeparableConv2DDepthwiseRegularizer(self):
height, width = 7, 9
@@ -511,21 +566,45 @@ class Conv2DTransposeTest(test.TestCase):
height, width = 7, 9
images = random_ops.random_uniform((5, height, width, 3), seed=1)
conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1')
- self.assertEqual(
- len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2)
+ self.assertEqual(len(variables.trainable_variables()), 2)
conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1', reuse=True)
- self.assertEqual(
- len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2)
+ self.assertEqual(len(variables.trainable_variables()), 2)
+
+ def testFunctionalConv2DTransposeReuseFromScope(self):
+ with variable_scope.variable_scope('scope'):
+ height, width = 7, 9
+ images = random_ops.random_uniform((5, height, width, 3), seed=1)
+ conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1')
+ self.assertEqual(len(variables.trainable_variables()), 2)
+ with variable_scope.variable_scope('scope', reuse=True):
+ conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1')
+ self.assertEqual(len(variables.trainable_variables()), 2)
+
+ def testFunctionalConv2DTransposeInitializerFromScope(self):
+ with self.test_session() as sess:
+ with variable_scope.variable_scope(
+ 'scope', initializer=init_ops.ones_initializer()):
+ height, width = 7, 9
+ images = random_ops.random_uniform((5, height, width, 3), seed=1)
+ conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1')
+ weights = variables.trainable_variables()
+ # Check the names of weights in order.
+ self.assertTrue('kernel' in weights[0].name)
+ self.assertTrue('bias' in weights[1].name)
+ sess.run(variables.global_variables_initializer())
+ weights = sess.run(weights)
+ # Check that the kernel weights got initialized to ones (from scope)
+ self.assertAllClose(weights[0], np.ones((3, 3, 32, 3)))
+ # Check that the bias still got initialized to zeros.
+ self.assertAllClose(weights[1], np.zeros((32)))
def testFunctionalConv2DTransposeNoReuse(self):
height, width = 7, 9
images = random_ops.random_uniform((5, height, width, 3), seed=1)
conv_layers.conv2d_transpose(images, 32, [3, 3])
- self.assertEqual(
- len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2)
+ self.assertEqual(len(variables.trainable_variables()), 2)
conv_layers.conv2d_transpose(images, 32, [3, 3])
- self.assertEqual(
- len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 4)
+ self.assertEqual(len(variables.trainable_variables()), 4)
if __name__ == '__main__':
diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py
index c662478ccc..92894e1447 100644
--- a/tensorflow/python/layers/core.py
+++ b/tensorflow/python/layers/core.py
@@ -41,10 +41,12 @@ from tensorflow.python.layers import utils
class Dense(base._Layer): # pylint: disable=protected-access
"""Densely-connected layer class.
- This layer implements the operation `outputs = activation(inputs.w + b)`
+ This layer implements the operation:
+ `outputs = activation(inputs.kernel + bias)`
Where `activation` is the activation function passed as the `activation`
- argument (if not `None`), `w` is a weights matrix created by the layer,
- and `b` is a bias vector created by the layer (only if `use_bias` is `True`).
+ argument (if not `None`), `kernel` is a weights matrix created by the layer,
+ and `bias` is a bias vector created by the layer
+ (only if `use_bias` is `True`).
Note: if the input to the layer has a rank greater than 2, then it is
flattened prior to the initial matrix multiply by `w`.
@@ -54,9 +56,9 @@ class Dense(base._Layer): # pylint: disable=protected-access
activation: Activation function (callable). Set it to None to maintain a
linear activation.
use_bias: Boolean, whether the layer uses a bias.
- weights_initializer: Initializer function for the weight matrix.
+ kernel_initializer: Initializer function for the weight matrix.
bias_initializer: Initializer function for the bias.
- weights_regularizer: Regularizer function for the weight matrix.
+ kernel_regularizer: Regularizer function for the weight matrix.
bias_regularizer: Regularizer function for the bias.
activity_regularizer: Regularizer function for the output.
trainable: Boolean, if `True` also add variables to the graph collection
@@ -70,21 +72,21 @@ class Dense(base._Layer): # pylint: disable=protected-access
units: Python integer, dimensionality of the output space.
activation: Activation function (callable).
use_bias: Boolean, whether the layer uses a bias.
- weights_initializer: Initializer instance (or name) for the weight matrix.
+ kernel_initializer: Initializer instance (or name) for the weight matrix.
bias_initializer: Initializer instance (or name) for the bias.
- weights_regularizer: Regularizer instance for the weight matrix (callable)
+ kernel_regularizer: Regularizer instance for the weight matrix (callable)
bias_regularizer: Regularizer instance for the bias (callable).
activity_regularizer: Regularizer instance for the output (callable)
- weights: Weight matrix (TensorFlow variable or tensor).
+ kernel: Weight matrix (TensorFlow variable or tensor).
bias: Bias vector, if applicable (TensorFlow variable or tensor).
"""
def __init__(self, units,
activation=None,
use_bias=True,
- weights_initializer=None,
+ kernel_initializer=None,
bias_initializer=init_ops.zeros_initializer(),
- weights_regularizer=None,
+ kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
trainable=True,
@@ -94,9 +96,9 @@ class Dense(base._Layer): # pylint: disable=protected-access
self.units = units
self.activation = activation
self.use_bias = use_bias
- self.weights_initializer = weights_initializer
+ self.kernel_initializer = kernel_initializer
self.bias_initializer = bias_initializer
- self.weights_regularizer = weights_regularizer
+ self.kernel_regularizer = kernel_regularizer
self.bias_regularizer = bias_regularizer
self.activity_regularizer = activity_regularizer
@@ -113,12 +115,12 @@ class Dense(base._Layer): # pylint: disable=protected-access
# weight of the layer. If the layer is not trainable
# (self.trainable = False), the variable will not be added to
# tf.trainable_variables(), and self.trainable_weights will be empty.
- self.w = vs.get_variable('weights',
- shape=[input_shape[-1].value, self.units],
- initializer=self.weights_initializer,
- regularizer=self.weights_regularizer,
- dtype=self.dtype,
- trainable=True)
+ self.kernel = vs.get_variable('kernel',
+ shape=[input_shape[-1].value, self.units],
+ initializer=self.kernel_initializer,
+ regularizer=self.kernel_regularizer,
+ dtype=self.dtype,
+ trainable=True)
if self.use_bias:
self.bias = vs.get_variable('bias',
shape=[self.units,],
@@ -140,7 +142,7 @@ class Dense(base._Layer): # pylint: disable=protected-access
output_shape_tensor = array_ops.stack(output_shape_tensors)
inputs = array_ops.reshape(inputs, [-1, input_dim])
- outputs = standard_ops.matmul(inputs, self.w)
+ outputs = standard_ops.matmul(inputs, self.kernel)
if self.use_bias:
outputs = nn.bias_add(outputs, self.bias)
@@ -158,20 +160,22 @@ def dense(
inputs, units,
activation=None,
use_bias=True,
- weights_initializer=None,
+ kernel_initializer=None,
bias_initializer=init_ops.zeros_initializer(),
- weights_regularizer=None,
+ kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
trainable=True,
name=None,
- reuse=False):
+ reuse=None):
"""Functional interface for the densely-connected layer.
- This layer implements the operation `outputs = activation(inputs.w + b)`
+ This layer implements the operation:
+ `outputs = activation(inputs.kernel + bias)`
Where `activation` is the activation function passed as the `activation`
- argument (if not `None`), `w` is a weights matrix created by the layer,
- and `b` is a bias vector created by the layer (only if `use_bias` is `True`).
+ argument (if not `None`), `kernel` is a weights matrix created by the layer,
+ and `bias` is a bias vector created by the layer
+ (only if `use_bias` is `True`).
Note: if the `inputs` tensor has a rank greater than 2, then it is
flattened prior to the initial matrix multiply by `w`.
@@ -182,9 +186,9 @@ def dense(
activation: Activation function (callable). Set it to None to maintain a
linear activation.
use_bias: Boolean, whether the layer uses a bias.
- weights_initializer: Initializer function for the weight matrix.
+ kernel_initializer: Initializer function for the weight matrix.
bias_initializer: Initializer function for the bias.
- weights_regularizer: Regularizer function for the weight matrix.
+ kernel_regularizer: Regularizer function for the weight matrix.
bias_regularizer: Regularizer function for the bias.
activity_regularizer: Regularizer function for the output.
trainable: Boolean, if `True` also add variables to the graph collection
@@ -199,9 +203,9 @@ def dense(
layer = Dense(units,
activation=activation,
use_bias=use_bias,
- weights_initializer=weights_initializer,
+ kernel_initializer=kernel_initializer,
bias_initializer=bias_initializer,
- weights_regularizer=weights_regularizer,
+ kernel_regularizer=kernel_regularizer,
bias_regularizer=bias_regularizer,
activity_regularizer=activity_regularizer,
trainable=trainable,
diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index c1fbe957df..cfcee7b788 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -39,7 +39,7 @@ class DenseTest(test.TestCase):
dense = core_layers.Dense(2, activation=nn_ops.relu, name='my_dense')
self.assertEqual(dense.units, 2)
self.assertEqual(dense.activation, nn_ops.relu)
- self.assertEqual(dense.weights_regularizer, None)
+ self.assertEqual(dense.kernel_regularizer, None)
self.assertEqual(dense.bias_regularizer, None)
self.assertEqual(dense.activity_regularizer, None)
self.assertEqual(dense.use_bias, True)
@@ -55,36 +55,37 @@ class DenseTest(test.TestCase):
dense = core_layers.Dense(2, activation=nn_ops.relu, name='my_dense')
inputs = random_ops.random_uniform((5, 2), seed=1)
_ = dense(inputs)
- self.assertListEqual(dense.variables, [dense.w, dense.bias])
- self.assertListEqual(dense.trainable_variables, [dense.w, dense.bias])
+ self.assertListEqual(dense.variables, [dense.kernel, dense.bias])
+ self.assertListEqual(dense.trainable_variables, [dense.kernel, dense.bias])
self.assertListEqual(dense.non_trainable_variables, [])
- self.assertListEqual(dense._trainable_variables, [dense.w, dense.bias])
+ self.assertListEqual(dense._trainable_variables, [dense.kernel, dense.bias])
self.assertListEqual(dense._non_trainable_variables, [])
self.assertEqual(
len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2)
- self.assertEqual(dense.w.name, 'my_dense/weights:0')
+ self.assertEqual(dense.kernel.name, 'my_dense/kernel:0')
self.assertEqual(dense.bias.name, 'my_dense/bias:0')
def testNoBias(self):
dense = core_layers.Dense(2, use_bias=False, name='my_dense')
inputs = random_ops.random_uniform((5, 2), seed=1)
_ = dense(inputs)
- self.assertListEqual(dense.variables, [dense.w])
- self.assertListEqual(dense.trainable_variables, [dense.w])
+ self.assertListEqual(dense.variables, [dense.kernel])
+ self.assertListEqual(dense.trainable_variables, [dense.kernel])
self.assertListEqual(dense.non_trainable_variables, [])
self.assertEqual(
len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 1)
- self.assertEqual(dense.w.name, 'my_dense/weights:0')
+ self.assertEqual(dense.kernel.name, 'my_dense/kernel:0')
self.assertEqual(dense.bias, None)
def testNonTrainable(self):
dense = core_layers.Dense(2, trainable=False, name='my_dense')
inputs = random_ops.random_uniform((5, 2), seed=1)
_ = dense(inputs)
- self.assertListEqual(dense.variables, [dense.w, dense.bias])
- self.assertListEqual(dense.non_trainable_variables, [dense.w, dense.bias])
+ self.assertListEqual(dense.variables, [dense.kernel, dense.bias])
+ self.assertListEqual(dense.non_trainable_variables,
+ [dense.kernel, dense.bias])
self.assertListEqual(dense.trainable_variables, [])
- self.assertListEqual(dense._trainable_variables, [dense.w, dense.bias])
+ self.assertListEqual(dense._trainable_variables, [dense.kernel, dense.bias])
self.assertListEqual(dense._non_trainable_variables, [])
self.assertEqual(
len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 0)
@@ -149,25 +150,25 @@ class DenseTest(test.TestCase):
self.assertEqual(len(loss_keys), 1)
self.assertListEqual(dense.losses, loss_keys)
- def testWeightsRegularizer(self):
+ def testKernelRegularizer(self):
regularizer = lambda x: math_ops.reduce_sum(x) * 1e-3
dense = core_layers.Dense(
- 2, name='my_dense', weights_regularizer=regularizer)
+ 2, name='my_dense', kernel_regularizer=regularizer)
inputs = random_ops.random_uniform((5, 3), seed=1)
_ = dense(inputs)
loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
self.assertEqual(len(loss_keys), 1)
self.assertListEqual(dense.losses, loss_keys)
- def testWeightsRegularizerWithReuse(self):
+ def testKernelRegularizerWithReuse(self):
regularizer = lambda x: math_ops.reduce_sum(x) * 1e-3
inputs = random_ops.random_uniform((5, 3), seed=1)
_ = core_layers.dense(
- inputs, 2, name='my_dense', weights_regularizer=regularizer)
+ inputs, 2, name='my_dense', kernel_regularizer=regularizer)
self.assertEqual(
len(ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)), 1)
_ = core_layers.dense(
- inputs, 2, name='my_dense', weights_regularizer=regularizer, reuse=True)
+ inputs, 2, name='my_dense', kernel_regularizer=regularizer, reuse=True)
self.assertEqual(
len(ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)), 1)
@@ -206,6 +207,16 @@ class DenseTest(test.TestCase):
vars2 = variables.trainable_variables()
self.assertEqual(vars1, vars2)
+ def testFunctionalDenseTwiceReuseFromScope(self):
+ with variable_scope.variable_scope('scope'):
+ inputs = random_ops.random_uniform((5, 3), seed=1)
+ core_layers.dense(inputs, 2, name='my_dense')
+ vars1 = variables.trainable_variables()
+ with variable_scope.variable_scope('scope', reuse=True):
+ core_layers.dense(inputs, 2, name='my_dense')
+ vars2 = variables.trainable_variables()
+ self.assertEqual(vars1, vars2)
+
def testFunctionalDenseInitializerFromScope(self):
with self.test_session() as sess:
with variable_scope.variable_scope(
@@ -237,17 +248,17 @@ class DenseTest(test.TestCase):
inputs = random_ops.random_uniform((5, 3), seed=1)
core_layers.dense(inputs, 2, name='my_dense')
var = variables.trainable_variables()[0]
- self.assertEqual(var.name, 'test/my_dense/weights:0')
+ self.assertEqual(var.name, 'test/my_dense/kernel:0')
with variable_scope.variable_scope('test1') as scope:
inputs = random_ops.random_uniform((5, 3), seed=1)
core_layers.dense(inputs, 2, name=scope)
var = variables.trainable_variables()[2]
- self.assertEqual(var.name, 'test1/weights:0')
+ self.assertEqual(var.name, 'test1/kernel:0')
with variable_scope.variable_scope('test2'):
inputs = random_ops.random_uniform((5, 3), seed=1)
core_layers.dense(inputs, 2)
var = variables.trainable_variables()[4]
- self.assertEqual(var.name, 'test2/dense/weights:0')
+ self.assertEqual(var.name, 'test2/dense/kernel:0')
class DropoutTest(test.TestCase):
diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py
index fcbc69f2c5..4a59d77948 100644
--- a/tensorflow/python/layers/normalization.py
+++ b/tensorflow/python/layers/normalization.py
@@ -257,7 +257,7 @@ def batch_normalization(inputs,
training=False,
trainable=True,
name=None,
- reuse=False):
+ reuse=None):
"""Functional interface for the batch normalization layer.
Reference: http://arxiv.org/abs/1502.03167
diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py
index 93efc09ca0..91b7cb6f48 100644
--- a/tensorflow/python/layers/normalization_test.py
+++ b/tensorflow/python/layers/normalization_test.py
@@ -26,6 +26,7 @@ from tensorflow.python.layers import normalization as normalization_layers
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
@@ -454,6 +455,20 @@ class BNTest(test.TestCase):
self.assertAlmostEqual(np.mean(normed_np_output), 0., places=2)
self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
+ def testFunctionalReuseFromScope(self):
+ inputs = variables.Variable(
+ np.random.random((5, 4, 3, 6)), dtype=dtypes.float32)
+ epsilon = 1e-3
+ training = array_ops.placeholder(dtype='bool')
+ with variable_scope.variable_scope('scope'):
+ _ = normalization_layers.batch_norm(
+ inputs, axis=-1, momentum=0.9, epsilon=epsilon, training=training)
+ self.assertEqual(len(variables.global_variables()), 5)
+ with variable_scope.variable_scope('scope', reuse=True):
+ _ = normalization_layers.batch_norm(
+ inputs, axis=-1, momentum=0.9, epsilon=epsilon, training=training)
+ self.assertEqual(len(variables.global_variables()), 5)
+
def testNoCenter(self):
bn = normalization_layers.BatchNormalization(axis=1, center=False)
inputs = random_ops.random_uniform((5, 4, 3), seed=1)
diff --git a/tensorflow/python/ops/confusion_matrix.py b/tensorflow/python/ops/confusion_matrix.py
index 628853545e..95247ea125 100644
--- a/tensorflow/python/ops/confusion_matrix.py
+++ b/tensorflow/python/ops/confusion_matrix.py
@@ -32,8 +32,19 @@ from tensorflow.python.ops import math_ops
from tensorflow.python.ops import sparse_ops
-def remove_squeezable_dimensions(labels, predictions, name=None):
- """Squeeze last dim if ranks of `predictions` and `labels` differ by 1.
+def remove_squeezable_dimensions(
+ labels, predictions, expected_rank_diff=0, name=None):
+ """Squeeze last dim if ranks differ from expected by exactly 1.
+
+ In the common case where we expect shapes to match, `expected_rank_diff`
+ defaults to 0, and we squeeze the last dimension of the larger rank if they
+ differ by 1.
+
+ But, for example, if `labels` contains class IDs and `predictions` contains 1
+ probability per class, we expect `predictions` to have 1 more dimension than
+ `labels`, so `expected_rank_diff` would be 1. In this case, we'd squeeze
+ `labels` if `rank(predictions) - rank(labels) == 0`, and
+ `predictions` if `rank(predictions) - rank(labels) == 2`.
This will use static shape if available. Otherwise, it will add graph
operations, which could result in a performance hit.
@@ -41,6 +52,7 @@ def remove_squeezable_dimensions(labels, predictions, name=None):
Args:
labels: Label values, a `Tensor` whose dimensions match `predictions`.
predictions: Predicted values, a `Tensor` of arbitrary dimensions.
+ expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
name: Name of the op.
Returns:
@@ -57,10 +69,10 @@ def remove_squeezable_dimensions(labels, predictions, name=None):
if (labels_rank is not None) and (predictions_rank is not None):
# Use static rank.
rank_diff = predictions_rank - labels_rank
- if rank_diff == -1:
- labels = array_ops.squeeze(labels, [-1])
- elif rank_diff == 1:
+ if rank_diff == expected_rank_diff + 1:
predictions = array_ops.squeeze(predictions, [-1])
+ elif rank_diff == expected_rank_diff - 1:
+ labels = array_ops.squeeze(labels, [-1])
return labels, predictions
# Use dynamic rank.
@@ -68,13 +80,13 @@ def remove_squeezable_dimensions(labels, predictions, name=None):
if (predictions_rank is None) or (
predictions_shape.dims[-1].is_compatible_with(1)):
predictions = control_flow_ops.cond(
- math_ops.equal(1, rank_diff),
+ math_ops.equal(expected_rank_diff + 1, rank_diff),
lambda: array_ops.squeeze(predictions, [-1]),
lambda: predictions)
if (labels_rank is None) or (
labels_shape.dims[-1].is_compatible_with(1)):
labels = control_flow_ops.cond(
- math_ops.equal(-1, rank_diff),
+ math_ops.equal(expected_rank_diff - 1, rank_diff),
lambda: array_ops.squeeze(labels, [-1]),
lambda: labels)
return labels, predictions
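As a concrete illustration of `expected_rank_diff`, here is a sketch using the `confusion_matrix` module this hunk modifies (shapes and values are illustrative): with class-ID labels and per-class predictions, the expected rank difference is 1, so a trailing singleton dimension on `labels` gets squeezed while `predictions` is left alone.

    from tensorflow.python.framework import constant_op
    from tensorflow.python.ops import confusion_matrix

    labels = constant_op.constant([[1], [0], [2]])  # shape [3, 1], class IDs
    predictions = constant_op.constant(
        [[0.1, 0.8, 0.1], [0.7, 0.2, 0.1], [0.2, 0.2, 0.6]])  # shape [3, 3]

    # rank(predictions) - rank(labels) == 0 == expected_rank_diff - 1,
    # so the trailing dimension of labels is squeezed; predictions is untouched.
    labels, predictions = confusion_matrix.remove_squeezable_dimensions(
        labels, predictions, expected_rank_diff=1)
    # labels now has static shape [3].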
diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py
index 72f0454e30..037c3a8187 100644
--- a/tensorflow/python/ops/data_flow_ops.py
+++ b/tensorflow/python/ops/data_flow_ops.py
@@ -1613,3 +1613,65 @@ class StagingArea(object):
output.set_shape(shape)
return self._get_return_value(ret)
+
+
+class RecordInput(object):
+ """RecordInput asynchronously reads and randomly yields TFRecords.
+
+ A RecordInput Op will continuously read a batch of records asynchronously
+ into a buffer of some fixed capacity. It can also asynchronously yield
+ random records from this buffer.
+
+ It will not start yielding until at least `buffer_size / 2` elements have been
+ placed into the buffer so that sufficient randomization can take place.
+
+  The order in which the files are read will be shifted each epoch by
+  `shift_ratio` so that the data is presented in a different order every epoch.
+ """
+
+ def __init__(self,
+ file_pattern,
+ batch_size=1,
+ buffer_size=1,
+ parallelism=1,
+ shift_ratio=0,
+ seed=0,
+ name=None):
+ """Constructs a RecordInput Op.
+
+ Args:
+ file_pattern: File path to the dataset, possibly containing wildcards.
+ All matching files will be iterated over each epoch.
+ batch_size: How many records to return at a time.
+ buffer_size: The maximum number of records the buffer will contain. This
+ _must_ be smaller than the total number of records in an epoch or
+ deadlock can occur.
+ parallelism: How many reader threads to use for reading from files.
+      shift_ratio: What fraction of the total number of files to advance the
+        start file by each epoch.
+      seed: The random number seed used by the generator that randomizes
+        records.
+ name: Optional name for the operation.
+
+ Raises:
+ ValueError: If one of the arguments is invalid.
+ """
+
+ self._batch_size = batch_size
+ self._file_pattern = file_pattern
+ self._buffer_size = buffer_size
+ self._parallelism = parallelism
+ self._shift_ratio = shift_ratio
+ self._seed = seed
+ self._name = name
+
+ def get_yield_op(self):
+    """Adds a node that yields a minibatch every time it is executed."""
+ return gen_data_flow_ops.record_input(
+ file_pattern=self._file_pattern,
+ file_buffer_size=self._buffer_size,
+ file_parallelism=self._parallelism,
+ file_shuffle_shift_ratio=self._shift_ratio,
+ batch_size=self._batch_size,
+ file_random_seed=self._seed,
+ name=self._name)
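A minimal usage sketch for the new `RecordInput` wrapper (the file pattern and sizes below are hypothetical; compare the new `record_input_test.py` above for a self-contained version):

    from tensorflow.python.client import session
    from tensorflow.python.ops import data_flow_ops

    records = data_flow_ops.RecordInput(
        file_pattern='/tmp/train_data/part-*',  # hypothetical TFRecord shards
        batch_size=32,
        buffer_size=10000,   # must stay below the number of records per epoch
        parallelism=4,
        shift_ratio=0.1,
        seed=301)
    yield_op = records.get_yield_op()

    with session.Session() as sess:
      minibatch = sess.run(yield_op)  # a batch of 32 serialized records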
diff --git a/tensorflow/python/ops/hidden_ops.txt b/tensorflow/python/ops/hidden_ops.txt
index 4b1b9815ca..16068e57d8 100644
--- a/tensorflow/python/ops/hidden_ops.txt
+++ b/tensorflow/python/ops/hidden_ops.txt
@@ -237,6 +237,7 @@ Max
Mean
Min
Mul
+Neg
Pow
Prod
Range
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index b6da60770d..c231ca56bb 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -449,6 +449,42 @@ class AdjustSaturationBenchmark(test.Benchmark):
self._benchmarkAdjustSaturation(test.gpu_device_name(), None)
+class ResizeBilinearBenchmark(test.Benchmark):
+
+ def _benchmarkResize(self, image_size):
+ # 4D float tensor (10 images per batch, 3 channels per image)
+ img = variables.Variable(
+ random_ops.random_normal([10, image_size[0], image_size[1], 3]),
+ name='img')
+
+ deps = []
+ for _ in xrange(100):
+ with ops.control_dependencies(deps):
+ resize_op = image_ops.resize_bilinear(
+ img, [299, 299], align_corners=False)
+ deps = [resize_op]
+ benchmark_op = control_flow_ops.group(*deps)
+
+ with session.Session() as sess:
+ sess.run(variables.global_variables_initializer())
+      print('Variables initialized for resize_bilinear image size: %s.' %
+ (image_size,))
+ benchmark_values = self.run_op_benchmark(
+ sess,
+ benchmark_op,
+ name=('bilinear_%s_%s' % image_size),)
+ print('Benchmark values:\n%s' % benchmark_values)
+
+ def benchmarkSimilar(self):
+ self._benchmarkResize((183, 229))
+
+ def benchmarkScaleUp(self):
+ self._benchmarkResize((141, 186))
+
+ def benchmarkScaleDown(self):
+ self._benchmarkResize((749, 603))
+
+
class ResizeBicubicBenchmark(test.Benchmark):
def _benchmarkResize(self, image_size):
diff --git a/tensorflow/python/ops/losses/BUILD b/tensorflow/python/ops/losses/BUILD
index 47d4d594d6..c4ce11ce0f 100644
--- a/tensorflow/python/ops/losses/BUILD
+++ b/tensorflow/python/ops/losses/BUILD
@@ -22,12 +22,15 @@ py_library(
srcs_version = "PY2AND3",
deps = [
"//tensorflow/python:array_ops",
+ "//tensorflow/python:confusion_matrix",
+ "//tensorflow/python:control_flow_ops",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:math_ops",
"//tensorflow/python:nn",
"//tensorflow/python:nn_ops",
"//tensorflow/python:platform",
"//tensorflow/python:util",
+ "//tensorflow/python:weights_broadcast_ops",
],
)
diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index 486e25afc7..89daa9594a 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -20,11 +20,13 @@ from __future__ import print_function
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import confusion_matrix
+from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import weights_broadcast_ops
from tensorflow.python.ops.losses import util
-from tensorflow.python.platform import tf_logging as logging
def _scale_losses(losses, weights):
@@ -46,13 +48,8 @@ def _scale_losses(losses, weights):
A scalar tf.float32 `Tensor` whose value represents the sum of the scaled
`losses`.
"""
- # First, compute the sum of the losses over all elements:
- start_index = max(0, weights.get_shape().ndims)
- reduction_indices = list(range(start_index, losses.get_shape().ndims))
- reduced_losses = math_ops.reduce_sum(losses,
- reduction_indices=reduction_indices)
- reduced_losses = math_ops.multiply(reduced_losses, weights)
- return math_ops.reduce_sum(reduced_losses)
+ weighted_losses = math_ops.multiply(losses, weights)
+ return math_ops.reduce_sum(weighted_losses)
def _safe_div(numerator, denominator, name="value"):
@@ -117,51 +114,29 @@ def _num_present(losses, weights, per_batch=False):
`per_batch` is `True`, the value is returned as a tensor of size
`[batch_size]`. Otherwise, a single scalar tensor is returned.
"""
- # If weights is a scalar, its easy to compute:
- if weights.get_shape().ndims == 0:
- if losses.get_shape().ndims == 0:
- batch_size = 1
- else:
- batch_size = array_ops.reshape(array_ops.slice(array_ops.shape(losses),
- [0], [1]), [])
- num_per_batch = math_ops.div(math_ops.to_float(array_ops.size(losses)),
- math_ops.to_float(batch_size))
- num_per_batch = array_ops.where(math_ops.equal(weights, 0),
- 0.0, num_per_batch)
- num_per_batch = math_ops.multiply(array_ops.ones(
- array_ops.reshape(batch_size, [1])), num_per_batch)
- return num_per_batch if per_batch else math_ops.reduce_sum(num_per_batch)
-
- # First, count the number of nonzero weights.
- if weights.get_shape().ndims >= 1:
- reduction_indices = list(range(1, weights.get_shape().ndims))
- num_nonzero_per_batch = math_ops.reduce_sum(
- math_ops.to_float(math_ops.not_equal(weights, 0)),
- reduction_indices=reduction_indices)
-
- # Next, determine the number of elements that weight would broadcast to:
- broadcast_dims = array_ops.slice(array_ops.shape(losses),
- [weights.get_shape().ndims], [-1])
- num_to_broadcast = math_ops.to_float(math_ops.reduce_prod(broadcast_dims))
-
- num_per_batch = math_ops.multiply(num_nonzero_per_batch, num_to_broadcast)
- return num_per_batch if per_batch else math_ops.reduce_sum(num_per_batch)
+ with ops.name_scope(None, "num_present", (losses, weights)) as scope:
+ weights = math_ops.to_float(weights)
+ present = array_ops.where(
+ math_ops.equal(weights, 0.0),
+ array_ops.zeros_like(weights),
+ array_ops.ones_like(weights))
+ present = weights_broadcast_ops.broadcast_weights(present, losses)
+ if per_batch:
+ return math_ops.reduce_sum(
+ present, axis=math_ops.range(1, array_ops.rank(present)),
+ keep_dims=True, name=scope)
+ return math_ops.reduce_sum(present, name=scope)
def compute_weighted_loss(
losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES):
"""Computes the weighted loss.
- WARNING: `weights` also supports dimensions of 1, but the broadcasting does
- not work as advertised, you'll wind up with weighted sum instead of weighted
- mean for any but the last dimension. This will be cleaned up soon, so please
- do not rely on the current behavior for anything but the shapes documented for
- `weights` below.
-
Args:
losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
- weights: `Tensor` of shape `[]`, `[batch_size]` or
- `[batch_size, d1, ... dK]`, where K < N.
+ weights: Optional `Tensor` whose rank is either 0, or the same rank as
+ `losses`, and must be broadcastable to `losses` (i.e., all dimensions must
+ be either `1`, or the same as the corresponding `losses` dimension).
scope: the scope for the operations performed in computing the loss.
loss_collection: the loss will be added to these collections.
@@ -173,52 +148,20 @@ def compute_weighted_loss(
`losses`, or if the number of dimensions (rank) of either `losses` or
`weights` is missing.
"""
- with ops.name_scope(scope, "weighted_loss", [losses, weights]):
- losses = ops.convert_to_tensor(losses)
- input_dtype = losses.dtype
- losses = math_ops.to_float(losses)
- weights = math_ops.to_float(ops.convert_to_tensor(weights))
-
- losses_shape = losses.get_shape()
- if losses_shape.ndims is None:
- raise ValueError("losses.get_shape().ndims cannot be None")
- weights_shape = weights.get_shape()
- if weights_shape.ndims is None:
- raise ValueError("weight.get_shape().ndims cannot be None")
-
- # TODO(b/33556118): Remove `ndims > 1` check so shapes [] and [1] behave the
- # same.
- if weights_shape.ndims > 1 and weights_shape.dims[-1].is_compatible_with(1):
- weights = array_ops.squeeze(weights, [-1])
-
- # TODO(b/33556118): Remove this when we require weights shape be either
- # scalar or the same as losses.
- weights_dims = weights_shape.as_list()
- losses_dims = losses_shape.as_list()
- if len(weights_dims) > len(losses_dims):
- raise ValueError(
- "Invalid weights shape %s can not be broadcast to losses %s." % (
- weights_shape, losses_shape))
- for i in range(len(weights_dims)):
- if ((losses_dims[i] is not None) and (losses_dims[i] == 1) and
- (weights_dims[i] is not None) and (weights_dims[i] != 1)):
- raise ValueError(
- "Invalid weights shape %s can not be broadcast to losses %s." % (
- weights_shape, losses_shape))
- for i in range(len(weights_dims)):
- if ((losses_dims[i] is not None) and (losses_dims[i] != 1) and
- (weights_dims[i] is not None) and (weights_dims[i] == 1)):
- logging.warn(
- "WARNING: Weights %s with dimension 1 will result in a sum"
- ", not average, across dimension %d.", weights_shape, i)
-
- total_loss = _scale_losses(losses, weights)
- num_present = _num_present(losses, weights)
- mean_loss = _safe_mean(total_loss, num_present)
- # Convert the result back to the input type.
- mean_loss = math_ops.cast(mean_loss, input_dtype)
- util.add_loss(mean_loss, loss_collection)
- return mean_loss
+ with ops.name_scope(scope, "weighted_loss", (losses, weights)):
+ with ops.control_dependencies((
+ weights_broadcast_ops.assert_broadcastable(weights, losses),)):
+ losses = ops.convert_to_tensor(losses)
+ input_dtype = losses.dtype
+ losses = math_ops.to_float(losses)
+ weights = math_ops.to_float(weights)
+ total_loss = _scale_losses(losses, weights)
+ num_present = _num_present(losses, weights)
+ mean_loss = _safe_mean(total_loss, num_present)
+ # Convert the result back to the input type.
+ mean_loss = math_ops.cast(mean_loss, input_dtype)
+ util.add_loss(mean_loss, loss_collection)
+ return mean_loss
def absolute_difference(
@@ -234,17 +177,12 @@ def absolute_difference(
measurable element of `predictions` is scaled by the corresponding value of
`weights`.
- WARNING: `weights` also supports dimensions of 1, but the broadcasting does
- not work as advertised, you'll wind up with weighted sum instead of weighted
- mean for any but the last dimension. This will be cleaned up soon, so please
- do not rely on the current behavior for anything but the shapes documented for
- `weights` below.
-
Args:
labels: The ground truth output tensor, same dimensions as 'predictions'.
predictions: The predicted outputs.
- weights: Coefficients for the loss a scalar, a tensor of shape
- `[batch_size]` or a tensor whose shape matches `predictions`.
+ weights: Optional `Tensor` whose rank is either 0, or the same rank as
+ `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
+ be either `1`, or the same as the corresponding `losses` dimension).
scope: The scope for the operations performed in computing the loss.
loss_collection: collection to which this loss will be added.
@@ -272,18 +210,13 @@ def cosine_distance(
Note that the function assumes that `predictions` and `labels` are already
unit-normalized.
- WARNING: `weights` also supports dimensions of 1, but the broadcasting does
- not work as advertised, you'll wind up with weighted sum instead of weighted
- mean for any but the last dimension. This will be cleaned up soon, so please
- do not rely on the current behavior for anything but the shapes documented for
- `weights` below.
-
Args:
labels: `Tensor` whose shape matches 'predictions'
predictions: An arbitrary matrix.
dim: The dimension along which the cosine distance is computed.
- weights: Coefficients for the loss a scalar, a tensor of shape
- `[batch_size]` or a tensor whose shape matches `predictions`.
+ weights: Optional `Tensor` whose rank is either 0, or the same rank as
+ `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
+ be either `1`, or the same as the corresponding `losses` dimension).
scope: The scope for the operations performed in computing the loss.
loss_collection: collection to which this loss will be added.
@@ -303,7 +236,7 @@ def cosine_distance(
predictions.get_shape().assert_is_compatible_with(labels.get_shape())
radial_diffs = math_ops.multiply(predictions, labels)
- losses = 1 - math_ops.reduce_sum(radial_diffs, reduction_indices=[dim,])
+ losses = 1 - math_ops.reduce_sum(radial_diffs, axis=(dim,), keep_dims=True)
return compute_weighted_loss(losses, weights, scope, loss_collection)
@@ -311,18 +244,13 @@ def hinge_loss(labels, logits, weights=1.0, scope=None,
loss_collection=ops.GraphKeys.LOSSES):
"""Adds a hinge loss to the training procedure.
- WARNING: `weights` also supports dimensions of 1, but the broadcasting does
- not work as advertised, you'll wind up with weighted sum instead of weighted
- mean for any but the last dimension. This will be cleaned up soon, so please
- do not rely on the current behavior for anything but the shapes documented for
- `weights` below.
-
Args:
labels: The ground truth output tensor. Its shape should match the shape of
logits. The values of the tensor are expected to be 0.0 or 1.0.
logits: The logits, a float tensor.
- weights: Coefficients for the loss a scalar, a tensor of shape
- `[batch_size]` or a tensor whose shape matches `predictions`.
+ weights: Optional `Tensor` whose rank is either 0, or the same rank as
+ `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
+ be either `1`, or the same as the corresponding `losses` dimension).
scope: The scope for the operations performed in computing the loss.
loss_collection: collection to which the loss will be added.
@@ -356,17 +284,12 @@ def log_loss(labels, predictions, weights=1.0, epsilon=1e-7, scope=None,
measurable element of `predictions` is scaled by the corresponding value of
`weights`.
- WARNING: `weights` also supports dimensions of 1, but the broadcasting does
- not work as advertised, you'll wind up with weighted sum instead of weighted
- mean for any but the last dimension. This will be cleaned up soon, so please
- do not rely on the current behavior for anything but the shapes documented for
- `weights` below.
-
Args:
labels: The ground truth output tensor, same dimensions as 'predictions'.
predictions: The predicted outputs.
- weights: Coefficients for the loss a scalar, a tensor of shape
- `[batch_size]` or a tensor whose shape matches `predictions`.
+ weights: Optional `Tensor` whose rank is either 0, or the same rank as
+ `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
+ be either `1`, or the same as the corresponding `losses` dimension).
epsilon: A small increment to add to avoid taking a log of zero.
scope: The scope for the operations performed in computing the loss.
loss_collection: collection to which the loss will be added.
@@ -434,41 +357,39 @@ def mean_pairwise_squared_error(labels, predictions, weights=1.0, scope=None,
"""
with ops.name_scope(scope, "mean_pairwise_squared_error",
(predictions, labels, weights)) as scope:
- predictions = math_ops.to_float(predictions)
+ weights = math_ops.to_float(weights)
labels = math_ops.to_float(labels)
- predictions.get_shape().assert_is_compatible_with(labels.get_shape())
- weights = math_ops.to_float(ops.convert_to_tensor(weights))
-
- diffs = math_ops.subtract(predictions, labels)
+ with ops.control_dependencies((
+ weights_broadcast_ops.assert_broadcastable(weights, labels),)):
+ predictions = math_ops.to_float(predictions)
+ predictions.get_shape().assert_is_compatible_with(labels.get_shape())
- # Need to verify here since the function doesn't use compute_weighted_loss
- if diffs.get_shape().ndims is None:
- raise ValueError("diffs.get_shape().ndims cannot be None")
- if weights.get_shape().ndims is None:
- raise ValueError("weights.get_shape().ndims cannot be None")
+ diffs = math_ops.subtract(predictions, labels)
- reduction_indices = list(range(1, diffs.get_shape().ndims))
+ reduction_indices = math_ops.range(1, array_ops.rank(diffs))
- sum_squares_diff_per_batch = math_ops.reduce_sum(
- math_ops.square(diffs),
- reduction_indices=reduction_indices)
- num_present_per_batch = _num_present(diffs, weights, per_batch=True)
+ sum_squares_diff_per_batch = math_ops.reduce_sum(
+ math_ops.square(diffs), reduction_indices=reduction_indices,
+ keep_dims=True)
+ num_present_per_batch = _num_present(diffs, weights, per_batch=True)
- term1 = 2.0 * _safe_div(sum_squares_diff_per_batch,
- num_present_per_batch)
+ term1 = 2.0 * _safe_div(sum_squares_diff_per_batch,
+ num_present_per_batch)
- sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices)
- term2 = 2.0 * _safe_div(math_ops.square(sum_diff),
- math_ops.square(num_present_per_batch))
+ sum_diff = math_ops.reduce_sum(
+ diffs, reduction_indices=reduction_indices, keep_dims=True)
+ term2 = 2.0 * _safe_div(math_ops.square(sum_diff),
+ math_ops.square(num_present_per_batch))
- loss = _scale_losses(term1 - term2, weights)
+ loss = _scale_losses(term1 - term2, weights)
- mean_loss = array_ops.where(math_ops.reduce_sum(num_present_per_batch) > 0,
- loss,
- array_ops.zeros_like(loss),
- name="value")
- util.add_loss(mean_loss, loss_collection)
- return mean_loss
+ mean_loss = array_ops.where(
+ math_ops.reduce_sum(num_present_per_batch) > 0,
+ loss,
+ array_ops.zeros_like(loss),
+ name="value")
+ util.add_loss(mean_loss, loss_collection)
+ return mean_loss
def mean_squared_error(labels, predictions, weights=1.0, scope=None,
@@ -483,17 +404,12 @@ def mean_squared_error(labels, predictions, weights=1.0, scope=None,
measurable element of `predictions` is scaled by the corresponding value of
`weights`.
- WARNING: `weights` also supports dimensions of 1, but the broadcasting does
- not work as advertised, you'll wind up with weighted sum instead of weighted
- mean for any but the last dimension. This will be cleaned up soon, so please
- do not rely on the current behavior for anything but the shapes documented for
- `weights` below.
-
Args:
labels: The ground truth output tensor, same dimensions as 'predictions'.
predictions: The predicted outputs.
- weights: Coefficients for the loss a scalar, a tensor of shape
- `[batch_size]` or a tensor whose shape matches `predictions`.
+ weights: Optional `Tensor` whose rank is either 0, or the same rank as
+ `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
+ be either `1`, or the same as the corresponding `losses` dimension).
scope: The scope for the operations performed in computing the loss.
loss_collection: collection to which the loss will be added.
@@ -523,12 +439,6 @@ def sigmoid_cross_entropy(
tensor of shape `[batch_size]`, then the loss weights apply to each
corresponding sample.
- WARNING: `weights` also supports dimensions of 1, but the broadcasting does
- not work as advertised, you'll wind up with weighted sum instead of weighted
- mean for any but the last dimension. This will be cleaned up soon, so please
- do not rely on the current behavior for anything but the shapes documented for
- `weights` below.
-
If `label_smoothing` is nonzero, smooth the labels towards 1/2:
new_multiclass_labels = multiclass_labels * (1 - label_smoothing)
@@ -538,8 +448,9 @@ def sigmoid_cross_entropy(
multi_class_labels: `[batch_size, num_classes]` target integer labels in
`(0, 1)`.
logits: `[batch_size, num_classes]` logits outputs of the network.
- weights: Coefficients for the loss. This must be of shape `[]`,
- `[batch_size]` or `[batch_size, num_classes]`.
+ weights: Optional `Tensor` whose rank is either 0, or the same rank as
+ `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
+ be either `1`, or the same as the corresponding `losses` dimension).
label_smoothing: If greater than `0` then smooth the labels.
scope: The scope for the operations performed in computing the loss.
loss_collection: collection to which the loss will be added.
@@ -578,12 +489,6 @@ def softmax_cross_entropy(
tensor of shape `[batch_size]`, then the loss weights apply to each
corresponding sample.
- WARNING: `weights` also supports dimensions of 1, but the broadcasting does
- not work as advertised, you'll wind up with weighted sum instead of weighted
- mean for any but the last dimension. This will be cleaned up soon, so please
- do not rely on the current behavior for anything but the shapes documented for
- `weights` below.
-
If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
new_onehot_labels = onehot_labels * (1 - label_smoothing)
+ label_smoothing / num_classes
@@ -591,8 +496,10 @@ def softmax_cross_entropy(
Args:
onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels.
logits: [batch_size, num_classes] logits outputs of the network .
- weights: Coefficients for the loss. This must be of shape `[]`,
- `[batch_size]` or `[batch_size, num_classes]`.
+ weights: Optional `Tensor` whose rank is either 0, or the same rank as
+ `onehot_labels`, and must be broadcastable to `onehot_labels` (i.e., all
+ dimensions must be either `1`, or the same as the corresponding `losses`
+ dimension).
label_smoothing: If greater than 0 then smooth the labels.
scope: the scope for the operations performed in computing the loss.
loss_collection: collection to which the loss will be added.
@@ -623,6 +530,57 @@ def softmax_cross_entropy(
return compute_weighted_loss(losses, weights, scope, loss_collection)
+# TODO(ptucker): Merge this with similar method in metrics_impl.
+def _remove_squeezable_dimensions(
+ labels, predictions, weights=None, expected_rank_diff=0):
+  """Internal version of remove_squeezable_dimensions which handles weights.
+
+ Squeezes `predictions` and `labels` if their ranks differ from expected by
+ exactly 1.
+  Squeezes `weights` if its rank is 1 more than the new rank of `predictions`.
+
+ This will use static shape if available. Otherwise, it will add graph
+ operations, which could result in a performance hit.
+
+ Args:
+ labels: Label values, a `Tensor` whose dimensions match `predictions`.
+ predictions: Predicted values, a `Tensor` of arbitrary dimensions.
+ weights: Optional weight `Tensor`. It will be squeezed if it's not scalar,
+ and its rank is 1 more than the new rank of `labels`.
+ expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
+
+ Returns:
+    Tuple of `labels`, `predictions` and `weights`, possibly with the last
+    dimension squeezed.
+ """
+ labels, predictions = confusion_matrix.remove_squeezable_dimensions(
+ labels, predictions, expected_rank_diff=expected_rank_diff)
+
+ if weights is not None:
+ weights = ops.convert_to_tensor(weights)
+ labels_rank = labels.get_shape().ndims
+ weights_shape = weights.get_shape()
+ weights_rank = weights_shape.ndims
+
+ if (labels_rank is not None) and (weights_rank is not None):
+ # Use static rank.
+ rank_diff = weights_rank - labels_rank
+ if rank_diff == 1:
+ weights = array_ops.squeeze(weights, [-1])
+ return labels, predictions, weights
+
+ # Use dynamic rank.
+ rank_diff = array_ops.rank(weights) - array_ops.rank(labels)
+ if (weights_rank is None) or (
+ weights_shape.dims[-1].is_compatible_with(1)):
+ weights = control_flow_ops.cond(
+ math_ops.equal(1, rank_diff),
+ lambda: array_ops.squeeze(weights, [-1]),
+ lambda: weights)
+
+ return labels, predictions, weights
+
+
def sparse_softmax_cross_entropy(labels, logits, weights=1.0, scope=None,
loss_collection=ops.GraphKeys.LOSSES):
"""Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.
@@ -632,18 +590,16 @@ def sparse_softmax_cross_entropy(labels, logits, weights=1.0, scope=None,
tensor of shape [`batch_size`], then the loss weights apply to each
corresponding sample.
- WARNING: `weights` also supports dimensions of 1, but the broadcasting does
- not work as advertised, you'll wind up with weighted sum instead of weighted
- mean for any but the last dimension. This will be cleaned up soon, so please
- do not rely on the current behavior for anything but the shapes documented for
- `weights` below.
-
Args:
- labels: [batch_size, 1] or [batch_size] target labels of dtype `int32` or
- `int64` in the range `[0, num_classes)`.
- logits: [batch_size, num_classes] logits outputs of the network .
- weights: Coefficients for the loss. This must be of shape `[batch_size]` or
- `[batch_size, 1]`.
+ labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
+ `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
+ must be an index in `[0, num_classes)`. Other values will raise an
+ exception when this op is run on CPU, and return `NaN` for corresponding
+ loss and gradient rows on GPU.
+ logits: Unscaled log probabilities of shape
+ `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or `float64`.
+    weights: Coefficients for the loss. This must be scalar or of the same rank
+      as `labels`.
scope: the scope for the operations performed in computing the loss.
loss_collection: collection to which the loss will be added.
@@ -655,12 +611,13 @@ def sparse_softmax_cross_entropy(labels, logits, weights=1.0, scope=None,
if `weights` is None.
"""
with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
- [logits, labels, weights]) as scope:
- labels = array_ops.reshape(labels, shape=[array_ops.shape(labels)[0]])
-
+ (logits, labels, weights)) as scope:
+    # As documented above in Args, labels contain class IDs and logits contains
+    # one logit per class ID, so we expect rank(logits) - rank(labels) == 1;
+    # therefore, expected_rank_diff=1.
+ labels, logits, weights = _remove_squeezable_dimensions(
+ labels, logits, weights, expected_rank_diff=1)
losses = nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
logits=logits,
name="xentropy")
- # Reshape losses to [batch_size, 1] to be consistent with weights.
- losses = array_ops.reshape(losses, shape=[array_ops.shape(losses)[0], 1])
return compute_weighted_loss(losses, weights, scope, loss_collection)
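For intuition on the shapes documented above (`labels` of shape `[d_0, ..., d_{r-1}]`, `logits` of shape `[d_0, ..., d_{r-1}, num_classes]`), here is a hedged NumPy sketch of an unweighted sparse softmax cross-entropy; it is illustrative only and not the TensorFlow kernel.

  import numpy as np

  def sparse_xent(labels, logits):
      # labels: int array [d_0, ..., d_{r-1}]; logits: float array [..., num_classes].
      shifted = logits - logits.max(axis=-1, keepdims=True)   # numerical stability
      log_probs = shifted - np.log(np.exp(shifted).sum(axis=-1, keepdims=True))
      picked = np.take_along_axis(log_probs, labels[..., None], axis=-1)
      return -np.squeeze(picked, axis=-1)                     # shape [d_0, ..., d_{r-1}]

  labels = np.array([[0, 2], [1, 1]])                    # rank 2
  logits = np.random.randn(2, 2, 3).astype(np.float32)   # rank 3: rank(logits) - rank(labels) == 1
  losses = sparse_xent(labels, logits)                   # shape (2, 2), one loss per label entry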
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index c9ad0936a5..11e7d8382f 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -260,6 +260,8 @@ def argmax(input, axis=None, name=None, dimension=None):
if axis is not None:
raise ValueError("Cannot specify both 'axis' and 'dimension'")
axis = dimension
+ elif axis is None:
+ axis = 0
return gen_math_ops.arg_max(input, axis, name)
@@ -273,6 +275,8 @@ def argmin(input, axis=None, name=None, dimension=None):
if axis is not None:
raise ValueError("Cannot specify both 'axis' and 'dimension'")
axis = dimension
+ elif axis is None:
+ axis = 0
return gen_math_ops.arg_min(input, axis, name)
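The two hunks above make `axis` default to 0 when neither `axis` nor the deprecated `dimension` argument is passed, so a bare `argmax(x)` reduces along dimension 0. A short hedged illustration of what that default means, contrasted with NumPy, which flattens when no axis is given:

  import numpy as np

  x = np.array([[1, 9, 3],
                [7, 2, 8]])
  np.argmax(x)          # 1: NumPy flattens the array when no axis is given
  np.argmax(x, axis=0)  # array([1, 0, 1]): the column-wise reduction that axis=0 selects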
@@ -399,11 +403,11 @@ def negative(x, name=None):
"""
with ops.name_scope(name, "Neg", [x]) as name:
if isinstance(x, sparse_tensor.SparseTensor):
- x_neg = gen_math_ops.neg(x.values, name=name)
+ x_neg = gen_math_ops._neg(x.values, name=name)
return sparse_tensor.SparseTensor(
indices=x.indices, values=x_neg, dense_shape=x.dense_shape)
else:
- return gen_math_ops.neg(x, name=name)
+ return gen_math_ops._neg(x, name=name)
# pylint: enable=g-docstring-has-escape
@@ -857,7 +861,7 @@ def to_bfloat16(x, name="ToBFloat16"):
return cast(x, dtypes.bfloat16, name=name)
-ops.Tensor._override_operator("__neg__", gen_math_ops.neg)
+ops.Tensor._override_operator("__neg__", gen_math_ops._neg)
ops.Tensor._override_operator("__abs__", abs)
# __invert__ corresponds to the ~ operator. Here we follow the numpy convention
# ~ marks an elementwise bit-wise inverse. This is only implemented for boolean
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index a00625d083..0a109eb99b 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -87,7 +87,7 @@ def _remove_squeezable_dimensions(labels, predictions, weights):
weights = array_ops.squeeze(weights, [-1])
elif (weights_rank is None) or (
weights_shape.dims[-1].is_compatible_with(1)):
- # Use dynamic rank
+ # Use dynamic rank.
weights = control_flow_ops.cond(
math_ops.equal(array_ops.rank(weights),
math_ops.add(array_ops.rank(predictions), 1)),
@@ -354,8 +354,8 @@ def _confusion_matrix_at_thresholds(
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
Args:
- labels: A `Tensor` whose shape matches `predictions`. `labels` will be cast
- to `bool`.
+ labels: A `Tensor` whose shape matches `predictions`. Will be cast to
+ `bool`.
predictions: A floating point `Tensor` of arbitrary shape and whose values
are in the range `[0, 1]`.
thresholds: A python list or tuple of float thresholds in `[0, 1]`.
@@ -384,6 +384,8 @@ def _confusion_matrix_at_thresholds(
if include not in all_includes:
      raise ValueError('Invalid key: %s.' % include)
+ labels = math_ops.cast(labels, dtype=dtypes.bool)
+ predictions = math_ops.to_float(predictions)
labels, predictions, weights = _remove_squeezable_dimensions(
labels, predictions, weights)
predictions.get_shape().assert_is_compatible_with(labels.get_shape())
@@ -503,7 +505,8 @@ def auc(labels, predictions, weights=None, num_thresholds=200,
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
Args:
- labels: A `bool` `Tensor` whose shape matches `predictions`.
+ labels: A `Tensor` whose shape matches `predictions`. Will be cast to
+ `bool`.
predictions: A floating point `Tensor` of arbitrary shape and whose values
are in the range `[0, 1]`.
weights: Optional `Tensor` whose rank is either 0, or the same rank as
@@ -1101,10 +1104,10 @@ def true_positives(labels, predictions, weights=None,
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
Args:
- labels: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
- predictions: The predicted values, a `bool` `Tensor` of arbitrary
- dimensions.
+ labels: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
+ predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
weights: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions must
be either `1`, or the same as the corresponding `labels` dimension).
@@ -1127,11 +1130,11 @@ def true_positives(labels, predictions, weights=None,
with variable_scope.variable_scope(
name, 'true_positives', (predictions, labels, weights)):
- predictions = ops.convert_to_tensor(predictions)
- labels = ops.convert_to_tensor(labels)
+ labels = math_ops.cast(labels, dtype=dtypes.bool)
+ predictions = math_ops.cast(predictions, dtype=dtypes.bool)
predictions.get_shape().assert_is_compatible_with(labels.get_shape())
- is_true_positive = math_ops.logical_and(math_ops.equal(labels, 1),
- math_ops.equal(predictions, 1))
+ is_true_positive = math_ops.logical_and(math_ops.equal(labels, True),
+ math_ops.equal(predictions, True))
return _count_condition(is_true_positive, weights, metrics_collections,
updates_collections)
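The counting metrics above now cast both inputs to `bool` before comparing. A small hedged NumPy analogue of the weighted true-positive count (no streaming state, illustration only):

  import numpy as np

  labels      = np.array([1, 0, 1, 1, 0])      # any numeric dtype; cast to bool below
  predictions = np.array([1, 1, 1, 0, 0])
  weights     = np.array([1., 1., 2., 1., 1.])
  is_tp = np.logical_and(labels.astype(bool), predictions.astype(bool))
  true_positives = np.sum(weights * is_tp)     # 3.0: samples 0 and 2 (the latter with weight 2)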
@@ -1145,10 +1148,10 @@ def false_positives(labels, predictions, weights=None,
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
Args:
- labels: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
- predictions: The predicted values, a `bool` `Tensor` of arbitrary
- dimensions.
+ labels: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
+ predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
weights: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions must
be either `1`, or the same as the corresponding `labels` dimension).
@@ -1171,11 +1174,11 @@ def false_positives(labels, predictions, weights=None,
with variable_scope.variable_scope(
name, 'false_positives', (predictions, labels, weights)):
- predictions = ops.convert_to_tensor(predictions)
- labels = ops.convert_to_tensor(labels)
+ labels = math_ops.cast(labels, dtype=dtypes.bool)
+ predictions = math_ops.cast(predictions, dtype=dtypes.bool)
predictions.get_shape().assert_is_compatible_with(labels.get_shape())
- is_false_positive = math_ops.logical_and(math_ops.equal(labels, 0),
- math_ops.equal(predictions, 1))
+ is_false_positive = math_ops.logical_and(math_ops.equal(labels, False),
+ math_ops.equal(predictions, True))
return _count_condition(is_false_positive, weights, metrics_collections,
updates_collections)
@@ -1199,9 +1202,10 @@ def precision(labels, predictions, weights=None,
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
Args:
- labels: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
- predictions: The predicted values, a `bool` `Tensor` of arbitrary shape.
+ labels: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
+ predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
weights: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions must
be either `1`, or the same as the corresponding `labels` dimension).
@@ -1227,6 +1231,8 @@ def precision(labels, predictions, weights=None,
with variable_scope.variable_scope(
name, 'precision', (predictions, labels, weights)):
+ labels = math_ops.cast(labels, dtype=dtypes.bool)
+ predictions = math_ops.cast(predictions, dtype=dtypes.bool)
labels, predictions, weights = _remove_squeezable_dimensions(
labels, predictions, weights)
predictions.get_shape().assert_is_compatible_with(labels.get_shape())
@@ -1279,7 +1285,8 @@ def precision_at_thresholds(labels, predictions, thresholds,
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
Args:
- labels: A `bool` `Tensor` whose shape matches `predictions`.
+ labels: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
predictions: A floating point `Tensor` of arbitrary shape and whose values
are in the range `[0, 1]`.
thresholds: A python list or tuple of float thresholds in `[0, 1]`.
@@ -1336,10 +1343,10 @@ def false_negatives(labels, predictions, weights=None,
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
Args:
- labels: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
- predictions: The predicted values, a `bool` `Tensor` of arbitrary
- dimensions.
+ labels: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
+ predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
weights: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions must
be either `1`, or the same as the corresponding `labels` dimension).
@@ -1361,11 +1368,11 @@ def false_negatives(labels, predictions, weights=None,
with variable_scope.variable_scope(
name, 'false_negatives', (predictions, labels, weights)):
- predictions = ops.convert_to_tensor(predictions)
- labels = ops.convert_to_tensor(labels)
+ labels = math_ops.cast(labels, dtype=dtypes.bool)
+ predictions = math_ops.cast(predictions, dtype=dtypes.bool)
predictions.get_shape().assert_is_compatible_with(labels.get_shape())
- is_false_negative = math_ops.logical_and(math_ops.equal(labels, 1),
- math_ops.equal(predictions, 0))
+ is_false_negative = math_ops.logical_and(math_ops.equal(labels, True),
+ math_ops.equal(predictions, False))
return _count_condition(is_false_negative, weights, metrics_collections,
updates_collections)
@@ -1387,9 +1394,10 @@ def recall(labels, predictions, weights=None,
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
Args:
- labels: The ground truth values, a `bool` `Tensor` whose dimensions must
- match `predictions`.
- predictions: The predicted values, a `bool` `Tensor` of arbitrary shape.
+ labels: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
+ predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
+ be cast to `bool`.
weights: Optional `Tensor` whose rank is either 0, or the same rank as
`labels`, and must be broadcastable to `labels` (i.e., all dimensions must
be either `1`, or the same as the corresponding `labels` dimension).
@@ -1414,6 +1422,8 @@ def recall(labels, predictions, weights=None,
"""
with variable_scope.variable_scope(
name, 'recall', (predictions, labels, weights)):
+ labels = math_ops.cast(labels, dtype=dtypes.bool)
+ predictions = math_ops.cast(predictions, dtype=dtypes.bool)
labels, predictions, weights = _remove_squeezable_dimensions(
labels, predictions, weights)
predictions.get_shape().assert_is_compatible_with(labels.get_shape())
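Continuing the sketch after the true_positives hunk, precision and recall over the same cast-to-bool inputs (again a plain NumPy illustration, not the streaming metrics defined here):

  import numpy as np

  labels      = np.array([1, 0, 1, 1, 0]).astype(bool)
  predictions = np.array([1, 1, 1, 0, 0]).astype(bool)
  tp = np.sum(np.logical_and(labels, predictions))    # 2
  fp = np.sum(np.logical_and(~labels, predictions))   # 1
  fn = np.sum(np.logical_and(labels, ~predictions))   # 1
  precision = tp / float(tp + fp)                     # 2/3
  recall    = tp / float(tp + fn)                     # 2/3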
@@ -1817,7 +1827,8 @@ def recall_at_thresholds(labels, predictions, thresholds,
If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
Args:
- labels: A `bool` `Tensor` whose shape matches `predictions`.
+ labels: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
predictions: A floating point `Tensor` of arbitrary shape and whose values
are in the range `[0, 1]`.
thresholds: A python list or tuple of float thresholds in `[0, 1]`.
@@ -1952,7 +1963,8 @@ def sensitivity_at_specificity(
following: https://en.wikipedia.org/wiki/Sensitivity_and_specificity
Args:
- labels: A `bool` `Tensor` whose shape matches `predictions`.
+ labels: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
predictions: A floating point `Tensor` of arbitrary shape and whose values
are in the range `[0, 1]`.
specificity: A scalar value in range `[0, 1]`.
@@ -2515,7 +2527,8 @@ def specificity_at_sensitivity(
following: https://en.wikipedia.org/wiki/Sensitivity_and_specificity
Args:
- labels: A `bool` `Tensor` whose shape matches `predictions`.
+ labels: The ground truth values, a `Tensor` whose dimensions must match
+ `predictions`. Will be cast to `bool`.
predictions: A floating point `Tensor` of arbitrary shape and whose values
are in the range `[0, 1]`.
sensitivity: A scalar value in range `[0, 1]`.
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 9ad2bf998b..344a592106 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1663,13 +1663,13 @@ def sparse_softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable=
Args:
_sentinel: Used to prevent positional parameters. Internal, do not use.
- labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or
- `int64`. Each entry in `labels` must be an index in `[0, num_classes)`.
- Other values will raise an exception when this op is run on CPU, and
- return `NaN` for corresponding corresponding loss and gradient rows
- on GPU.
- logits: Unscaled log probabilities of rank `r` and shape
- `[d_0, d_1, ..., d_{r-2}, num_classes]` and dtype `float32` or `float64`.
+ labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
+ `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
+ must be an index in `[0, num_classes)`. Other values will raise an
+ exception when this op is run on CPU, and return `NaN` for corresponding
+ loss and gradient rows on GPU.
+ logits: Unscaled log probabilities of shape
+ `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or `float64`.
name: A name for the operation (optional).
Returns:
diff --git a/tensorflow/python/tools/freeze_graph.py b/tensorflow/python/tools/freeze_graph.py
index bdd59eeb6b..0c266770ab 100644
--- a/tensorflow/python/tools/freeze_graph.py
+++ b/tensorflow/python/tools/freeze_graph.py
@@ -44,6 +44,7 @@ from google.protobuf import text_format
from tensorflow.core.framework import graph_pb2
from tensorflow.core.protobuf import saver_pb2
+from tensorflow.python import pywrap_tensorflow
from tensorflow.python.client import session
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import importer
@@ -67,6 +68,8 @@ def freeze_graph(input_graph,
variable_names_blacklist=""):
"""Converts all variables in a graph and checkpoint into constants."""
+ del restore_op_name, filename_tensor_name # Unused by updated loading code.
+
if not gfile.Exists(input_graph):
print("Input graph file '" + input_graph + "' does not exist!")
return -1
@@ -96,6 +99,7 @@ def freeze_graph(input_graph,
if clear_devices:
for node in input_graph_def.node:
node.device = ""
+
_ = importer.import_graph_def(input_graph_def, name="")
with session.Session() as sess:
@@ -109,7 +113,19 @@ def freeze_graph(input_graph,
saver = saver_lib.Saver(saver_def=saver_def)
saver.restore(sess, input_checkpoint)
else:
- sess.run([restore_op_name], {filename_tensor_name: input_checkpoint})
+ var_list = {}
+ reader = pywrap_tensorflow.NewCheckpointReader(input_checkpoint)
+ var_to_shape_map = reader.get_variable_to_shape_map()
+ for key in var_to_shape_map:
+ try:
+ tensor = sess.graph.get_tensor_by_name(key + ":0")
+ except KeyError:
+ # This tensor doesn't exist in the graph (for example it's
+ # 'global_step' or a similar housekeeping element) so skip it.
+ continue
+ var_list[key] = tensor
+ saver = saver_lib.Saver(var_list=var_list)
+ saver.restore(sess, input_checkpoint)
if initializer_nodes:
sess.run(initializer_nodes)
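The new restore path builds a `var_list` from the checkpoint itself rather than relying on `restore_op_name`/`filename_tensor_name`. A hedged standalone sketch of that inspection step, using only the calls that appear in the hunk above (`/tmp/model.ckpt` is a placeholder checkpoint prefix):

  from tensorflow.python import pywrap_tensorflow

  input_checkpoint = '/tmp/model.ckpt'  # hypothetical checkpoint prefix
  reader = pywrap_tensorflow.NewCheckpointReader(input_checkpoint)
  for key, shape in sorted(reader.get_variable_to_shape_map().items()):
      print(key, shape)  # each saved variable name and its shape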
diff --git a/tensorflow/python/training/basic_session_run_hooks.py b/tensorflow/python/training/basic_session_run_hooks.py
index b7dde8aa69..cda4fedec7 100644
--- a/tensorflow/python/training/basic_session_run_hooks.py
+++ b/tensorflow/python/training/basic_session_run_hooks.py
@@ -122,7 +122,8 @@ class LoggingTensorHook(session_run_hook.SessionRunHook):
The tensors will be printed to the log, with `INFO` severity.
"""
- def __init__(self, tensors, every_n_iter=None, every_n_secs=None):
+ def __init__(self, tensors, every_n_iter=None, every_n_secs=None,
+ formatter=None):
"""Initializes a LoggingHook monitor.
Args:
@@ -133,6 +134,8 @@ class LoggingTensorHook(session_run_hook.SessionRunHook):
every_n_secs: `int` or `float`, print the values of `tensors` once every N
seconds. Exactly one of `every_n_iter` and `every_n_secs` should be
provided.
+      formatter: function, takes dict of `tag`->`Tensor` and returns a string.
+          If `None`, uses the default of printing all tensors.
Raises:
ValueError: if `every_n_iter` is non-positive.
@@ -143,8 +146,12 @@ class LoggingTensorHook(session_run_hook.SessionRunHook):
if every_n_iter is not None and every_n_iter <= 0:
raise ValueError("invalid every_n_iter=%s." % every_n_iter)
if not isinstance(tensors, dict):
+ self._tag_order = tensors
tensors = {item: item for item in tensors}
+ else:
+ self._tag_order = tensors.keys()
self._tensors = tensors
+ self._formatter = formatter
self._timer = SecondOrStepTimer(every_secs=every_n_secs,
every_steps=every_n_iter)
@@ -164,11 +171,17 @@ class LoggingTensorHook(session_run_hook.SessionRunHook):
def after_run(self, run_context, run_values):
_ = run_context
if self._should_trigger:
- stats = []
- for tag in self._current_tensors.keys():
- stats.append("%s = %s" % (tag, run_values.results[tag]))
- logging.info("%s", ", ".join(stats))
- self._timer.update_last_triggered_step(self._iter_count)
+ original = np.get_printoptions()
+ np.set_printoptions(suppress=True)
+ elapsed_secs, _ = self._timer.update_last_triggered_step(self._iter_count)
+ if self._formatter:
+ logging.info(self._formatter(run_values.results))
+ else:
+ stats = []
+ for tag in self._tag_order:
+ stats.append("%s = %s" % (tag, run_values.results[tag]))
+ logging.info("%s (%.3f sec)", ", ".join(stats), elapsed_secs)
+ np.set_printoptions(**original)
self._iter_count += 1
@@ -647,6 +660,22 @@ class FinalOpsHook(session_run_hook.SessionRunHook):
feed_dict=self._final_ops_feed_dict)
+class FeedFnHook(session_run_hook.SessionRunHook):
+ """Runs `feed_fn` and sets the `feed_dict` accordingly."""
+
+ def __init__(self, feed_fn):
+ """Constructs the FeedFnHook with given `feed_fn`.
+
+ Args:
+      feed_fn: function that takes no arguments and returns a `dict` to feed.
+ """
+ self.feed_fn = feed_fn
+
+ def before_run(self, run_context): # pylint: disable=unused-argument
+ return session_run_hook.SessionRunArgs(
+ fetches=None, feed_dict=self.feed_fn())
+
+
def _as_graph_element(obj):
"""Retrieves Graph element."""
graph = ops.get_default_graph()
diff --git a/tensorflow/python/training/basic_session_run_hooks_test.py b/tensorflow/python/training/basic_session_run_hooks_test.py
index babc651e6c..6c2945396f 100644
--- a/tensorflow/python/training/basic_session_run_hooks_test.py
+++ b/tensorflow/python/training/basic_session_run_hooks_test.py
@@ -251,6 +251,19 @@ class LoggingTensorHookTest(test.TestCase):
mon_sess.run(train_op)
self.assertRegexpMatches(str(self.logged_message), t.name)
+ def test_print_formatter(self):
+ with ops.Graph().as_default(), session_lib.Session() as sess:
+ t = constant_op.constant(42.0, name='foo')
+ train_op = constant_op.constant(3)
+ hook = basic_session_run_hooks.LoggingTensorHook(
+ tensors=[t.name], every_n_iter=10,
+ formatter=lambda items: 'qqq=%s' % items[t.name])
+ hook.begin()
+ mon_sess = monitored_session._HookedSession(sess, [hook])
+ sess.run(variables_lib.global_variables_initializer())
+ mon_sess.run(train_op)
+ self.assertEqual(self.logged_message[0], 'qqq=42.0')
+
class CheckpointSaverHookTest(test.TestCase):
@@ -820,5 +833,18 @@ class FinalOpsHookTest(test.TestCase):
hook.final_ops_values.tolist())
+class FeedFnHookTest(test.TestCase):
+
+ def test_feeding_placeholder(self):
+ with ops.Graph().as_default(), session_lib.Session() as sess:
+ x = array_ops.placeholder(dtype=dtypes.float32)
+ y = x + 1
+ hook = basic_session_run_hooks.FeedFnHook(
+ feed_fn=lambda: {x: 1.0})
+ hook.begin()
+ mon_sess = monitored_session._HookedSession(sess, [hook])
+ self.assertEqual(mon_sess.run(y), 2)
+
+
if __name__ == '__main__':
test.main()
diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py
index 30b9ccf922..26e52464cb 100644
--- a/tensorflow/python/training/monitored_session.py
+++ b/tensorflow/python/training/monitored_session.py
@@ -248,6 +248,7 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name
chief_only_hooks=None,
save_checkpoint_secs=600,
save_summaries_steps=100,
+ save_summaries_secs=None,
config=None):
"""Creates a `MonitoredSession` for training.
@@ -273,8 +274,12 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name
using a default checkpoint saver. If `save_checkpoint_secs` is set to
`None`, then the default checkpoint saver isn't used.
save_summaries_steps: The frequency, in number of global steps, that the
- summaries are written to disk using a default summary saver. If
- `save_summaries_steps` is set to `None`, then the default summary saver
+ summaries are written to disk using a default summary saver. If both
+ `save_summaries_steps` and `save_summaries_secs` are set to `None`, then
+ the default summary saver isn't used.
+ save_summaries_secs: The frequency, in secs, that the summaries are written
+ to disk using a default summary saver. If both `save_summaries_steps` and
+ `save_summaries_secs` are set to `None`, then the default summary saver
isn't used.
config: an instance of `tf.ConfigProto` proto used to configure the session.
It's the `config` argument of constructor of `tf.Session`.
@@ -301,10 +306,12 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name
all_hooks.append(
basic_session_run_hooks.StepCounterHook(output_dir=checkpoint_dir))
- if save_summaries_steps and save_summaries_steps > 0:
+ if (save_summaries_steps and save_summaries_steps > 0) or (
+ save_summaries_secs and save_summaries_secs > 0):
all_hooks.append(basic_session_run_hooks.SummarySaverHook(
scaffold=scaffold,
save_steps=save_summaries_steps,
+ save_secs=save_summaries_secs,
output_dir=checkpoint_dir))
if save_checkpoint_secs and save_checkpoint_secs > 0:
all_hooks.append(basic_session_run_hooks.CheckpointSaverHook(
diff --git a/tensorflow/python/training/monitored_session_test.py b/tensorflow/python/training/monitored_session_test.py
index 3b16073166..444ee68cb8 100644
--- a/tensorflow/python/training/monitored_session_test.py
+++ b/tensorflow/python/training/monitored_session_test.py
@@ -215,15 +215,37 @@ class MonitoredTrainingSessionTest(test.TestCase):
is_chief=True, checkpoint_dir=logdir) as session:
self.assertEqual(2, session.run(gstep))
- def test_summaries(self):
- logdir = _test_dir(self.get_temp_dir(), 'test_summaries')
+ def test_summaries_steps(self):
+ logdir = _test_dir(self.get_temp_dir(), 'test_summaries_steps')
with ops.Graph().as_default():
gstep = variables_lib.get_or_create_global_step()
new_gstep = state_ops.assign_add(gstep, 1)
summary.scalar('my_summary_tag', new_gstep * 2)
with monitored_session.MonitoredTrainingSession(
- is_chief=True, checkpoint_dir=logdir) as session:
- for _ in range(101): # 100 is default summary writing steps
+ is_chief=True,
+ checkpoint_dir=logdir,
+ save_summaries_steps=100) as session:
+ for _ in range(101):
+ session.run(new_gstep)
+ summaries = util_test.latest_summaries(logdir)
+ tags = [s.summary.value[0].tag for s in summaries]
+ self.assertIn('my_summary_tag', tags)
+ self.assertIn('global_step/sec', tags)
+
+ def test_summaries_secs(self):
+ logdir = _test_dir(self.get_temp_dir(), 'test_summaries_secs')
+ with ops.Graph().as_default():
+ gstep = variables_lib.get_or_create_global_step()
+ new_gstep = state_ops.assign_add(gstep, 1)
+ summary.scalar('my_summary_tag', new_gstep * 2)
+ with monitored_session.MonitoredTrainingSession(
+ is_chief=True,
+ checkpoint_dir=logdir,
+ save_summaries_steps=None,
+ save_summaries_secs=0.1) as session:
+ session.run(new_gstep)
+ time.sleep(0.2)
+ for _ in range(101):
session.run(new_gstep)
summaries = util_test.latest_summaries(logdir)
tags = [s.summary.value[0].tag for s in summaries]
diff --git a/tensorflow/python/training/training.py b/tensorflow/python/training/training.py
index 3a2415629a..9f59d270e4 100644
--- a/tensorflow/python/training/training.py
+++ b/tensorflow/python/training/training.py
@@ -88,8 +88,10 @@ See [Threading and Queues](../../how_tos/threading_and_queues/index.md)
for how to use threads and queues. For documentation on the Queue API,
see [Queues](../../api_docs/python/io_ops.md#queues).
+
@@Coordinator
@@QueueRunner
+@@LooperThread
@@add_queue_runner
@@start_queue_runners
@@ -119,14 +121,15 @@ overview of summaries, event files, and visualization in TensorBoard.
@@summary_iterator
-## Training Utilities
+## Training Hooks
+
+Hooks are tools that run during training or evaluation of the model.
-@@global_step
-@@basic_train_loop
-@@get_global_step
-@@assert_global_step
-@@write_graph
@@SessionRunHook
+@@SessionRunArgs
+@@SessionRunContext
+@@SessionRunValues
+
@@LoggingTensorHook
@@StopAtStepHook
@@CheckpointSaverHook
@@ -136,10 +139,16 @@ overview of summaries, event files, and visualization in TensorBoard.
@@NanTensorHook
@@SummarySaverHook
@@GlobalStepWaiterHook
-@@SessionRunArgs
-@@SessionRunContext
-@@SessionRunValues
-@@LooperThread
+@@FinalOpsHook
+@@FeedFnHook
+
+## Training Utilities
+
+@@global_step
+@@basic_train_loop
+@@get_global_step
+@@assert_global_step
+@@write_graph
"""
# pylint: enable=line-too-long
@@ -190,6 +199,8 @@ from tensorflow.python.training.basic_session_run_hooks import NanLossDuringTrai
from tensorflow.python.training.basic_session_run_hooks import NanTensorHook
from tensorflow.python.training.basic_session_run_hooks import SummarySaverHook
from tensorflow.python.training.basic_session_run_hooks import GlobalStepWaiterHook
+from tensorflow.python.training.basic_session_run_hooks import FinalOpsHook
+from tensorflow.python.training.basic_session_run_hooks import FeedFnHook
from tensorflow.python.training.basic_loops import basic_train_loop
from tensorflow.python.training.device_setter import replica_device_setter
from tensorflow.python.training.monitored_session import Scaffold
diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc
index ac0f15b687..93c312ecfc 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver.cc
+++ b/tensorflow/stream_executor/cuda/cuda_driver.cc
@@ -1212,57 +1212,56 @@ CUDADriver::ContextGetSharedMemConfig(CudaContext* context) {
return false;
}
-/* static */ bool CUDADriver::SynchronousMemcpyD2H(CudaContext* context,
- void *host_dst,
- CUdeviceptr gpu_src,
- uint64 size) {
+/* static */ port::Status CUDADriver::SynchronousMemcpyD2H(CudaContext *context,
+ void *host_dst,
+ CUdeviceptr gpu_src,
+ uint64 size) {
ScopedActivateContext activation{context};
CUresult res = dynload::cuMemcpyDtoH_v2(host_dst, gpu_src, size);
if (res != CUDA_SUCCESS) {
- LOG(ERROR) << port::Printf(
- "failed to synchronous memcpy from device to host: %s; "
- "host dst: %p; GPU src: %p; size: %llu=0x%llx",
- ToString(res).c_str(), host_dst, port::bit_cast<void *>(gpu_src), size, size);
- return false;
+ return port::InternalError(
+ port::Printf("failed to synchronous memcpy from device to host: %s; "
+ "host dst: %p; GPU src: %p; size: %llu=0x%llx",
+ ToString(res).c_str(), host_dst,
+ port::bit_cast<void *>(gpu_src), size, size));
}
VLOG(2) << "successfully sync memcpy'd d2h of " << size << " bytes to "
<< host_dst;
- return true;
+ return port::Status::OK();
}
-/* static */ bool CUDADriver::SynchronousMemcpyH2D(CudaContext* context,
- CUdeviceptr gpu_dst,
- const void *host_src,
- uint64 size) {
+/* static */ port::Status CUDADriver::SynchronousMemcpyH2D(CudaContext *context,
+ CUdeviceptr gpu_dst,
+ const void *host_src,
+ uint64 size) {
ScopedActivateContext activation{context};
CUresult res = dynload::cuMemcpyHtoD_v2(gpu_dst, host_src, size);
if (res != CUDA_SUCCESS) {
- LOG(ERROR) << port::Printf(
+ return port::InternalError(port::Printf(
"failed to synchronous memcpy from host to device: %s; GPU dst: %p;"
" host src: %p; size: %llu=0x%llx",
- ToString(res).c_str(), port::bit_cast<void *>(gpu_dst), host_src, size, size);
- return false;
+ ToString(res).c_str(), port::bit_cast<void *>(gpu_dst), host_src, size,
+ size));
}
VLOG(2) << "successfully enqueued sync memcpy h2d of " << size << " bytes";
- return true;
+ return port::Status::OK();
}
-/* static */ bool CUDADriver::SynchronousMemcpyD2D(CudaContext* context,
- CUdeviceptr gpu_dst,
- CUdeviceptr gpu_src,
- uint64 size) {
+/* static */ port::Status CUDADriver::SynchronousMemcpyD2D(CudaContext *context,
+ CUdeviceptr gpu_dst,
+ CUdeviceptr gpu_src,
+ uint64 size) {
ScopedActivateContext activation{context};
CUresult res = dynload::cuMemcpyDtoD_v2(gpu_dst, gpu_src, size);
if (res != CUDA_SUCCESS) {
- LOG(ERROR) << port::Printf(
+ return port::InternalError(port::Printf(
"failed to synchronous memcpy from host to device: %s; GPU dst: %p; "
"GPU src: %p; size: %llu=0x%llx",
ToString(res).c_str(), port::bit_cast<void *>(gpu_dst),
- port::bit_cast<void *>(gpu_src), size, size);
- return false;
+ port::bit_cast<void *>(gpu_src), size, size));
}
VLOG(2) << "successfully sync memcpy'd d2d of " << size << " bytes";
- return true;
+ return port::Status::OK();
}
/* static */ bool CUDADriver::AsynchronousMemcpyD2H(CudaContext* context,
diff --git a/tensorflow/stream_executor/cuda/cuda_driver.h b/tensorflow/stream_executor/cuda/cuda_driver.h
index ab118e5d40..c5d7d8b32f 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver.h
+++ b/tensorflow/stream_executor/cuda/cuda_driver.h
@@ -251,12 +251,14 @@ class CUDADriver {
// -- Synchronous memcopies.
// http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g4d32266788c440b0220b1a9ba5795169
- static bool SynchronousMemcpyD2H(CudaContext* context, void *host_dst,
- CUdeviceptr gpu_src, uint64 size);
- static bool SynchronousMemcpyH2D(CudaContext* context, CUdeviceptr gpu_dst,
- const void *host_src, uint64 size);
- static bool SynchronousMemcpyD2D(CudaContext* context, CUdeviceptr gpu_dst,
- CUdeviceptr gpu_src, uint64 size);
+ static port::Status SynchronousMemcpyD2H(CudaContext* context, void* host_dst,
+ CUdeviceptr gpu_src, uint64 size);
+ static port::Status SynchronousMemcpyH2D(CudaContext* context,
+ CUdeviceptr gpu_dst,
+ const void* host_src, uint64 size);
+ static port::Status SynchronousMemcpyD2D(CudaContext* context,
+ CUdeviceptr gpu_dst,
+ CUdeviceptr gpu_src, uint64 size);
// -- Asynchronous memcopies.
// http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g56f30236c7c5247f8e061b59d3268362
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index b2da109bf0..ae1bf991a1 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -508,20 +508,21 @@ bool CUDAExecutor::SynchronousMemSet(DeviceMemoryBase *location, int value,
value, size);
}
-bool CUDAExecutor::SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
- const void *host_src, uint64 size) {
+port::Status CUDAExecutor::SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
+ const void *host_src,
+ uint64 size) {
return CUDADriver::SynchronousMemcpyH2D(context_, AsCudaDevicePtr(gpu_dst),
host_src, size);
}
-bool CUDAExecutor::SynchronousMemcpy(void *host_dst,
- const DeviceMemoryBase &gpu_src,
- uint64 size) {
+port::Status CUDAExecutor::SynchronousMemcpy(void *host_dst,
+ const DeviceMemoryBase &gpu_src,
+ uint64 size) {
return CUDADriver::SynchronousMemcpyD2H(context_, host_dst,
AsCudaDevicePtr(gpu_src), size);
}
-bool CUDAExecutor::SynchronousMemcpyDeviceToDevice(
+port::Status CUDAExecutor::SynchronousMemcpyDeviceToDevice(
DeviceMemoryBase *gpu_dst, const DeviceMemoryBase &gpu_src, uint64 size) {
return CUDADriver::SynchronousMemcpyD2D(context_, AsCudaDevicePtr(gpu_dst),
AsCudaDevicePtr(gpu_src), size);
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index 3959d04439..a9917cc89f 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -108,15 +108,16 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
bool SynchronousMemSet(DeviceMemoryBase *location, int value,
uint64 size) override;
- bool SynchronousMemcpy(DeviceMemoryBase *gpu_dst, const void *host_src,
- uint64 size) override;
+ port::Status SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
+ const void *host_src, uint64 size) override;
- bool SynchronousMemcpy(void *host_dst, const DeviceMemoryBase &gpu_src,
- uint64 size) override;
+ port::Status SynchronousMemcpy(void *host_dst,
+ const DeviceMemoryBase &gpu_src,
+ uint64 size) override;
- bool SynchronousMemcpyDeviceToDevice(DeviceMemoryBase *gpu_dst,
- const DeviceMemoryBase &gpu_src,
- uint64 size) override;
+ port::Status SynchronousMemcpyDeviceToDevice(DeviceMemoryBase *gpu_dst,
+ const DeviceMemoryBase &gpu_src,
+ uint64 size) override;
bool MemZero(Stream *stream, DeviceMemoryBase *location,
uint64 size) override;
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index d83d3042d5..5db86cefc3 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -38,6 +38,7 @@ limitations under the License.
namespace perftools {
namespace gputools {
+class HostBuffer;
class Stream;
class ScratchAllocator;
@@ -125,6 +126,15 @@ enum class RnnDirectionMode {
kRnnBidirectional = 1,
};
+// Relevant to DepthToSpace and SpaceToDepth. This is the write layout when
+// performing depth to space and the read layout when performing space to depth.
+// It's specified with most-major dimension first and most-minor dimension last.
+// In DepthToSpace, the D*M² values are read in and then, for DepthHeightWidth,
+// written out to the output patch, by varying first width, then height, then
+// depth. In C array format, it looks like [depth][height][width]. See
+// DepthToSpace comment for more information.
+enum class DepthToSpaceLayout { DepthHeightWidth };
+
// Specifies the descriptor for a RNN model.
//
// An example use case:
@@ -530,6 +540,13 @@ enum class PoolingMode : int64 {
kAverage,
};
+// Specify the dimension in which to concatenate inputs in space.
+// Specify int64 so there's no padding in SpaceConcatenateMode.
+enum class SpaceConcatenateMode : int64 {
+ XDirection,
+ YDirection,
+};
+
// Returns a short name for the pooling mode, e.g. "Avg".
string ShortPoolingModeString(PoolingMode mode);
@@ -1319,6 +1336,129 @@ class DnnSupport {
port::ArraySlice<const DeviceMemory<float>*> input_data,
DeviceMemory<float>* output_data) = 0;
+ // Concatenates several layers into one, by concatenating each in the
+ // x-dimension or y-dimension, based on a user-specified flag.
+ // For x-concatenation, layers are aligned at matching y and depth
+ // coordinates, and for y-concatenation, they are aligned at matching x and
+ // depth coordinates. The inputs must all have the same depth and batch size.
+ // For x-concatenation, the inputs must have the same height (y-size), and the
+ // output will have the same depth and height as the inputs and its width (x-
+ // size) will be the sum of the input widths. For y-concatenation, the inputs
+ // must have the same width, and the output will have the same depth and width
+ // as the inputs, and its height will be the sum of the input heights.
+ //
+ // Arguments:
+ // stream: borrowed pointer to the stream that the 'space concatenate'
+ // operation should be enqueued onto.
+ // input_dimensions: the dimensions of each input.
+ // input_data: un-owned device memory region which contains the input data
+ // for each input layer.
+ // output_data: un-owned device memory region in which to place the space
+ // concatenate result.
+  //  concat_direction: either dnn::SpaceConcatenateMode::XDirection or
+ // dnn::SpaceConcatenateMode::YDirection.
+ virtual bool DoSpaceConcatenate(
+ Stream* stream, port::ArraySlice<dnn::BatchDescriptor> input_dimensions,
+ port::ArraySlice<const DeviceMemory<float>*> input_data,
+ DeviceMemory<float>* output_data,
+ dnn::SpaceConcatenateMode concat_direction) {
+ return false;
+ }
+
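A hedged NumPy picture of x-concatenation as described above, assuming a simple batch/y/x/depth array layout (the real BatchDescriptor layout is device-dependent):

  import numpy as np

  a = np.zeros((1, 4, 5, 8))             # batch, y, x, depth
  b = np.zeros((1, 4, 3, 8))             # same batch, y and depth; different x
  np.concatenate([a, b], axis=2).shape   # (1, 4, 8, 8): widths add, everything else matches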
+ // Change the layout of the data by shrinking one dimension (or set of
+ // dimensions) and growing another dimension (or set of dimensions), while
+ // keeping the total number of data elements constant, and maintaining the
+ // current data ordering.
+ //
+ // Currently, the only supported operation is depth into space by a power of
+ // 2. E.g. (y, x, z) -> (y*2, x*2, z/4)
+ //
+ // Note that Reshape may not be a no-op, depending on the platform and which
+ // dimensions are being changed.
+ //
+ // Example: forgetting about batch for the moment, let's take a tensor that's
+ // 2x1x8 (y by x by z) and reshape to a tensor that's 4x2x2. The memory layout
+ // is row-major order: y,x,z. I.e. z changes the fastest, then x, then y. The
+ // elements of the tensor range from 0 to 15. The x,y,z indices are below each
+ // element.
+ //
+ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ // y0 y0 y0 y0 y0 y0 y0 y0 y1 y1 y1 y1 y1 y1 y1 y1
+ // x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0
+ // z0 z1 z2 z3 z4 z5 z6 z7 z0 z1 z2 z3 z4 z5 z6 z7
+ //
+ // reshape to 4x2x2
+ //
+ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ // y0 y0 y0 y0 y1 y1 y1 y1 y2 y2 y2 y2 y3 y3 y3 y3
+ // x0 x0 x1 x1 x0 x0 x1 x1 x0 x0 x1 x1 x0 x0 x1 x1
+ // z0 z1 z0 z1 z0 z1 z0 z1 z0 z1 z0 z1 z0 z1 z0 z1
+ virtual bool DoReshape(Stream* stream,
+ const dnn::BatchDescriptor& input_dimensions,
+ const DeviceMemory<float>& input_data,
+ const dnn::BatchDescriptor& output_dimensions,
+ DeviceMemory<float>* output_data) {
+ return false;
+ }
+
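The 2x1x8 -> 4x2x2 example above is, for a row-major host array, just a plain reshape; a short hedged NumPy check:

  import numpy as np

  t = np.arange(16).reshape(2, 1, 8)   # (y, x, z), elements 0..15 in row-major order
  t.reshape(4, 2, 2)                   # same 16 elements, same ordering, new (y, x, z) extents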
+ // Depth to space takes an X by Y image with depth D*M² and changes it to an
+ // MX x MY image with depth D. Each input location (x,y) with depth D*M² in
+ // the input image is changed to an MxM contiguous area in the output image,
+ // with the values being laid out in the raster order by DepthToSpaceLayout,
+ // and will have a new depth of D.
+ //
+ // Example.
+  //  M=2, Din=8, Xin=2, Yin=2. Xout=4, Yout=4, Dout=2
+ // DepthHeightWidth layout
+ // Values within a 'cell' are at different depths and same x & y.
+ // Input:
+ // abcdefgh ijklmnop
+ // qrstuvwx yz012345
+ // Output:
+ // ae bf im jn
+ // cg dh ko lp
+ // qu rv y2 z3
+ // sw tx 04 15
+ //
+ // sqrt_depth_reduction: 'M' in the comment above
+ virtual bool DoDepthToSpace(Stream* stream,
+ const dnn::BatchDescriptor& input_dimensions,
+ const DeviceMemory<float>& input_data,
+ const DepthToSpaceLayout& depth_to_space_layout,
+ const int& sqrt_depth_reduction,
+ DeviceMemory<float>* output_data) {
+ return false;
+ }
+
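A hedged NumPy rendering of the DepthHeightWidth layout described above; the helper name is ours and this is not part of the StreamExecutor API:

  import numpy as np

  def depth_to_space_dhw(x, m):
      # x: [H, W, D*m*m] -> [H*m, W*m, D]; each pixel's depth vector is read in
      # [depth][height][width] order and written into an m-by-m output patch.
      h, w, dm2 = x.shape
      d = dm2 // (m * m)
      x = x.reshape(h, w, d, m, m)      # split depth into [D][m][m]
      x = x.transpose(0, 3, 1, 4, 2)    # -> [H, m, W, m, D]
      return x.reshape(h * m, w * m, d)

  # With m=2 and an input pixel whose depth vector is a..h, the 2x2 output patch
  # holds per-cell depths (a,e) (b,f) / (c,g) (d,h), matching the example above.
  # SpaceToDepth is the inverse: the same reshape/transpose applied in reverse.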
+ // Space to depth is the inverse of depth to space. Space to depth takes each
+ // non-overlapping M by M patch (in the X and Y dimensions) with depth D of
+ // the input, and transforms it to a 1 by 1 patch with depth D*M². If the
+ // input has size (MX, MY, D), the output has size (X, Y, D*M²). The number of
+ // data elements is not changed.
+ //
+ // Example.
+  //  M=2, Din=2, Xin=4, Yin=4, Dout=8
+ // DepthHeightWidth layout
+ // Values within a 'cell' are at different depths and same x & y.
+ // Input:
+ // ae bf im jn
+ // cg dh ko lp
+ // qu rv y2 z3
+ // sw tx 04 15
+ // Output:
+ // abcdefgh ijklmnop
+ // qrstuvwx yz012345
+ //
+ // sqrt_depth_increase: 'M' in the comment above
+ virtual bool DoSpaceToDepth(Stream* stream,
+ const dnn::BatchDescriptor& input_dimensions,
+ const DeviceMemory<float>& input_data,
+ const DepthToSpaceLayout& space_to_depth_layout,
+ const int& sqrt_depth_increase,
+ DeviceMemory<float>* output_data) {
+ return false;
+ }
+
// Computes the specified operation (e.g. addition or multiplication)
// between corresponding elements in the inputs and stores the result in the
// output element.
@@ -1342,6 +1482,37 @@ class DnnSupport {
const dnn::BatchDescriptor& output_dimensions,
DeviceMemory<float>* output_data) = 0;
+ // Computes the specified operation (e.g. addition or multiplication)
+ // between corresponding elements in the inputs and stores the result in the
+ // output element. Each input is multiplied by a scalar constant and the
+ // result is divided by a scalar constant.
+ // e.g. To perform Z = 0.9*X + 1.1*Y, set the input multiplicands to 9 and 11
+ // and the output divisor to 10.
+ // The inputs and output must all have the same dimensions, but may have
+ // different quantization parameters (min_value and max_value).
+ //
+ // Arguments (all borrowed):
+ // stream: borrowed pointer to the stream that the 'elementwise operation'
+ // should be enqueued onto.
+ // operation: The operation to perform.
+ // input_multiplicands: Amount to scale each input.
+ // output_divisor: Amount to divide the output.
+ // input_dimensions: The dimensions of each input.
+ // input_data: un-owned device memory region which contains the
+ // input data for each input layer.
+ // output_dimensions: The dimensions of the output.
+ // output_data: un-owned device memory region in which to place the
+ // operation result.
+ virtual bool DoElementwiseOperateScaledQuantized(
+ Stream* stream, ElementwiseOperation operation,
+ port::ArraySlice<int> input_multiplicands, int output_divisor,
+ port::ArraySlice<dnn::BatchDescriptor> input_dimensions,
+ port::ArraySlice<const DeviceMemory<float>*> input_data,
+ const dnn::BatchDescriptor& output_dimensions,
+ DeviceMemory<float>* output_data) {
+ return false;
+ }
+
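The scaling convention above (Z = 0.9*X + 1.1*Y via integer multiplicands 9 and 11 and divisor 10) in a tiny hedged NumPy check:

  import numpy as np

  x, y = np.array([10., 20.]), np.array([30., 40.])
  z = (9 * x + 11 * y) / 10.0    # equals 0.9*x + 1.1*y -> array([42., 62.])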
// Pads the input with zeros in the X and Y dimensions. The feature_map
// dimension is unchanged.
//
@@ -1382,6 +1553,43 @@ class DnnSupport {
int64 left_trim, int64 right_trim, int64 top_trim,
int64 bottom_trim, DeviceMemory<float> *output_data) = 0;
+ // Grows the input tensor by replicating the X and Y dimensions. The batch and
+ // depth/feature_map dimensions are unchanged. Currently, the input tensor is
+ // limited to X=1 and Y=1.
+ //
+ // For example, the input has dimensions x=2, y=3, and replicate_x=3,
+ // replicate_y=2. The diagonal elements of the output would be: [x0y0, x1y1,
+ // x0y2, x1y0, x0y1, x1y2].
+ // Here is the example as a picture. input:
+ // AB
+ // CD
+ // EF
+ // broadcast result:
+ // ABABAB
+ // CDCDCD
+ // EFEFEF
+ // ABABAB
+ // CDCDCD
+ // EFEFEF
+ //
+ // Arguments (all borrowed):
+  //  stream: borrowed pointer to the stream that the 'XY broadcast' operation
+ // should be enqueued onto.
+ // dimensions: The dimensions of the input.
+ // input_data: un-owned device memory region which contains the
+ // input data for the input layer.
+ // replicate_x: Amount to replicate the input's X dimension.
+ // replicate_y: Amount to replicate the input's Y dimension.
+ // output_data: un-owned device memory region in which to place the
+  //    broadcast result.
+ virtual bool DoXYBroadcast(Stream* stream,
+ const dnn::BatchDescriptor& dimensions,
+ const DeviceMemory<float>& input_data,
+ int64 replicate_x, int64 replicate_y,
+ DeviceMemory<float>* output_data) {
+ return false;
+ }
+
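The replication described above corresponds to a plain tile over the spatial dimensions; a hedged NumPy illustration of the picture in the comment:

  import numpy as np

  x = np.array([['A', 'B'],
                ['C', 'D'],
                ['E', 'F']])     # y=3 rows, x=2 columns
  np.tile(x, (2, 3))             # replicate_y=2, replicate_x=3 -> the 6x6 grid shown above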
// Enqueues an asynchronous memcpy of the *quantized* output of a layer (that
// is, bytes instead of scaled floats) into 'host_dst' if they are available
// for the underlying DNN implementation. If this quantized output is not
@@ -1425,6 +1633,21 @@ class DnnSupport {
QuantizedActivationMode mode,
DeviceMemory<float>* gpu_unquantized_dst) = 0;
+ // Enqueues an asynchronous copy of the contents of buffer_src to
+ // gpu_unquantized_dst.
+ virtual bool DoCopyHostBuffer2Device(
+ Stream* stream, HostBuffer* buffer_src,
+ DeviceMemory<float>* gpu_unquantized_dst) {
+ return false;
+ }
+
+ // Enqueues an asynchronous copy of the contents of gpu_unquantized_src to
+ // buffer_dst.
+ virtual bool DoCopyDevice2HostBuffer(
+ Stream* stream, const DeviceMemory<float>& gpu_unquantized_src,
+ HostBuffer* buffer_dst) {
+ return false;
+ }
// Create an RNN descriptor based on model shapes and configurations.
// The caller retains the ownership of the descriptor.
diff --git a/tensorflow/stream_executor/host/host_gpu_executor.cc b/tensorflow/stream_executor/host/host_gpu_executor.cc
index ff07432bb7..830bc9a681 100644
--- a/tensorflow/stream_executor/host/host_gpu_executor.cc
+++ b/tensorflow/stream_executor/host/host_gpu_executor.cc
@@ -129,23 +129,24 @@ bool HostExecutor::Memset32(Stream *stream, DeviceMemoryBase *location,
return true;
}
-bool HostExecutor::SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
- const void *host_src, uint64 size) {
+port::Status HostExecutor::SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
+ const void *host_src,
+ uint64 size) {
memcpy(gpu_dst->opaque(), host_src, size);
- return true;
+ return port::Status::OK();
}
-bool HostExecutor::SynchronousMemcpy(void *host_dst,
- const DeviceMemoryBase &gpu_src,
- uint64 size) {
+port::Status HostExecutor::SynchronousMemcpy(void *host_dst,
+ const DeviceMemoryBase &gpu_src,
+ uint64 size) {
memcpy(host_dst, gpu_src.opaque(), size);
- return true;
+ return port::Status::OK();
}
-bool HostExecutor::SynchronousMemcpyDeviceToDevice(
+port::Status HostExecutor::SynchronousMemcpyDeviceToDevice(
DeviceMemoryBase *gpu_dst, const DeviceMemoryBase &gpu_src, uint64 size) {
memcpy(gpu_dst->opaque(), gpu_src.opaque(), size);
- return true;
+ return port::Status::OK();
}
bool HostExecutor::HostCallback(Stream *stream,
diff --git a/tensorflow/stream_executor/host/host_gpu_executor.h b/tensorflow/stream_executor/host/host_gpu_executor.h
index f217f7947f..77b07e4a57 100644
--- a/tensorflow/stream_executor/host/host_gpu_executor.h
+++ b/tensorflow/stream_executor/host/host_gpu_executor.h
@@ -95,13 +95,14 @@ class HostExecutor : public internal::StreamExecutorInterface {
bool SynchronousMemSet(DeviceMemoryBase *location, int value,
uint64 size) override;
- bool SynchronousMemcpy(DeviceMemoryBase *gpu_dst, const void *host_src,
- uint64 size) override;
- bool SynchronousMemcpy(void *host_dst, const DeviceMemoryBase &gpu_src,
- uint64 size) override;
- bool SynchronousMemcpyDeviceToDevice(DeviceMemoryBase *gpu_dst,
- const DeviceMemoryBase &gpu_src,
- uint64 size) override;
+ port::Status SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
+ const void *host_src, uint64 size) override;
+ port::Status SynchronousMemcpy(void *host_dst,
+ const DeviceMemoryBase &gpu_src,
+ uint64 size) override;
+ port::Status SynchronousMemcpyDeviceToDevice(DeviceMemoryBase *gpu_dst,
+ const DeviceMemoryBase &gpu_src,
+ uint64 size) override;
bool HostCallback(Stream *stream, std::function<void()> callback) override;
diff --git a/tensorflow/stream_executor/host_buffer.h b/tensorflow/stream_executor/host_buffer.h
new file mode 100644
index 0000000000..8fa542e9ff
--- /dev/null
+++ b/tensorflow/stream_executor/host_buffer.h
@@ -0,0 +1,48 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_STREAM_EXECUTOR_HOST_BUFFER_H_
+#define TENSORFLOW_STREAM_EXECUTOR_HOST_BUFFER_H_
+
+#include "tensorflow/stream_executor/dnn.h"
+
+namespace perftools {
+namespace gputools {
+
+// A HostBuffer is a block of memory in host memory containing the data for a
+// dnn::BatchDescriptor using a device-dependent memory layout.
+// Derived classes provide methods to construct a HostBuffer for a specific
+// device, and to copy data in and out of the buffer.
+class HostBuffer {
+ public:
+ const dnn::BatchDescriptor& descriptor() const { return descriptor_; }
+
+ // Returns a string describing the HostBuffer.
+ virtual string AsString() const = 0;
+
+ protected:
+ // Construct a HostBuffer from the supplied dnn::BatchDescriptor.
+ explicit HostBuffer(const dnn::BatchDescriptor& descriptor)
+ : descriptor_(descriptor) {}
+ virtual ~HostBuffer() {}
+
+ private:
+ const dnn::BatchDescriptor descriptor_;
+};
+
+} // namespace gputools
+} // namespace perftools
+
+#endif // TENSORFLOW_STREAM_EXECUTOR_HOST_BUFFER_H_
diff --git a/tensorflow/stream_executor/lib/status.h b/tensorflow/stream_executor/lib/status.h
index 493fc656e1..0aec2917dc 100644
--- a/tensorflow/stream_executor/lib/status.h
+++ b/tensorflow/stream_executor/lib/status.h
@@ -20,6 +20,7 @@ limitations under the License.
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/stream_executor/lib/error.h" // IWYU pragma: export
+#include "tensorflow/stream_executor/lib/stringpiece.h"
#include "tensorflow/stream_executor/platform/logging.h"
namespace perftools {
@@ -33,6 +34,17 @@ using Status = tensorflow::Status;
#define SE_ASSERT_OK(val) \
ASSERT_EQ(::perftools::gputools::port::Status::OK(), (val))
+// Define some canonical error helpers.
+inline Status UnimplementedError(StringPiece message) {
+ return Status(error::UNIMPLEMENTED, message);
+}
+inline Status InternalError(StringPiece message) {
+ return Status(error::INTERNAL, message);
+}
+inline Status FailedPreconditionError(StringPiece message) {
+ return Status(error::FAILED_PRECONDITION, message);
+}
+
} // namespace port
} // namespace gputools
} // namespace perftools
diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index 512e882cad..980d544b01 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/blas.h"
+#include "tensorflow/stream_executor/host_buffer.h"
#include "tensorflow/stream_executor/lib/stacktrace.h"
#include "tensorflow/stream_executor/lib/strcat.h"
#include "tensorflow/stream_executor/platform.h"
@@ -85,6 +86,8 @@ string ToVlogString(const void *ptr) {
return out.str();
}
+string ToVlogString(const HostBuffer &buffer) { return buffer.AsString(); }
+
template <class T>
string ToVlogString(const std::complex<T> &c) {
// StrCat does not convert std::complex to text.
@@ -149,6 +152,13 @@ string ToVlogString(port::MutableArraySlice<T> elements) {
return ToVlogString(port::ArraySlice<T>(elements));
}
+string ToVlogString(dnn::DepthToSpaceLayout depth_to_space_layout) {
+ switch (depth_to_space_layout) {
+ case dnn::DepthToSpaceLayout::DepthHeightWidth:
+ return "DepthToSpaceLayout::DepthHeightWidth";
+ }
+}
+
// Used together with PARAM to VLOG calls made to the stream. Intended
// to be used like this:
//
@@ -299,10 +309,7 @@ Stream &Stream::ThenBatchNormalizationForward(
saved_inv_var, is_training, std::move(var_to_inv_var),
std::move(inv_var_to_var)));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -324,10 +331,7 @@ Stream &Stream::ThenBatchNormalizationBackward(
this, y_backprop, x, scale, mean, variance, x_desc, scale_offset_desc,
epsilon, x_backprop, scale_backprop, offset_backprop));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -355,10 +359,7 @@ Stream &Stream::ThenConvolveWithScratch(
/*scratch_allocator=*/scratch_allocator, dnn::AlgorithmConfig(),
nullptr));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -385,10 +386,7 @@ Stream &Stream::ThenConvolveWithScratch(
/*scratch_allocator=*/scratch_allocator, dnn::AlgorithmConfig(),
nullptr));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -419,10 +417,7 @@ Stream &Stream::ThenConvolveWithAlgorithm(
SetError();
}
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -453,10 +448,7 @@ Stream &Stream::ThenConvolveWithAlgorithm(
SetError();
}
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -497,10 +489,7 @@ Stream &Stream::ThenSeparableConvolve(
depth_multiplier, first_weights, second_weights,
convolution_descriptor, output_descriptor, output));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -528,10 +517,7 @@ Stream &Stream::ThenConvolveBackwardDataWithScratch(
backward_input_data, scratch_allocator, dnn::AlgorithmConfig(),
nullptr));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -564,10 +550,7 @@ Stream &Stream::ThenConvolveBackwardDataWithAlgorithm(
SetError();
}
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -600,10 +583,7 @@ Stream &Stream::ThenConvolveBackwardDataWithAlgorithm(
SetError();
}
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -631,10 +611,7 @@ Stream &Stream::ThenConvolveBackwardDataWithScratch(
backward_input_data, scratch_allocator, dnn::AlgorithmConfig(),
nullptr));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -676,10 +653,7 @@ Stream &Stream::ThenConvolveBackwardFilterWithScratch(
backward_filter_data, scratch_allocator, dnn::AlgorithmConfig(),
nullptr));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -712,10 +686,7 @@ Stream &Stream::ThenConvolveBackwardFilterWithAlgorithm(
SetError();
}
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -743,10 +714,7 @@ Stream &Stream::ThenConvolveBackwardFilterWithScratch(
backward_filter_data, scratch_allocator, dnn::AlgorithmConfig(),
nullptr));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -779,10 +747,7 @@ Stream &Stream::ThenConvolveBackwardFilterWithAlgorithm(
SetError();
}
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -817,10 +782,7 @@ Stream &Stream::ThenConvolveBackwardBiasImpl(
bias_descriptor,
backward_bias_data));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -866,10 +828,7 @@ Stream &Stream::ThenMatMul(const DeviceMemory<float> &input_data,
CheckError(dnn->DoMatMul(this, input_data, weights, input_dimensions,
output_dimensions, output_data));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -891,10 +850,7 @@ Stream &Stream::ThenMatMulQuantized(
weight_scales, input_dimensions,
output_dimensions, output_data));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -916,10 +872,7 @@ Stream &Stream::ThenMatMulQuantized(
weight_scales, input_dimensions,
output_dimensions, output_data));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -937,10 +890,7 @@ Stream &Stream::ThenBiasAdd(const DeviceMemory<float> &input_data,
CheckError(
dnn->DoBiasAdd(this, input_data, biases, dimensions, output_data));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -961,10 +911,7 @@ Stream &Stream::ThenPoolForward(
input_data, output_dimensions,
output_data));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -985,10 +932,7 @@ Stream &Stream::ThenPoolForward(
input_data, output_dimensions,
output_data));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -1012,10 +956,7 @@ Stream &Stream::ThenPoolBackward(
input_data, output_dimensions, output_data,
input_diff_data, output_diff_data));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -1039,10 +980,7 @@ Stream &Stream::ThenPoolBackward(
input_data, output_dimensions, output_data,
input_diff_data, output_diff_data));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -1058,10 +996,7 @@ Stream &Stream::ThenNormalize(
CheckError(dnn->DoNormalize(this, normalize_descriptor, input_data,
output_data));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -1079,10 +1014,7 @@ Stream &Stream::ThenNormalizeWithDimensions(
CheckError(dnn->DoNormalizeWithDimensions(
this, normalize_descriptor, dimensions, input_data, output_data));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -1104,10 +1036,7 @@ Stream &Stream::ThenNormalizeBackwardWithDimensions(
this, normalize_descriptor, dimensions, raw_data, normalized_data,
normalized_variable_gradient, raw_variable_gradient));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -1125,10 +1054,7 @@ Stream &Stream::ThenActivate(dnn::ActivationMode activation_mode,
CheckError(dnn->DoActivate(this, activation_mode, dimensions, input_data,
output_data));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -1158,10 +1084,114 @@ Stream &Stream::ThenDepthConcatenate(
CheckError(dnn->DoDepthConcatenate(this, input_dimensions, input_data,
output_data));
} else {
+ SetErrorAndLogNoDnnSupport();
+ }
+ }
+ return *this;
+}
+
+Stream &Stream::ThenSpaceConcatenate(
+ port::ArraySlice<dnn::BatchDescriptor> input_dimensions,
+ port::ArraySlice<const DeviceMemory<float> *> input_data,
+ DeviceMemory<float> *output_data,
+ dnn::SpaceConcatenateMode concat_direction) {
+ VLOG_CALL(PARAM(input_dimensions), PARAM(input_data), PARAM(output_data));
+
+ // Check that the input dimensions of all the other batches match those of the
+ // first batch.
+ for (size_t i = 1; i < input_dimensions.size(); ++i) {
+ if ((concat_direction == dnn::SpaceConcatenateMode::XDirection) &&
+ (input_dimensions[i].count() != input_dimensions[0].count() ||
+ input_dimensions[i].height() != input_dimensions[0].height() ||
+ input_dimensions[i].feature_map_count() !=
+ input_dimensions[0].feature_map_count())) {
SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ LOG(ERROR) << "Incompatible dimensions for X concatenation.\n"
+ << "input_dimensions[0]: " << input_dimensions[0].ToString()
+                   << "\ninput_dimensions[" << i
+ << "]: " << input_dimensions[i].ToString();
+ return *this;
+ }
+
+ if ((concat_direction == dnn::SpaceConcatenateMode::YDirection) &&
+ (input_dimensions[i].count() != input_dimensions[0].count() ||
+ input_dimensions[i].width() != input_dimensions[0].width() ||
+ input_dimensions[i].feature_map_count() !=
+ input_dimensions[0].feature_map_count())) {
+ SetError();
+ LOG(ERROR) << "Incompatible dimensions for Y concatenation.\n"
+ << "input_dimensions[0]: " << input_dimensions[0].ToString()
+                   << "\ninput_dimensions[" << i
+ << "]: " << input_dimensions[i].ToString();
+ return *this;
+ }
+ }
+ if (ok()) {
+ if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+ CheckError(dnn->DoSpaceConcatenate(this, input_dimensions, input_data,
+ output_data, concat_direction));
+ } else {
+ SetErrorAndLogNoDnnSupport();
+ }
+ }
+ return *this;
+}
+
+Stream &Stream::ThenReshape(const dnn::BatchDescriptor &input_dimensions,
+ const DeviceMemory<float> &input_data,
+ const dnn::BatchDescriptor &output_dimensions,
+ DeviceMemory<float> *output_data) {
+ VLOG_CALL(PARAM(input_dimensions), PARAM(input_data),
+ PARAM(output_dimensions), PARAM(output_data));
+
+ if (ok()) {
+ if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+ CheckError(dnn->DoReshape(this, input_dimensions, input_data,
+ output_dimensions, output_data));
+ } else {
+ SetErrorAndLogNoDnnSupport();
+ }
+ }
+ return *this;
+}
+
+Stream &Stream::ThenDepthToSpace(
+ const dnn::BatchDescriptor &input_dimensions,
+ const DeviceMemory<float> &input_data,
+ const dnn::DepthToSpaceLayout &depth_to_space_layout,
+ const int sqrt_depth_reduction, DeviceMemory<float> *output_data) {
+ VLOG_CALL(PARAM(input_dimensions), PARAM(input_data),
+ PARAM(depth_to_space_layout), PARAM(sqrt_depth_reduction),
+ PARAM(output_data));
+
+ if (ok()) {
+ if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+ CheckError(dnn->DoDepthToSpace(this, input_dimensions, input_data,
+ depth_to_space_layout,
+ sqrt_depth_reduction, output_data));
+ } else {
+ SetErrorAndLogNoDnnSupport();
+ }
+ }
+ return *this;
+}
+
+Stream &Stream::ThenSpaceToDepth(
+ const dnn::BatchDescriptor &input_dimensions,
+ const DeviceMemory<float> &input_data,
+ const dnn::DepthToSpaceLayout &space_to_depth_layout,
+ const int sqrt_depth_increase, DeviceMemory<float> *output_data) {
+ VLOG_CALL(PARAM(input_dimensions), PARAM(input_data),
+ PARAM(space_to_depth_layout), PARAM(sqrt_depth_increase),
+ PARAM(output_data));
+
+ if (ok()) {
+ if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+ CheckError(dnn->DoSpaceToDepth(this, input_dimensions, input_data,
+ space_to_depth_layout, sqrt_depth_increase,
+ output_data));
+ } else {
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -1182,10 +1212,30 @@ Stream &Stream::ThenElementwiseOperate(
input_data, output_dimensions,
output_data));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
+ }
+ }
+ return *this;
+}
+
+Stream &Stream::ThenElementwiseOperateScaledQuantized(
+ dnn::ElementwiseOperation operation,
+ port::ArraySlice<int> input_multiplicands, int output_divisor,
+ port::ArraySlice<dnn::BatchDescriptor> input_dimensions,
+ port::ArraySlice<const DeviceMemory<float> *> input_data,
+ const dnn::BatchDescriptor &output_dimensions,
+ DeviceMemory<float> *output_data) {
+ VLOG_CALL(PARAM(operation), PARAM(input_multiplicands), PARAM(output_divisor),
+ PARAM(input_dimensions), PARAM(input_data),
+ PARAM(output_dimensions), PARAM(output_data));
+
+ if (ok()) {
+ if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+ CheckError(dnn->DoElementwiseOperateScaledQuantized(
+ this, operation, input_multiplicands, output_divisor,
+ input_dimensions, input_data, output_dimensions, output_data));
+ } else {
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -1204,10 +1254,7 @@ Stream &Stream::ThenXYPad(const dnn::BatchDescriptor &dimensions,
CheckError(dnn->DoXYPad(this, dimensions, input_data, left_pad, right_pad,
top_pad, bottom_pad, output_data));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -1228,10 +1275,25 @@ Stream &Stream::ThenXYSlice(const dnn::BatchDescriptor &dimensions,
right_trim, top_trim, bottom_trim,
output_data));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
+ }
+ }
+ return *this;
+}
+
+Stream &Stream::ThenXYBroadcast(const dnn::BatchDescriptor &dimensions,
+ const DeviceMemory<float> &input_data,
+ int64 replicate_x, int64 replicate_y,
+ DeviceMemory<float> *output_data) {
+ VLOG_CALL(PARAM(dimensions), PARAM(input_data), PARAM(replicate_x),
+ PARAM(replicate_y), PARAM(output_data));
+
+ if (ok()) {
+ if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+ CheckError(dnn->DoXYBroadcast(this, dimensions, input_data, replicate_x,
+ replicate_y, output_data));
+ } else {
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -1248,10 +1310,7 @@ Stream &Stream::ThenMemcpyD2HQuantized(
CheckError(dnn->DoMemcpyD2HQuantized(this, gpu_unquantized_src, mode,
host_dst, size));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
@@ -1268,10 +1327,37 @@ Stream &Stream::ThenMemcpyH2DQuantized(
CheckError(dnn->DoMemcpyH2DQuantized(this, host_src, size, mode,
gpu_unquantized_dst));
} else {
- SetError();
- LOG(WARNING)
- << "attempting to perform DNN operation using StreamExecutor "
- "without DNN support";
+ SetErrorAndLogNoDnnSupport();
+ }
+ }
+ return *this;
+}
+
+Stream &Stream::ThenCopyHostBuffer2Device(
+ HostBuffer *buffer_src, DeviceMemory<float> *gpu_unquantized_dst) {
+ VLOG_CALL(PARAM(*buffer_src), PARAM(gpu_unquantized_dst));
+
+ if (ok()) {
+ if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+ CheckError(
+ dnn->DoCopyHostBuffer2Device(this, buffer_src, gpu_unquantized_dst));
+ } else {
+ SetErrorAndLogNoDnnSupport();
+ }
+ }
+ return *this;
+}
+
+Stream &Stream::ThenCopyDevice2HostBuffer(
+ const DeviceMemory<float> &gpu_unquantized_src, HostBuffer *buffer_dst) {
+ VLOG_CALL(PARAM(gpu_unquantized_src), PARAM(*buffer_dst));
+
+ if (ok()) {
+ if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+ CheckError(
+ dnn->DoCopyDevice2HostBuffer(this, gpu_unquantized_src, buffer_dst));
+ } else {
+ SetErrorAndLogNoDnnSupport();
}
}
return *this;
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index 0d16495a1d..711eb3079a 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -499,6 +499,44 @@ class Stream {
port::ArraySlice<const DeviceMemory<float> *> input_data,
DeviceMemory<float> *output_data);
+ Stream &ThenSpaceConcatenate(
+ port::ArraySlice<dnn::BatchDescriptor> input_dimensions,
+ port::ArraySlice<const DeviceMemory<float> *> input_data,
+ DeviceMemory<float> *output_data,
+ dnn::SpaceConcatenateMode concat_direction);
+
+ // Change the layout of the data by shrinking one dimension (or set of
+ // dimensions) and growing another dimension (or set of dimensions), while
+ // keeping the total number of data elements constant, and maintaining the
+ // current data ordering.
+ Stream &ThenReshape(const dnn::BatchDescriptor &input_dimensions,
+ const DeviceMemory<float> &input_data,
+ const dnn::BatchDescriptor &output_dimensions,
+ DeviceMemory<float> *output_data);
+
+ // Depth to space takes an X by Y image with depth D*M² and changes it to an
+ // MX x MY image with depth D. Each input location (x,y) with depth D*M² in
+ // the input image is changed to an MxM contiguous area in the output image,
+ // with the values being laid out in raster order specified by
+ // DepthToSpaceLayout, and will have a new depth of D.
+ // See the DoDepthToSpace comment for more information.
+ Stream &ThenDepthToSpace(const dnn::BatchDescriptor &input_dimensions,
+ const DeviceMemory<float> &input_data,
+ const dnn::DepthToSpaceLayout &depth_to_space_layout,
+ const int sqrt_depth_reduction,
+ DeviceMemory<float> *output_data);
+
+ // Space to depth is the inverse of depth to space. Space to depth takes each
+ // non-overlapping M by M patch (in the X and Y dimensions) with depth D of
+ // the input, and transforms it to a 1 by 1 patch with depth D*M². If the
+ // input has size (MX, MY, D), the output has size (X, Y, D*M²). The number of
+ // data elements is not changed.
+ Stream &ThenSpaceToDepth(const dnn::BatchDescriptor &input_dimensions,
+ const DeviceMemory<float> &input_data,
+ const dnn::DepthToSpaceLayout &space_to_depth_layout,
+ const int sqrt_depth_increase,
+ DeviceMemory<float> *output_data);
+
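The two comments above describe the depth-to-space and space-to-depth layouts in prose. As a purely illustrative aid (not the StreamExecutor implementation; it assumes a plain HWC buffer with the depth index split into (dy, dx, d) in row-major order, which may differ from the order selected by DepthToSpaceLayout), the index arithmetic looks roughly like this:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Illustrative depth-to-space: input is laid out as [H][W][D*M*M] (HWC),
// output as [H*M][W*M][D]. The channel index is assumed to encode
// (dy, dx, d) in row-major order; the real DepthToSpaceLayout may differ.
std::vector<float> DepthToSpace(const std::vector<float>& in, int H, int W,
                                int D, int M) {
  assert(in.size() == static_cast<size_t>(H) * W * D * M * M);
  std::vector<float> out(static_cast<size_t>(H) * M * W * M * D);
  for (int y = 0; y < H; ++y) {
    for (int x = 0; x < W; ++x) {
      for (int dy = 0; dy < M; ++dy) {
        for (int dx = 0; dx < M; ++dx) {
          for (int d = 0; d < D; ++d) {
            // Source: pixel (y, x), channel (dy*M + dx)*D + d.
            size_t src = ((static_cast<size_t>(y) * W + x) * M * M +
                          (dy * M + dx)) * D + d;
            // Destination: pixel (y*M + dy, x*M + dx), channel d.
            size_t dst = ((static_cast<size_t>(y) * M + dy) * (W * M) +
                          (x * M + dx)) * D + d;
            out[dst] = in[src];
          }
        }
      }
    }
  }
  return out;
}
```

Space-to-depth is the exact inverse: swapping the `src` and `dst` expressions above turns an (H*M, W*M, D) buffer back into an (H, W, D*M*M) one, with the element count unchanged in both directions.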
Stream &ThenElementwiseOperate(
dnn::ElementwiseOperation operation,
port::ArraySlice<dnn::BatchDescriptor> input_dimensions,
@@ -506,6 +544,14 @@ class Stream {
const dnn::BatchDescriptor &output_dimensions,
DeviceMemory<float> *output_data);
+ Stream &ThenElementwiseOperateScaledQuantized(
+ dnn::ElementwiseOperation operation,
+ port::ArraySlice<int> input_multiplicands, int output_divisor,
+ port::ArraySlice<dnn::BatchDescriptor> input_dimensions,
+ port::ArraySlice<const DeviceMemory<float> *> input_data,
+ const dnn::BatchDescriptor &output_dimensions,
+ DeviceMemory<float> *output_data);
+
Stream &ThenXYPad(const dnn::BatchDescriptor &dimensions,
const DeviceMemory<float> &input_data, int64 left_pad,
int64 right_pad, int64 top_pad, int64 bottom_pad,
@@ -516,6 +562,14 @@ class Stream {
int64 right_trim, int64 top_trim, int64 bottom_trim,
DeviceMemory<float> *output_data);
+ // Grows the input tensor by replicating the X and Y dimensions. The batch and
+ // depth/feature_map dimensions are unchanged. Currently, the input tensor is
+ // limited to X=1 and Y=1.
+ Stream &ThenXYBroadcast(const dnn::BatchDescriptor &dimensions,
+ const DeviceMemory<float> &input_data,
+ int64 replicate_x, int64 replicate_y,
+ DeviceMemory<float> *output_data);
+
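For the X=1, Y=1 case mentioned in the comment above, XY broadcasting amounts to tiling each per-feature-map value across a replicate_y by replicate_x plane. A minimal stand-alone sketch (hypothetical helper, HWC layout assumed, batch dimension omitted):

```cpp
#include <cstddef>
#include <vector>

// Illustrative XY broadcast for the currently supported case (X=1, Y=1):
// each of the D per-feature-map values is replicated into a
// replicate_y x replicate_x plane, producing a buffer of shape
// [replicate_y][replicate_x][D].
std::vector<float> XYBroadcast1x1(const std::vector<float>& in,  // size D
                                  int replicate_x, int replicate_y) {
  const int D = static_cast<int>(in.size());
  std::vector<float> out(static_cast<size_t>(replicate_y) * replicate_x * D);
  for (int y = 0; y < replicate_y; ++y) {
    for (int x = 0; x < replicate_x; ++x) {
      for (int d = 0; d < D; ++d) {
        out[(static_cast<size_t>(y) * replicate_x + x) * D + d] = in[d];
      }
    }
  }
  return out;
}
```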
// See DnnSupport::DoMemcpyD2HQuantized.
Stream &ThenMemcpyD2HQuantized(const DeviceMemory<float> &gpu_unquantized_src,
dnn::QuantizedActivationMode mode,
@@ -549,6 +603,14 @@ class Stream {
Quantization<ElementType>::kModeId, gpu_unquantized_dst);
}
+ // See DnnSupport::DoCopyHostBuffer2Device.
+ Stream &ThenCopyHostBuffer2Device(HostBuffer *buffer_src,
+ DeviceMemory<float> *gpu_unquantized_dst);
+
+ // See DnnSupport::DoCopyDevice2HostBuffer.
+ Stream &ThenCopyDevice2HostBuffer(
+ const DeviceMemory<float> &gpu_unquantized_src, HostBuffer *buffer_dst);
+
/////////////////
// BLAS support
@@ -1527,6 +1589,12 @@ class Stream {
void SetError() { CheckError(false /* = operation_retcode */); }
+ void SetErrorAndLogNoDnnSupport() {
+ SetError();
+ LOG(WARNING) << "attempting to perform DNN operation using StreamExecutor "
+ "without DNN support";
+ }
+
// The StreamExecutor that supports the operation of this stream.
StreamExecutor *parent_;
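This helper is what lets all of the repetitive else branches in stream.cc above shrink to a single call. A minimal, self-contained sketch of the pattern (hypothetical FakeStream class, not the real Stream):

```cpp
#include <iostream>

// Minimal sketch of the "set error and log once" pattern: a shared helper
// keeps the error handling in one place so every Then*() wrapper can use
// the same two-line else branch.
class FakeStream {
 public:
  bool ok() const { return ok_; }

  FakeStream& ThenSomeDnnOp(bool have_dnn) {
    if (ok()) {
      if (have_dnn) {
        // CheckError(dnn->DoSomeOp(...)) would go here.
      } else {
        SetErrorAndLogNoDnnSupport();
      }
    }
    return *this;  // Allow chaining, mirroring Stream's fluent API.
  }

 private:
  void SetError() { ok_ = false; }
  void SetErrorAndLogNoDnnSupport() {
    SetError();
    std::cerr << "attempting to perform DNN operation using StreamExecutor "
                 "without DNN support\n";
  }

  bool ok_ = true;
};
```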
diff --git a/tensorflow/stream_executor/stream_executor_internal.h b/tensorflow/stream_executor/stream_executor_internal.h
index 57db7775a6..d6d55fd623 100644
--- a/tensorflow/stream_executor/stream_executor_internal.h
+++ b/tensorflow/stream_executor/stream_executor_internal.h
@@ -199,14 +199,14 @@ class StreamExecutorInterface {
virtual bool SynchronousMemZero(DeviceMemoryBase *location, uint64 size) = 0;
virtual bool SynchronousMemSet(DeviceMemoryBase *location, int value,
uint64 size) = 0;
- virtual bool SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
- const void *host_src, uint64 size) = 0;
- virtual bool SynchronousMemcpy(void *host_dst,
- const DeviceMemoryBase &gpu_src,
- uint64 size) = 0;
- virtual bool SynchronousMemcpyDeviceToDevice(DeviceMemoryBase *gpu_dst,
- const DeviceMemoryBase &gpu_src,
- uint64 size) = 0;
+ virtual port::Status SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
+ const void *host_src, uint64 size) = 0;
+ virtual port::Status SynchronousMemcpy(void *host_dst,
+ const DeviceMemoryBase &gpu_src,
+ uint64 size) = 0;
+ virtual port::Status SynchronousMemcpyDeviceToDevice(
+ DeviceMemoryBase *gpu_dst, const DeviceMemoryBase &gpu_src,
+ uint64 size) = 0;
virtual bool MemZero(Stream *stream, DeviceMemoryBase *location,
uint64 size) = 0;
virtual bool Memset(Stream *stream, DeviceMemoryBase *location,
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc
index 7739d31662..71a5a45b67 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc
+++ b/tensorflow/stream_executor/stream_executor_pimpl.cc
@@ -491,7 +491,12 @@ bool StreamExecutor::SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
// Tracing overloaded methods is very difficult due to issues with type
// inference on template args. Since use of these overloaded methods is
// discouraged anyway, this isn't a huge deal.
- return implementation_->SynchronousMemcpy(gpu_dst, host_src, size);
+ port::Status status =
+ implementation_->SynchronousMemcpy(gpu_dst, host_src, size);
+ if (!status.ok()) {
+ LOG(ERROR) << "synchronous memcpy: " << status;
+ }
+ return status.ok();
}
bool StreamExecutor::SynchronousMemcpy(void *host_dst,
@@ -501,7 +506,12 @@ bool StreamExecutor::SynchronousMemcpy(void *host_dst,
<< ", gpu_src=" << gpu_src.opaque() << ", size=" << size << ") D2H"
<< StackTraceIfVLOG10();
- return implementation_->SynchronousMemcpy(host_dst, gpu_src, size);
+ port::Status status =
+ implementation_->SynchronousMemcpy(host_dst, gpu_src, size);
+ if (!status.ok()) {
+ LOG(ERROR) << "synchronous memcpy: " << status;
+ }
+ return status.ok();
}
bool StreamExecutor::SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
@@ -511,8 +521,12 @@ bool StreamExecutor::SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
<< gpu_dst->opaque() << ", gpu_src=" << gpu_src.opaque()
<< ", size=" << size << ") D2D" << StackTraceIfVLOG10();
- return implementation_->SynchronousMemcpyDeviceToDevice(gpu_dst, gpu_src,
- size);
+ port::Status status =
+ implementation_->SynchronousMemcpyDeviceToDevice(gpu_dst, gpu_src, size);
+ if (!status.ok()) {
+ LOG(ERROR) << "synchronous memcpy: " << status;
+ }
+ return status.ok();
}
port::Status StreamExecutor::SynchronousMemcpyD2H(
@@ -525,13 +539,15 @@ port::Status StreamExecutor::SynchronousMemcpyD2H(
SCOPED_TRACE(TraceListener::SynchronousMemcpyD2H,
&result, gpu_src, size, host_dst);
- if (!implementation_->SynchronousMemcpy(host_dst, gpu_src, size)) {
+ port::Status status =
+ implementation_->SynchronousMemcpy(host_dst, gpu_src, size);
+ if (!status.ok()) {
return port::Status{
port::error::INTERNAL,
port::Printf(
"failed to synchronously memcpy device-to-host: GPU %p to host %p "
- "size %lld",
- gpu_src.opaque(), host_dst, size)};
+ "size %lld: %s",
+ gpu_src.opaque(), host_dst, size, status.ToString().c_str())};
}
return result;
@@ -548,12 +564,15 @@ port::Status StreamExecutor::SynchronousMemcpyH2D(const void *host_src,
SCOPED_TRACE(TraceListener::SynchronousMemcpyH2D,
&result, host_src, size, gpu_dst);
- if (!implementation_->SynchronousMemcpy(gpu_dst, host_src, size)) {
+ port::Status status =
+ implementation_->SynchronousMemcpy(gpu_dst, host_src, size);
+ if (!status.ok()) {
result = port::Status{
port::error::INTERNAL,
port::Printf("failed to synchronously memcpy host-to-device: host "
- "%p to GPU %p size %lld",
- host_src, gpu_dst->opaque(), size)};
+ "%p to GPU %p size %lld: %s",
+ host_src, gpu_dst->opaque(), size,
+ status.ToString().c_str())};
}
return result;
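All three SynchronousMemcpy wrappers above follow the same pattern: call the now Status-returning implementation, log the detailed status on failure, and keep returning a plain bool for existing callers. A stand-alone sketch of that pattern, using a hypothetical MiniStatus in place of port::Status:

```cpp
#include <iostream>
#include <string>
#include <utility>

// Hypothetical stand-in for port::Status, just enough for the sketch.
class MiniStatus {
 public:
  static MiniStatus OK() { return MiniStatus(true, ""); }
  static MiniStatus Internal(std::string msg) {
    return MiniStatus(false, std::move(msg));
  }
  bool ok() const { return ok_; }
  const std::string& message() const { return msg_; }

 private:
  MiniStatus(bool ok, std::string msg) : ok_(ok), msg_(std::move(msg)) {}
  bool ok_;
  std::string msg_;
};

// Status-returning "implementation" layer, as in StreamExecutorInterface.
MiniStatus SynchronousMemcpyImpl(void* dst, const void* src, unsigned size) {
  if (dst == nullptr || src == nullptr || size == 0) {
    return MiniStatus::Internal("bad arguments to synchronous memcpy");
  }
  // The real copy is elided; only the status plumbing matters here.
  return MiniStatus::OK();
}

// Legacy bool-returning wrapper, as in StreamExecutor::SynchronousMemcpy:
// log the detailed status, surface only success/failure to old callers.
bool SynchronousMemcpy(void* dst, const void* src, unsigned size) {
  MiniStatus status = SynchronousMemcpyImpl(dst, src, size);
  if (!status.ok()) {
    std::cerr << "synchronous memcpy: " << status.message() << "\n";
  }
  return status.ok();
}
```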
diff --git a/tensorflow/tensorboard/BUILD b/tensorflow/tensorboard/BUILD
index 21f6519cab..2887fb4362 100644
--- a/tensorflow/tensorboard/BUILD
+++ b/tensorflow/tensorboard/BUILD
@@ -30,6 +30,7 @@ py_binary(
deps = [
"//tensorflow/python:platform",
"//tensorflow/tensorboard/backend:server",
+ "@werkzeug",
],
)
diff --git a/tensorflow/tensorboard/tensorboard.py b/tensorflow/tensorboard/tensorboard.py
index 9adcee7e36..42d5aedced 100644
--- a/tensorflow/tensorboard/tensorboard.py
+++ b/tensorflow/tensorboard/tensorboard.py
@@ -23,6 +23,7 @@ from __future__ import print_function
import os
import socket
+from werkzeug.serving import run_simple
from tensorflow.python.platform import app
from tensorflow.python.platform import flags
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 2a9fcae5e5..7fa7e4a91d 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -140,28 +140,27 @@ def tf_gen_op_libs(op_lib_names, deps=None):
linkstatic=1,)
def tf_gen_op_wrapper_cc(name, out_ops_file, pkg="",
- op_gen="//tensorflow/cc:cc_op_gen_main"):
+ op_gen="//tensorflow/cc:cc_op_gen_main",
+ deps=None,
+ include_internal_ops=0):
# Construct an op generator binary for these ops.
tool = out_ops_file + "_gen_cc"
+ if deps == None:
+ deps = [pkg + ":" + name + "_op_lib"]
native.cc_binary(
name = tool,
copts = tf_copts(),
linkopts = ["-lm"],
linkstatic = 1, # Faster to link this one-time-use binary dynamically
- deps = ([op_gen, pkg + ":" + name + "_op_lib"])
+ deps = [op_gen] + deps
)
- # Run the op generator.
- if name == "sendrecv_ops" or name == "function_ops":
- include_internal = "1"
- else:
- include_internal = "0"
native.genrule(
name=name + "_genrule",
outs=[out_ops_file + ".h", out_ops_file + ".cc"],
tools=[":" + tool],
cmd=("$(location :" + tool + ") $(location :" + out_ops_file + ".h) " +
- "$(location :" + out_ops_file + ".cc) " + include_internal))
+ "$(location :" + out_ops_file + ".cc) " + str(include_internal_ops)))
# Given a list of "op_lib_names" (a list of files in the ops directory
# without their .cc extensions), generate individual C++ .cc and .h
@@ -192,11 +191,14 @@ def tf_gen_op_wrappers_cc(name,
"//tensorflow/cc:const_op",
],
op_gen="//tensorflow/cc:cc_op_gen_main",
+ include_internal_ops=0,
visibility=None):
subsrcs = other_srcs
subhdrs = other_hdrs
for n in op_lib_names:
- tf_gen_op_wrapper_cc(n, "ops/" + n, pkg=pkg, op_gen=op_gen)
+ tf_gen_op_wrapper_cc(
+ n, "ops/" + n, pkg=pkg, op_gen=op_gen,
+ include_internal_ops=include_internal_ops)
subsrcs += ["ops/" + n + ".cc"]
subhdrs += ["ops/" + n + ".h"]
diff --git a/tensorflow/tools/ci_build/builds/libtensorflow.sh b/tensorflow/tools/ci_build/builds/libtensorflow.sh
index a9989fe504..683ab9f77b 100755
--- a/tensorflow/tools/ci_build/builds/libtensorflow.sh
+++ b/tensorflow/tools/ci_build/builds/libtensorflow.sh
@@ -46,20 +46,17 @@ function build_libtensorflow_tarball() {
fi
bazel clean --expunge
yes "" | ./configure
-
- # TODO(ashankar): Once
- # https://github.com/tensorflow/tensorflow/commit/1b32b698eddc10c0d85b0b8cf838f42023394de7
- # can be undone, i.e., when bazel supports pkg_tar with python3+ then all of this below
- # can be replaced with something like:
- # bazel build ${BAZEL_OPTS} //tensorflow/tools/lib_package:libtensorflow.tar.gz
-
- bazel build ${BAZEL_OPTS} //tensorflow:libtensorflow.so
+
+ # Remove this test call when
+ # https://github.com/bazelbuild/bazel/issues/2352
+ # and https://github.com/bazelbuild/bazel/issues/1580
+ # have been resolved and the "manual" tags on the BUILD targets
+ # in tensorflow/tools/lib_package/BUILD are removed.
+  # Until then, the test must be run manually.
+ bazel test ${BAZEL_OPTS} //tensorflow/tools/lib_package/...
+
+ bazel build ${BAZEL_OPTS} //tensorflow/tools/lib_package:libtensorflow.tar.gz
DIR=lib_package
- rm -rf ${DIR}
- mkdir -p ${DIR}/build/lib
- mkdir -p ${DIR}/build/include/tensorflow/c
- cp bazel-bin/tensorflow/libtensorflow.so ${DIR}/build/lib
- cp tensorflow/c/c_api.h ${DIR}/build/include/tensorflow/c
- tar -C ${DIR}/build -cvf ${DIR}/libtensorflow${TARBALL_SUFFIX}.tar.gz include/tensorflow/c/c_api.h lib/libtensorflow.so
- rm -rf ${DIR}/build
+ mkdir -p ${DIR}
+ cp bazel-bin/tensorflow/tools/lib_package/libtensorflow.tar.gz ${DIR}/libtensorflow${TARBALL_SUFFIX}.tar.gz
}
diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index 6f6684dcdf..46f97891d3 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -332,6 +332,11 @@ else
EXTRA_ARGS="${TF_BUILD_APPEND_ARGUMENTS} --test_tag_filters=-benchmark-test"
fi
+# For any "tool" dependencies in genrules, Bazel will build them for host
+# instead of the target configuration. We can save some build time by setting
+# this flag, and it only affects a few tests.
+EXTRA_ARGS="${EXTRA_ARGS} --distinct_host_configuration=false"
+
# Process PIP install-test option
if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] ||
[[ ${TF_BUILD_IS_PIP} == "both" ]]; then
diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh
index 0d890f5684..975a14e7d5 100755
--- a/tensorflow/tools/ci_build/ci_sanity.sh
+++ b/tensorflow/tools/ci_build/ci_sanity.sh
@@ -292,8 +292,8 @@ do_buildifier(){
}
do_external_licenses_check(){
- echo "Running do_external_licenses_check"
- echo ""
+ BUILD_TARGET="$1"
+ LICENSES_TARGET="$2"
EXTERNAL_LICENSES_CHECK_START_TIME=$(date +'%s')
@@ -302,8 +302,8 @@ do_external_licenses_check(){
MISSING_LICENSES_FILE="$(mktemp)_missing_licenses.log"
EXTRA_LICENSES_FILE="$(mktemp)_extra_licenses.log"
- echo "Getting external dependencies for //tensorflow/tools/pip_package:build_pip_package."
- bazel query 'attr("licenses", "notice", deps(//tensorflow/tools/pip_package:build_pip_package))' --no_implicit_deps --no_host_deps --keep_going \
+ echo "Getting external dependencies for ${BUILD_TARGET}"
+ bazel query "attr('licenses', 'notice', deps(${BUILD_TARGET}))" --no_implicit_deps --no_host_deps --keep_going \
| egrep -v "^//tensorflow" \
| sed -e 's|:.*||' \
| sort \
@@ -311,8 +311,8 @@ do_external_licenses_check(){
| tee ${EXTERNAL_DEPENDENCIES_FILE}
echo
- echo "Getting list of external licenses."
- bazel query 'deps(//tensorflow/tools/pip_package:licenses)' --no_implicit_deps --no_host_deps --keep_going \
+ echo "Getting list of external licenses mentioned in ${LICENSES_TARGET}."
+ bazel query "deps(${LICENSES_TARGET})" --no_implicit_deps --no_host_deps --keep_going \
| egrep -v "^//tensorflow" \
| sed -e 's|:.*||' \
| sort \
@@ -331,7 +331,7 @@ do_external_licenses_check(){
echo
if [[ -s ${MISSING_LICENSES_FILE} ]] || [[ -s ${EXTRA_LICENSES_FILE} ]] ; then
- echo "FAIL: pip package external dependencies vs licenses mismatch."
+    echo "FAIL: mismatch between packaged licenses and external dependencies"
if [[ -s ${MISSING_LICENSES_FILE} ]] ; then
echo "Missing the licenses for the following external dependencies:"
cat ${MISSING_LICENSES_FILE}
@@ -355,6 +355,21 @@ do_external_licenses_check(){
fi
}
+do_pip_package_licenses_check() {
+ echo "Running do_pip_package_licenses_check"
+ echo ""
+ do_external_licenses_check \
+ "//tensorflow/tools/pip_package:build_pip_package" \
+ "//tensorflow/tools/pip_package:licenses"
+}
+
+do_lib_package_licenses_check() {
+ echo "Running do_lib_package_licenses_check"
+ echo ""
+ do_external_licenses_check \
+ "//tensorflow:libtensorflow.so" \
+ "//tensorflow/tools/lib_package:clicenses_generate"
+}
# Run bazel build --nobuild to test the validity of the BUILD files
do_bazel_nobuild() {
@@ -376,8 +391,8 @@ do_bazel_nobuild() {
}
# Supply all sanity step commands and descriptions
-SANITY_STEPS=("do_pylint PYTHON2" "do_pylint PYTHON3" "do_buildifier" "do_bazel_nobuild" "do_external_licenses_check")
-SANITY_STEPS_DESC=("Python 2 pylint" "Python 3 pylint" "buildifier check" "bazel nobuild" "external dependencies licenses check")
+SANITY_STEPS=("do_pylint PYTHON2" "do_pylint PYTHON3" "do_buildifier" "do_bazel_nobuild" "do_pip_package_licenses_check" "do_lib_package_licenses_check")
+SANITY_STEPS_DESC=("Python 2 pylint" "Python 3 pylint" "buildifier check" "bazel nobuild" "pip: license check for external dependencies" "C library: license check for external dependencies")
INCREMENTAL_FLAG=""
diff --git a/tensorflow/tools/ci_build/install/install_deb_packages.sh b/tensorflow/tools/ci_build/install/install_deb_packages.sh
index 71e2a6c852..227b83ab9f 100755
--- a/tensorflow/tools/ci_build/install/install_deb_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_deb_packages.sh
@@ -21,18 +21,11 @@ ubuntu_version=$(cat /etc/issue | grep -i ubuntu | awk '{print $2}' | \
# Install dependencies from ubuntu deb repository.
apt-get update
-set +e
-ffmpeg_location=$(which ffmpeg)
-if [[ -z "$ffmpeg_location" && "$ubuntu_version" == "14" ]]; then
- set -e
+if [[ "$ubuntu_version" == "14" ]]; then
# specifically for trusty linked from ffmpeg.org
add-apt-repository -y ppa:mc3man/trusty-media
apt-get update
apt-get dist-upgrade -y
- apt-get install -y ffmpeg libav-tools
-else
- set -e
- apt-get install -y ffmpeg libav-tools
fi
apt-get install -y --no-install-recommends \
@@ -41,6 +34,7 @@ apt-get install -y --no-install-recommends \
build-essential \
cmake \
curl \
+ ffmpeg \
git \
libcurl4-openssl-dev \
libtool \
diff --git a/tensorflow/tools/graph_transforms/summarize_graph_main.cc b/tensorflow/tools/graph_transforms/summarize_graph_main.cc
index 638296b923..55b55e0a15 100644
--- a/tensorflow/tools/graph_transforms/summarize_graph_main.cc
+++ b/tensorflow/tools/graph_transforms/summarize_graph_main.cc
@@ -65,7 +65,8 @@ Status SummarizeGraph(const GraphDef& graph) {
MapNodesToOutputs(graph, &output_map);
std::vector<const NodeDef*> outputs;
for (const NodeDef& node : graph.node()) {
- if (output_map.count(node.name()) == 0) {
+ if ((output_map.count(node.name()) == 0) && (node.op() != "Const") &&
+ (node.op() != "Assign")) {
outputs.push_back(&node);
}
}
diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD
new file mode 100644
index 0000000000..41e7221efe
--- /dev/null
+++ b/tensorflow/tools/lib_package/BUILD
@@ -0,0 +1,107 @@
+# Packaging the TensorFlow C API into a small, standalone archive for use with
+# language bindings and installations without Python.
+#
+# TODO(ashankar): Something similar for the JNI library for Java?
+# TODO(ashankar): Something similar for the C++ API (caveat: ABI compatibility)
+
+package(default_visibility = ["//visibility:private"])
+
+load("@bazel_tools//tools/build_defs/pkg:pkg.bzl", "pkg_tar")
+
+pkg_tar(
+ name = "libtensorflow",
+ extension = "tar.gz",
+ # Mark as "manual" till
+ # https://github.com/bazelbuild/bazel/issues/2352
+ # and https://github.com/bazelbuild/bazel/issues/1580
+ # are resolved, otherwise these rules break when built
+ # with Python 3.
+ tags = ["manual"],
+ deps = [
+ ":cheaders",
+ ":clib",
+ ":clicenses",
+ ],
+)
+
+pkg_tar(
+ name = "cheaders",
+ files = ["//tensorflow/c:headers"],
+ package_dir = "include/tensorflow/c",
+ # Mark as "manual" till
+ # https://github.com/bazelbuild/bazel/issues/2352
+ # and https://github.com/bazelbuild/bazel/issues/1580
+ # are resolved, otherwise these rules break when built
+ # with Python 3.
+ tags = ["manual"],
+)
+
+pkg_tar(
+ name = "clib",
+ files = ["//tensorflow:libtensorflow.so"],
+ package_dir = "lib",
+ # Mark as "manual" till
+ # https://github.com/bazelbuild/bazel/issues/2352
+ # and https://github.com/bazelbuild/bazel/issues/1580
+ # are resolved, otherwise these rules break when built
+ # with Python 3.
+ tags = ["manual"],
+)
+
+pkg_tar(
+ name = "clicenses",
+ files = [":include/tensorflow/c/LICENSE"],
+ package_dir = "include/tensorflow/c",
+ # Mark as "manual" till
+ # https://github.com/bazelbuild/bazel/issues/2352
+ # and https://github.com/bazelbuild/bazel/issues/1580
+ # are resolved, otherwise these rules break when built
+ # with Python 3.
+ tags = ["manual"],
+)
+
+genrule(
+ name = "clicenses_generate",
+ srcs = [
+ "//third_party/hadoop:LICENSE.txt",
+ "//third_party/eigen3:LICENSE",
+ "@boringssl//:LICENSE",
+ "@com_googlesource_code_re2//:LICENSE",
+ "@curl//:COPYING",
+ "@eigen_archive//:COPYING.MPL2",
+ "@farmhash_archive//:COPYING",
+ "@gemmlowp//:LICENSE",
+ "@gif_archive//:COPYING",
+ "@grpc//:LICENSE",
+ "@highwayhash//:LICENSE",
+ "@jemalloc//:COPYING",
+ "@jpeg//:LICENSE.md",
+ "@libxsmm_archive//:LICENSE",
+ "@local_config_sycl//sycl:LICENSE.text",
+ "@nanopb_git//:LICENSE.txt",
+ "@png_archive//:LICENSE",
+ "@protobuf//:LICENSE",
+ "@zlib_archive//:zlib.h",
+ ],
+ outs = ["include/tensorflow/c/LICENSE"],
+ cmd = "$(location :concat_licenses.sh) $(SRCS) >$@",
+ tools = [":concat_licenses.sh"],
+)
+
+sh_test(
+ name = "libtensorflow_test",
+ size = "small",
+ srcs = ["libtensorflow_test.sh"],
+ data = [
+ "libtensorflow_test.c",
+ ":libtensorflow.tar.gz",
+ ],
+ # Mark as "manual" till
+ # https://github.com/bazelbuild/bazel/issues/2352
+ # and https://github.com/bazelbuild/bazel/issues/1580
+ # are resolved, otherwise these rules break when built
+ # with Python 3.
+ # Till then, this test is explicitly executed when building
+ # the release by tensorflow/tools/ci_build/builds/libtensorflow.sh
+ tags = ["manual"],
+)
diff --git a/tensorflow/tools/lib_package/README.md b/tensorflow/tools/lib_package/README.md
new file mode 100644
index 0000000000..fbec0a067a
--- /dev/null
+++ b/tensorflow/tools/lib_package/README.md
@@ -0,0 +1,31 @@
+Bazel rules to package the TensorFlow C-library and [header
+files](https://www.tensorflow.org/code/tensorflow/c/c_api.h)
+into an archive.
+
+## TensorFlow C library
+
+The TensorFlow [C
+API](https://www.tensorflow.org/code/tensorflow/c/c_api.h)
+is typically a requirement of TensorFlow APIs in other languages such as
+[Go](https://www.tensorflow.org/code/tensorflow/go)
+and [Rust](https://github.com/tensorflow/rust).
+
+The command:
+
+```sh
+bazel build -c opt //tensorflow/tools/lib_package:libtensorflow
+```
+
+produces `bazel-bin/tensorflow/tools/lib_package/libtensorflow.tar.gz`, which
+can be distributed and installed using something like:
+
+```sh
+tar -C /usr/local -xzf libtensorflow.tar.gz
+```
+
+## Release
+
+Scripts to generate archives using these rules for release are in
+[tensorflow/tools/ci_build/linux](https://www.tensorflow.org/code/tensorflow/tools/ci_build/linux)
+and
+[tensorflow/tools/ci_build/osx](https://www.tensorflow.org/code/tensorflow/tools/ci_build/osx).
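As a hypothetical follow-up to the install step above (not part of this change), a minimal program can confirm that the unpacked headers and library are usable; it only calls TF_Version() from the bundled C API header:

```cpp
// Hypothetical smoke test for the unpacked archive; compile with e.g.
//   g++ -std=c++11 hello_tf.cc -I/usr/local/include -L/usr/local/lib -ltensorflow
#include <tensorflow/c/c_api.h>

#include <iostream>

int main() {
  // TF_Version() comes from the C API header shipped in the tarball.
  std::cout << "TensorFlow C library version: " << TF_Version() << std::endl;
  return 0;
}
```

If the archive was extracted somewhere other than /usr/local, the -I/-L paths and LD_LIBRARY_PATH (or DYLD_LIBRARY_PATH on macOS) need to point at the chosen prefix instead.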
diff --git a/tensorflow/tools/lib_package/concat_licenses.sh b/tensorflow/tools/lib_package/concat_licenses.sh
new file mode 100755
index 0000000000..2070f64e9f
--- /dev/null
+++ b/tensorflow/tools/lib_package/concat_licenses.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Script to combine multiple license files into a single one.
+
+for f in "$@"
+do
+ echo "--------------------------------------------------------------------------------"
+ echo "BEGIN LICENSE FOR $f"
+ echo "--------------------------------------------------------------------------------"
+  cat "$f"
+ echo "--------------------------------------------------------------------------------"
+ echo "END LICENSE FOR $f"
+ echo "--------------------------------------------------------------------------------"
+done
diff --git a/tensorflow/tools/lib_package/libtensorflow_test.c b/tensorflow/tools/lib_package/libtensorflow_test.c
new file mode 100644
index 0000000000..dff6fb77ec
--- /dev/null
+++ b/tensorflow/tools/lib_package/libtensorflow_test.c
@@ -0,0 +1,28 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Companion source file for libtensorflow_test.sh
+
+#include <tensorflow/c/c_api.h>
+
+int main() {
+ TF_Status* s = TF_NewStatus();
+ TF_SetStatus(s, TF_UNKNOWN, "Some error");
+ if (TF_GetCode(s) != TF_UNKNOWN) {
+ return 1;
+ }
+ TF_DeleteStatus(s);
+ return 0;
+}
diff --git a/tensorflow/tools/lib_package/libtensorflow_test.sh b/tensorflow/tools/lib_package/libtensorflow_test.sh
new file mode 100755
index 0000000000..6463ecea70
--- /dev/null
+++ b/tensorflow/tools/lib_package/libtensorflow_test.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+set -ex
+
+# Sanity test for the packaged C-library archive.
+# - Unarchive
+# - Compile a trivial C file that uses the archive
+# - Run it
+
+# Tools needed: A C-compiler and tar
+CC="${CC}"
+TAR="${TAR}"
+
+[ -z "${CC}" ] && CC="/usr/bin/gcc"
+[ -z "${TAR}" ] && TAR="tar"
+
+# bazel tests run with ${PWD} set to the root of the bazel workspace
+TARFILE="${PWD}/tensorflow/tools/lib_package/libtensorflow.tar.gz"
+CFILE="${PWD}/tensorflow/tools/lib_package/libtensorflow_test.c"
+
+cd ${TEST_TMPDIR}
+
+# Extract the archive into tensorflow/
+mkdir tensorflow
+${TAR} -xzf ${TARFILE} -Ctensorflow
+
+# Compile the test .c file
+${CC} ${CFILE} -Itensorflow/include -Ltensorflow/lib -ltensorflow -oa.out
+
+# Execute it, with the shared library available.
+# DYLD_LIBRARY_PATH is used on OS X, LD_LIBRARY_PATH on Linux
+export DYLD_LIBRARY_PATH=tensorflow/lib
+export LD_LIBRARY_PATH=tensorflow/lib
+./a.out
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 62fb9b9176..0ef09835e9 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -95,6 +95,7 @@ filegroup(
"@png_archive//:LICENSE",
"@protobuf//:LICENSE",
"@six_archive//:LICENSE",
+ "@werkzeug//:LICENSE",
"@zlib_archive//:zlib.h",
] + tf_additional_license_deps(),
)
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index a03e844ea2..1ad739d6cf 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -76,10 +76,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
native.new_http_archive(
name = "libxsmm_archive",
urls = [
- "https://github.com/hfp/libxsmm/archive/1.6.4.tar.gz",
+ # "http://bazel-mirror.storage.googleapis.com/github.com/hfp/libxsmm/archive/1.6.1.tar.gz",
+ "https://github.com/hfp/libxsmm/archive/1.6.5.tar.gz",
],
- sha256 = "3788bf1cdb60f119f8a04ed7ed96861322e539ce2d2ea977f00431d6b2b80beb",
- strip_prefix = "libxsmm-1.6.4",
+ sha256 = "5231419a8e13e7a6d286cf25d32a3aa75c443a625e5ea57024d36468bc3d5936",
+ strip_prefix = "libxsmm-1.6.5",
build_file = str(Label("//third_party:libxsmm.BUILD")),
)
@@ -191,6 +192,17 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
build_file = str(Label("//third_party:six.BUILD")),
)
+ native.new_http_archive(
+ name = "werkzeug",
+ urls = [
+ "http://bazel-mirror.storage.googleapis.com/pypi.python.org/packages/b7/7f/44d3cfe5a12ba002b253f6985a4477edfa66da53787a2a838a40f6415263/Werkzeug-0.11.10.tar.gz",
+ "https://pypi.python.org/packages/b7/7f/44d3cfe5a12ba002b253f6985a4477edfa66da53787a2a838a40f6415263/Werkzeug-0.11.10.tar.gz",
+ ],
+ strip_prefix = "Werkzeug-0.11.10",
+ sha256 = "cc64dafbacc716cdd42503cf6c44cb5a35576443d82f29f6829e5c49264aeeee",
+ build_file = str(Label("//third_party:werkzeug.BUILD")),
+ )
+
native.bind(
name = "six",
actual = "@six_archive//:six",
@@ -314,7 +326,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
# TODO(phawkins): currently, this rule uses an unofficial LLVM mirror.
# Switch to an official source of snapshots if/when possible.
- native.new_http_archive(
+ temp_workaround_http_archive(
name = "llvm",
urls = [
"http://bazel-mirror.storage.googleapis.com/github.com/llvm-mirror/llvm/archive/4e9e4f277ad254e02a0cff33c61cd827e600da62.tar.gz",
@@ -323,6 +335,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
sha256 = "ec67c57dfd85c2bb857fd13011c5c2aa3f1dc9f40c0a5bac13e78e76d6b61aa6",
strip_prefix = "llvm-4e9e4f277ad254e02a0cff33c61cd827e600da62",
build_file = str(Label("//third_party/llvm:llvm.BUILD")),
+ repository = tf_repo_name,
)
native.new_http_archive(
@@ -395,7 +408,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
actual = "@junit_jar//jar",
)
- native.new_http_archive(
+ temp_workaround_http_archive(
name = "jemalloc",
urls = [
"http://bazel-mirror.storage.googleapis.com/github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz",
@@ -404,4 +417,5 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8",
strip_prefix = "jemalloc-4.4.0",
build_file = str(Label("//third_party:jemalloc.BUILD")),
+ repository = tf_repo_name,
)
diff --git a/third_party/jemalloc.BUILD b/third_party/jemalloc.BUILD
index 2496d12627..aabff39d7b 100644
--- a/third_party/jemalloc.BUILD
+++ b/third_party/jemalloc.BUILD
@@ -5,7 +5,7 @@ licenses(["notice"]) # BSD
exports_files(["COPYING"])
-load("@//third_party:common.bzl", "template_rule")
+load("@%ws%//third_party:common.bzl", "template_rule")
cc_library(
name = "jemalloc",
diff --git a/third_party/llvm/llvm.BUILD b/third_party/llvm/llvm.BUILD
index 0f7ef74545..330d8b79ce 100644
--- a/third_party/llvm/llvm.BUILD
+++ b/third_party/llvm/llvm.BUILD
@@ -7,18 +7,18 @@ licenses(["notice"])
exports_files(["LICENSE.TXT"])
load(
- "@//third_party/llvm:llvm.bzl",
+ "@%ws%//third_party/llvm:llvm.bzl",
"gentbl",
"expand_cmake_vars",
"llvm_target_cmake_vars",
"cmake_var_string",
)
load(
- "@//third_party:common.bzl",
+ "@%ws%//third_party:common.bzl",
"template_rule",
)
-package(default_visibility = ["@//tensorflow/compiler/xla:internal"])
+package(default_visibility = ["@%ws%//tensorflow/compiler/xla:internal"])
llvm_host_triple = "x86_64-unknown-linux_gnu"
@@ -147,7 +147,7 @@ darwin_cmake_vars = {
# TODO(phawkins): use a better method to select the right host triple, rather
# than hardcoding x86_64.
all_cmake_vars = select({
- "@//tensorflow:darwin": cmake_var_string(
+ "@%ws%//tensorflow:darwin": cmake_var_string(
cmake_vars + llvm_target_cmake_vars("X86", "x86_64-apple-darwin") +
darwin_cmake_vars,
),
diff --git a/third_party/werkzeug.BUILD b/third_party/werkzeug.BUILD
new file mode 100644
index 0000000000..aaf1614bb9
--- /dev/null
+++ b/third_party/werkzeug.BUILD
@@ -0,0 +1,14 @@
+# Description:
+# Werkzeug provides utilities for making WSGI applications
+
+licenses(["notice"]) # BSD 3-Clause
+
+exports_files(["LICENSE"])
+
+# Note: this library includes test code. Consider creating a testonly target.
+py_library(
+ name = "werkzeug",
+ srcs = glob(["werkzeug/werkzeug/*.py"]),
+ srcs_version = "PY2AND3",
+ visibility = ["//visibility:public"],
+)