From 9e5529cd62446a883293e8c3f9484b95211add5b Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 14 May 2018 19:10:20 -0700 Subject: Added segment graphdef conversion functions Functional Dyn Ops --- configure.py | 74 +++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 31 deletions(-) (limited to 'configure.py') diff --git a/configure.py b/configure.py index 6d9aba61bb..69c9378a9c 100644 --- a/configure.py +++ b/configure.py @@ -977,6 +977,35 @@ def set_tf_cudnn_version(environ_cp): write_action_env_to_bazelrc('TF_CUDNN_VERSION', tf_cudnn_version) +def is_cuda_compatible(lib, cuda_ver, cudnn_ver): + """Check the compatibility between given library and cudnn/cudart libraries.""" + ldd_bin = which('ldd') or '/usr/bin/ldd' + ldd_out = run_shell([ldd_bin, lib], True) + ldd_out = ldd_out.split(os.linesep) + cudnn_pattern = re.compile('.*libcudnn.so\\.?(.*) =>.*$') + cuda_pattern = re.compile('.*libcudart.so\\.?(.*) =>.*$') + cudnn = None + cudart = None + cudnn_ok = True # assume no cudnn dependency by default + cuda_ok = True # assume no cuda dependency by default + for line in ldd_out: + if 'libcudnn.so' in line: + cudnn = cudnn_pattern.search(line) + cudnn_ok = False + elif 'libcudart.so' in line: + cudart = cuda_pattern.search(line) + cuda_ok = False + if cudnn and len(cudnn.group(1)): + cudnn = convert_version_to_int(cudnn.group(1)) + if cudart and len(cudart.group(1)): + cudart = convert_version_to_int(cudart.group(1)) + if cudnn is not None: + cudnn_ok = (cudnn == cudnn_ver) + if cudart is not None: + cuda_ok = (cudart == cuda_ver) + return cudnn_ok and cuda_ok + + def set_tf_tensorrt_install_path(environ_cp): """Set TENSORRT_INSTALL_PATH and TF_TENSORRT_VERSION. @@ -993,8 +1022,8 @@ def set_tf_tensorrt_install_path(environ_cp): raise ValueError('Currently TensorRT is only supported on Linux platform.') # Ask user whether to add TensorRT support. - if str(int(get_var( - environ_cp, 'TF_NEED_TENSORRT', 'TensorRT', False))) != '1': + if str(int(get_var(environ_cp, 'TF_NEED_TENSORRT', 'TensorRT', + False))) != '1': return for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): @@ -1007,47 +1036,29 @@ def set_tf_tensorrt_install_path(environ_cp): # Result returned from "read" will be used unexpanded. That make "~" # unusable. Going through one more level of expansion to handle that. 
- trt_install_path = os.path.realpath( - os.path.expanduser(trt_install_path)) + trt_install_path = os.path.realpath(os.path.expanduser(trt_install_path)) def find_libs(search_path): """Search for libnvinfer.so in "search_path".""" fl = set() if os.path.exists(search_path) and os.path.isdir(search_path): - fl.update([os.path.realpath(os.path.join(search_path, x)) - for x in os.listdir(search_path) if 'libnvinfer.so' in x]) + fl.update([ + os.path.realpath(os.path.join(search_path, x)) + for x in os.listdir(search_path) + if 'libnvinfer.so' in x + ]) return fl possible_files = find_libs(trt_install_path) possible_files.update(find_libs(os.path.join(trt_install_path, 'lib'))) possible_files.update(find_libs(os.path.join(trt_install_path, 'lib64'))) - - def is_compatible(tensorrt_lib, cuda_ver, cudnn_ver): - """Check the compatibility between tensorrt and cudnn/cudart libraries.""" - ldd_bin = which('ldd') or '/usr/bin/ldd' - ldd_out = run_shell([ldd_bin, tensorrt_lib]).split(os.linesep) - cudnn_pattern = re.compile('.*libcudnn.so\\.?(.*) =>.*$') - cuda_pattern = re.compile('.*libcudart.so\\.?(.*) =>.*$') - cudnn = None - cudart = None - for line in ldd_out: - if 'libcudnn.so' in line: - cudnn = cudnn_pattern.search(line) - elif 'libcudart.so' in line: - cudart = cuda_pattern.search(line) - if cudnn and len(cudnn.group(1)): - cudnn = convert_version_to_int(cudnn.group(1)) - if cudart and len(cudart.group(1)): - cudart = convert_version_to_int(cudart.group(1)) - return (cudnn == cudnn_ver) and (cudart == cuda_ver) - cuda_ver = convert_version_to_int(environ_cp['TF_CUDA_VERSION']) cudnn_ver = convert_version_to_int(environ_cp['TF_CUDNN_VERSION']) nvinfer_pattern = re.compile('.*libnvinfer.so.?(.*)$') highest_ver = [0, None, None] for lib_file in possible_files: - if is_compatible(lib_file, cuda_ver, cudnn_ver): + if is_cuda_compatible(lib_file, cuda_ver, cudnn_ver): matches = nvinfer_pattern.search(lib_file) if len(matches.groups()) == 0: continue @@ -1063,12 +1074,13 @@ def set_tf_tensorrt_install_path(environ_cp): # Try another alternative from ldconfig. ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' ldconfig_output = run_shell([ldconfig_bin, '-p']) - search_result = re.search( - '.*libnvinfer.so\\.?([0-9.]*).* => (.*)', ldconfig_output) + search_result = re.search('.*libnvinfer.so\\.?([0-9.]*).* => (.*)', + ldconfig_output) if search_result: libnvinfer_path_from_ldconfig = search_result.group(2) if os.path.exists(libnvinfer_path_from_ldconfig): - if is_compatible(libnvinfer_path_from_ldconfig, cuda_ver, cudnn_ver): + if is_cuda_compatible(libnvinfer_path_from_ldconfig, cuda_ver, + cudnn_ver): trt_install_path = os.path.dirname(libnvinfer_path_from_ldconfig) tf_tensorrt_version = search_result.group(1) break @@ -1227,7 +1239,7 @@ def set_tf_cuda_compute_capabilities(environ_cp): # Check whether all capabilities from the input is valid all_valid = True # Remove all whitespace characters before splitting the string - # that users may insert by accident, as this will result in error + # that users may insert by accident, as this will result in error tf_cuda_compute_capabilities = ''.join(tf_cuda_compute_capabilities.split()) for compute_capability in tf_cuda_compute_capabilities.split(','): m = re.match('[0-9]+.[0-9]+', compute_capability) -- cgit v1.2.3 From e80732c9895d1283af9b98d6277ad1a1015e2e9a Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Mon, 18 Jun 2018 09:57:19 -0700 Subject: Merge changes from github. 
PiperOrigin-RevId: 201011811 --- CONTRIBUTING.md | 2 +- README.md | 1 + RELEASE.md | 67 +++- configure.py | 5 + tensorflow/BUILD | 4 +- tensorflow/c/generate-pc.sh | 11 +- tensorflow/cc/gradients/math_grad.cc | 1 + tensorflow/cc/gradients/nn_grad.cc | 47 +++ tensorflow/cc/gradients/nn_grad_test.cc | 84 ++++- tensorflow/compiler/aot/codegen_test_h.golden | 4 +- .../compiler/aot/embedded_protocol_buffers.h | 2 +- tensorflow/compiler/aot/runtime.h | 4 +- tensorflow/compiler/aot/runtime_test.cc | 16 +- tensorflow/compiler/xla/service/cpu/BUILD | 18 +- tensorflow/compiler/xla/service/cpu/cpu_runtime.cc | 2 + tensorflow/compiler/xla/service/cpu/cpu_runtime.h | 1 + tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 8 +- .../compiler/xla/service/cpu/runtime_fft_impl.h | 20 +- .../xla/service/cpu/runtime_single_threaded_fft.cc | 32 ++ .../xla/service/cpu/runtime_single_threaded_fft.h | 31 ++ .../compiler/xla/service/cpu/simple_orc_jit.cc | 2 + tensorflow/compiler/xla/service/pattern_matcher.h | 2 +- .../compiler/xla/service/tuple_simplifier.cc | 7 + tensorflow/compiler/xla/service/tuple_simplifier.h | 9 +- .../compiler/xla/service/tuple_simplifier_test.cc | 77 ++++ tensorflow/contrib/autograph/__init__.py | 3 + tensorflow/contrib/cmake/tf_c.cmake | 22 +- tensorflow/contrib/cmake/tf_cc_ops.cmake | 2 +- tensorflow/contrib/cmake/tf_python.cmake | 3 +- tensorflow/contrib/cmake/tools/create_def_file.py | 9 +- .../bijectors/sinh_arcsinh_bijector_test.py | 28 +- tensorflow/contrib/eager/python/datasets.py | 3 +- .../python/examples/notebooks/4_high_level.ipynb | 4 +- .../feature_column/sequence_feature_column.py | 22 +- .../feature_column/sequence_feature_column_test.py | 41 ++ tensorflow/contrib/ffmpeg/__init__.py | 1 - tensorflow/contrib/ffmpeg/ffmpeg_ops.py | 1 - tensorflow/contrib/framework/__init__.py | 3 +- .../ops/fused_conv2d_bias_activation_op_test.py | 11 +- .../src_impl/hexagon_controller.c | 2 +- tensorflow/contrib/lite/download_dependencies.sh | 4 +- .../contrib/lite/examples/minimal/minimal.cc | 2 +- .../contrib/lite/g3doc/tf_ops_compatibility.md | 14 +- tensorflow/contrib/lite/java/ovic/README.md | 4 +- .../kernels/internal/reference/reference_ops.h | 4 +- tensorflow/contrib/lite/python/interpreter.py | 2 +- .../interpreter_wrapper/interpreter_wrapper.cc | 9 +- .../interpreter_wrapper/interpreter_wrapper.h | 3 +- tensorflow/contrib/lite/python/lite.py | 11 + tensorflow/contrib/lite/toco/import_tensorflow.cc | 2 +- tensorflow/contrib/lite/toco/toco_port.cc | 6 + tensorflow/contrib/lite/toco/toco_port.h | 18 + tensorflow/contrib/makefile/compile_nsync.sh | 2 +- .../contrib/makefile/download_dependencies.sh | 4 +- .../contrib/metrics/python/ops/metric_ops.py | 2 +- tensorflow/contrib/mpi_collectives/kernels/ring.h | 2 +- .../contrib/opt/python/training/adamax_test.py | 6 +- .../opt/python/training/model_average_optimizer.py | 2 +- tensorflow/contrib/periodic_resample/BUILD | 20 +- .../kernels/periodic_resample_op.cc | 5 + .../kernels/periodic_resample_op.h | 415 +++++++++++++++------ .../contrib/periodic_resample/ops/array_ops.cc | 53 ++- .../periodic_resample/ops/array_ops_test.cc | 41 ++ .../kernel_tests/periodic_resample_op_test.py | 27 +- .../python/ops/periodic_resample_op.py | 8 +- .../predictor/contrib_estimator_predictor.py | 5 +- .../contrib/predictor/core_estimator_predictor.py | 5 +- .../contrib/predictor/predictor_factories.py | 24 +- .../contrib/predictor/predictor_factories_test.py | 19 + .../contrib/predictor/saved_model_predictor.py | 6 +- tensorflow/contrib/quantize/README.md 
| 2 +- .../contrib/slim/python/slim/evaluation_test.py | 25 +- tensorflow/contrib/summary/summary.py | 5 +- .../contrib/tensor_forest/client/eval_metrics.py | 45 +-- .../contrib/tensor_forest/python/tensor_forest.py | 34 +- .../tensor_forest/python/tensor_forest_test.py | 45 +++ .../contrib/tensorrt/convert/convert_graph.cc | 66 ++-- .../contrib/tensorrt/convert/convert_nodes.cc | 97 +++-- tensorflow/contrib/tpu/python/tpu/datasets.py | 16 +- tensorflow/contrib/tpu/python/tpu/datasets_test.py | 26 ++ tensorflow/core/BUILD | 9 +- .../core/api_def/base_api/api_def_Selu.pbtxt | 4 + .../api_def/base_api/api_def_StringSplitV2.pbtxt | 48 +++ .../api_def/python_api/api_def_StringSplitV2.pbtxt | 4 + tensorflow/core/common_runtime/bfc_allocator.cc | 8 +- tensorflow/core/common_runtime/bfc_allocator.h | 3 +- .../direct_session_with_tracking_alloc_test.cc | 16 + .../common_runtime/mkl_threadpool_device_test.cc | 53 +++ tensorflow/core/common_runtime/process_util.cc | 11 +- .../core/common_runtime/threadpool_device.cc | 25 +- .../rpc/grpc_master_service_impl.cc | 4 +- .../core/distributed_runtime/rpc/grpc_testlib.cc | 10 +- tensorflow/core/framework/allocator.h | 5 - tensorflow/core/framework/op_gen_lib.cc | 1 + .../remote_fused_graph_execute_info.proto | 2 +- tensorflow/core/framework/tensor_test.cc | 24 +- tensorflow/core/graph/mkl_layout_pass.cc | 148 +++++++- tensorflow/core/graph/mkl_layout_pass_test.cc | 31 ++ tensorflow/core/grappler/costs/graph_properties.cc | 1 - tensorflow/core/grappler/optimizers/BUILD | 2 +- tensorflow/core/grappler/optimizers/remapper.cc | 4 +- tensorflow/core/kernels/as_string_op.cc | 2 + tensorflow/core/kernels/cwise_op_clip.cc | 43 +-- .../core/kernels/dense_update_functor_gpu.cu.cc | 1 + tensorflow/core/kernels/gather_functor.cc | 1 + tensorflow/core/kernels/gather_functor_gpu.cu.cc | 1 + tensorflow/core/kernels/gather_nd_op.cc | 4 + tensorflow/core/kernels/gather_nd_op_gpu.cu.cc | 2 + tensorflow/core/kernels/gather_op.cc | 1 + tensorflow/core/kernels/mkl_concat_op.cc | 213 ++++++++--- tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc | 2 + tensorflow/core/kernels/mkl_pooling_ops_common.h | 6 +- tensorflow/core/kernels/scatter_nd_op.cc | 4 + tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc | 1 + .../core/kernels/scoped_allocator_ops_test.cc | 9 +- tensorflow/core/kernels/segment_reduction_ops.h | 10 +- tensorflow/core/kernels/sparse_matmul_op.cc | 2 +- tensorflow/core/kernels/string_split_op.cc | 130 +++++++ tensorflow/core/ops/candidate_sampling_ops.cc | 5 +- tensorflow/core/ops/dataset_ops.cc | 24 +- tensorflow/core/ops/image_ops.cc | 4 +- tensorflow/core/ops/math_ops.cc | 2 +- tensorflow/core/ops/nn_ops.cc | 1 + tensorflow/core/ops/string_ops.cc | 20 +- tensorflow/core/platform/cpu_info.cc | 23 ++ tensorflow/core/platform/cpu_info.h | 7 + tensorflow/core/platform/default/build_config.bzl | 2 + .../core/platform/hadoop/hadoop_file_system.cc | 21 +- tensorflow/core/platform/posix/port.cc | 5 + tensorflow/core/public/version.h | 4 +- tensorflow/core/util/mkl_util.h | 50 ++- tensorflow/docs_src/community/groups.md | 29 +- tensorflow/docs_src/get_started/eager.md | 2 +- tensorflow/docs_src/get_started/index.md | 4 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 24 +- tensorflow/docs_src/install/install_linux.md | 24 +- tensorflow/docs_src/install/install_mac.md | 10 +- tensorflow/docs_src/install/install_sources.md | 17 +- tensorflow/docs_src/mobile/linking_libs.md | 2 +- 
tensorflow/docs_src/mobile/prepare_models.md | 4 +- tensorflow/docs_src/performance/quantization.md | 2 +- .../docs_src/programmers_guide/estimators.md | 19 +- .../docs_src/programmers_guide/feature_columns.md | 4 +- tensorflow/examples/learn/iris.py | 7 +- tensorflow/go/op/wrappers.go | 12 +- tensorflow/java/src/gen/cc/op_generator.cc | 11 +- tensorflow/java/src/gen/cc/op_specs.cc | 1 + tensorflow/python/eager/backprop.py | 4 +- tensorflow/python/estimator/BUILD | 5 +- tensorflow/python/estimator/exporter.py | 4 +- tensorflow/python/estimator/inputs/numpy_io.py | 8 +- .../python/estimator/inputs/numpy_io_test.py | 5 +- tensorflow/python/estimator/inputs/pandas_io.py | 7 +- .../python/estimator/inputs/pandas_io_test.py | 5 +- .../estimator/inputs/queues/feeding_functions.py | 2 +- tensorflow/python/estimator/keras.py | 4 +- tensorflow/python/estimator/keras_test.py | 14 +- .../python/grappler/layout_optimizer_test.py | 4 +- tensorflow/python/keras/activations.py | 2 + tensorflow/python/keras/callbacks.py | 21 +- tensorflow/python/keras/callbacks_test.py | 2 + tensorflow/python/keras/engine/network.py | 2 +- tensorflow/python/keras/engine/saving_test.py | 4 +- tensorflow/python/keras/engine/training.py | 7 +- tensorflow/python/keras/engine/training_eager.py | 2 +- tensorflow/python/keras/initializers_test.py | 26 +- tensorflow/python/keras/layers/core.py | 26 +- tensorflow/python/keras/models_test.py | 14 + .../python/kernel_tests/as_string_op_test.py | 10 + tensorflow/python/kernel_tests/betainc_op_test.py | 4 +- tensorflow/python/kernel_tests/clip_ops_test.py | 13 + tensorflow/python/kernel_tests/conv_ops_test.py | 32 +- .../python/kernel_tests/gather_nd_op_test.py | 32 +- tensorflow/python/kernel_tests/gather_op_test.py | 20 +- tensorflow/python/kernel_tests/init_ops_test.py | 27 ++ tensorflow/python/kernel_tests/pooling_ops_test.py | 4 +- tensorflow/python/kernel_tests/py_func_test.py | 31 +- .../python/kernel_tests/scatter_nd_ops_test.py | 6 +- tensorflow/python/kernel_tests/scatter_ops_test.py | 14 +- .../kernel_tests/segment_reduction_ops_test.py | 4 +- .../python/kernel_tests/string_split_op_test.py | 96 +++++ tensorflow/python/ops/array_ops.py | 4 + tensorflow/python/ops/gradient_checker.py | 8 +- tensorflow/python/ops/image_ops_impl.py | 74 ++-- tensorflow/python/ops/image_ops_test.py | 261 ++++++++++--- tensorflow/python/ops/init_ops.py | 3 +- tensorflow/python/ops/logging_ops.py | 5 +- tensorflow/python/ops/math_ops.py | 28 +- tensorflow/python/ops/nn_impl.py | 5 +- tensorflow/python/ops/nn_ops.py | 4 +- tensorflow/python/ops/nn_test.py | 10 + tensorflow/python/ops/script_ops.py | 35 +- tensorflow/python/ops/sparse_ops.py | 4 + tensorflow/python/ops/string_ops.py | 53 +++ tensorflow/python/ops/variable_scope.py | 21 +- .../python/tools/import_pb_to_tensorboard.py | 0 tensorflow/tensorflow.bzl | 2 +- .../tools/api/generator/create_python_api.py | 8 +- tensorflow/tools/api/golden/tensorflow.image.pbtxt | 2 +- tensorflow/tools/api/golden/tensorflow.pbtxt | 4 + .../tools/api/golden/tensorflow.strings.pbtxt | 4 + tensorflow/tools/ci_build/builds/pip.sh | 4 + .../tools/ci_build/builds/with_the_same_user | 2 +- tensorflow/tools/ci_build/ci_build.sh | 7 + tensorflow/tools/ci_build/copy_binary.py | 3 +- .../tools/ci_build/install/install_pip_packages.sh | 4 + .../install/install_python3.5_pip_packages.sh | 4 +- .../install/install_python3.6_pip_packages.sh | 5 +- .../tools/ci_build/linux/mkl/basic-mkl-test.sh | 29 ++ tensorflow/tools/ci_build/pi/build_raspberry_pi.sh | 8 +- 
.../def_file_filter/def_file_filter_configure.bzl | 6 +- tensorflow/tools/dist_test/local_test.sh | 12 +- tensorflow/tools/dist_test/remote_test.sh | 11 +- tensorflow/tools/docker/Dockerfile.devel | 2 +- tensorflow/tools/docker/Dockerfile.devel-cpu-mkl | 2 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 6 +- tensorflow/tools/docker/Dockerfile.gpu | 2 +- tensorflow/tools/pip_package/BUILD | 1 + tensorflow/tools/pip_package/build_pip_package.sh | 160 +++++--- tensorflow/tools/pip_package/setup.py | 3 +- .../proto_text/gen_proto_text_functions_lib.cc | 3 + .../tools/quantization/quantize_graph_test.py | 12 +- tensorflow/tools/test/upload_test_benchmarks.py | 1 - tensorflow/workspace.bzl | 40 +- third_party/eigen.BUILD | 1 + third_party/highwayhash.BUILD | 1 + third_party/jpeg/jpeg.BUILD | 2 + third_party/png.BUILD | 9 +- third_party/py/python_configure.bzl | 24 +- third_party/repo.bzl | 5 +- 232 files changed, 3343 insertions(+), 909 deletions(-) create mode 100644 tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc create mode 100644 tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h create mode 100644 tensorflow/contrib/periodic_resample/ops/array_ops_test.cc create mode 100644 tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt create mode 100644 tensorflow/core/common_runtime/mkl_threadpool_device_test.cc mode change 100755 => 100644 tensorflow/python/tools/import_pb_to_tensorboard.py create mode 100755 tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh (limited to 'configure.py') diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8669c25c45..db4b1581ae 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -90,7 +90,7 @@ Bazel BUILD files also need to include a license section, e.g., Changes to TensorFlow C++ code should conform to [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). -Use `clang-tidy` to check your C/C++ changes. To install clang-tidy on ubuntu:16.04, do: +Use `clang-tidy` to check your C/C++ changes. To install `clang-tidy` on ubuntu:16.04, do: ```bash apt-get install -y clang-tidy diff --git a/README.md b/README.md index 6fb4486d0d..63853137cf 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ $ python 42 >>> sess.close() ``` +Learn more examples about how to do specific tasks in TensorFlow at the [tutorials page of tensorflow.org](https://www.tensorflow.org/tutorials/). ## Contribution guidelines diff --git a/RELEASE.md b/RELEASE.md index 84d9d52868..e09e9c6190 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,62 @@ +# Release 1.9.0 + +## Major Features And Improvements +* Update tf.keras to the Keras 2.1.6 API. +* `tfe.Network` is deprecated. Please inherit from `tf.keras.Model`. +* Adding support of core feature columns and losses to gradient boosted trees estimators. +* The distributions.Bijector API supports broadcasting for Bijectors with new API changes. See [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/distributions/bijectors/Bijector) for more details. +* Layered variable names have changed in the following conditions: + * Using `tf.keras.layers` with custom variable scopes. + * Using `tf.layers` in a subclassed `tf.keras.Model` class. See [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/layers) for more details + +## Breaking Chances + * If you're opening empty variable scopes; replace `variable_scope`('', ...) by `variable_scope`(`tf.get_variable_scope()`, ...). 
+ +## Bug Fixes and Other Changes +* `tf.data`: + * The `DatasetBase::DebugString()` method is now `const`. + * Added the `tf.contrib.data.sample_from_datasets()` API for randomly sampling from multiple datasets. +* Eager Execution: +* `tf.keras`: + * Move Keras code out of _impl folder and remove API files. + * `tf.keras.Model.save_weights` now saves in TensorFlow format by default. + * Enable dataset iterators to be passed to `tf.keras.Model` training/eval methods. +* Accelerated Linear Algebra (XLA): +* TensorFlow Debugger (tfdbg): fix an issue in which the TensorBoard Debugger Plugin could not handle total source file size exceeding gRPC message size limit (4 MB). +* `tf.contrib`: + * Add `tf.contrib.data.choose_from_datasets()`. + * `tf.contrib.data.make_csv_dataset()` now supports line breaks in quoted strings. Two arguments were removed from `make_csv_dataset`. + * `tf.contrib.framework.zero_initializer` supports ResourceVariable. + * Adding "constrained_optimization" to tensorflow/contrib. +* Other: + * Add GCS Configuration Ops. + * Changing signature of `MakeIterator` to enable propagating error status. + * KL divergence for two Dirichlet distributions. + * More consistent GcsFileSystem behavior for certain reads past EOF. + * Update benchmark for tf.scan to match ranges across eager and graph modes. + * Fixed bug in `tf.reduce_prod gradient` for complex dtypes. + * Add optional `args` argument to `Dataset.from_generator()`. + * Allow the use of '.' in variables (e.g. "hparams.parse('a.b=1.0')"), which would previously raise an error. This will correspond to an attribute name with an embedded '.' symbol (e.g. 'a.b'), which can only be accessed indirectly (e.g. through getattr and setattr). To set this up the user will first need to explicitly add the variable to the hparam object (e.g. "hparams.add_hparam(name='a.b', value=0.0)"). + * Benchmark for tf.scan in graph and eager modes. + * Added complex128 support to FFT, FFT2D, FFT3D, IFFT, IFFT2D, and IFFT3D. + * Making ids unique in `nn.embedding_lookup_sparse`. This helps to reduce RPC calls for looking up the embeddings when there are repeated ids in the batch. + * Support indicator column in boosted trees. + * Prevent `tf.gradients()` from backpropagating through integer tensors. + * LinearOperator[1D,2D,3D]Circulant added to `tensorflow.linalg`. + * Conv3D, Conv3DBackpropInput, Conv3DBackpropFilter now supports arbitrary. + * Added `tf.train.Checkpoint` for reading/writing object-based checkpoints. + * `Dataset.list_files()` now produces determinstic results when `shuffle=False` or a `seed` is passed. + * Added LinearOperatorKronecker, a dense-free implementation of the Kronecker Product. + * Allow LinearOperator to broadcast. + * SavedModelBuilder will now deduplicate asset names that point to files with the same basename and the same contents. Note that this may result in new asset files included in SavedModels in cases where assets with the same name but different contents were previously overwriting each other. + + +## Thanks to our Contributors + +This release contains contributions from many people at Google, as well as: + +Abdullah Alrasheed, Achal Shah, Ad-530, ADiegoCAlonso, Aditya Yogi, Ag Ramesh, akindyakov, Andy Kernahan, Anya Petrova, Aurelien Geron, Ben, Ben Barsdell, Bhavani-Subramanian, braincodercn, Brett Koonce, Brian Nemsick, Brian Zier, Bryan Heden, candy.dc, cclauss, Clayne Robison, ctiijima, Dalmo Cirne, David Norman, David T.H. 
Kao, DosLin, ekelsen, Elson Rodriguez, Erik Smistad, Felix Abecassis, Fergal Cotter, fo40225, foo0x29a, Freedom" Koan-Sin Tan, FréDéRic Branchaud-Charron, gdh1995, Geoffrey Irving, Giuseppe, gracehoney, Guido Zuidhof, Guillaume Klein, Guozhong Zhuang, Haggai, Harald Husum, imsheridan, Ivan Zhang, Jan Zikes, Jayaram Bobba, Jesse Benson, Jesse Gumz, Jiajia Li, Jie, jinghuangintel, Jingwen, jjsjann123, Joe Yearsley, Joel Hestness, Joel Shor, josephyearsley, Junpeng Lao, Karol M. Langner, Kb Sriram, krantideep95, Krish Ravindranath, Letian Feng, Loo Rong Jie, Lukas Geiger, Maciej, Mahmoud Abuzaina, ManHyuk, Mark Ryan, mbhuiyan, Michal Turek, Mostafa Alaa, Myungsung Kwak, Nand Dalal, Nehal J Wani, Neil Tenenholtz, ngc92, Nicholas Nadeau, P.Eng., Avs, Niranjan Hasabnis, P-Hidringer, Paul Van Eck, Peng Yu, Qing Zhao, Qingying Chen, Quanlong, Rajendra Arora, Rholais Lii, rmanyari, Robin Richtsfeld, Russell Klopfer, Sagi, Sam Sendelbach, Sandeep N Gupta, Sandip Giri, Sarah Edkins, Scott Tseng, Sdalbsoo, Sergii Khomenko, Seungwoo Choi (Biggie), Seyed Majid Azimi, Shaoning Zeng, shengfuintel, Siu Kei, Muk, Smit Shilu, soonson, Stefan Schweter, Sukhwan Kim, Sunitha Kambhampati, Taehoon Lee, tamimaddari82, Tang, Wenyi, Ted Chang, u2takey, Utkarsh Upadhyay, Vadim Markovtsev, voegtlel, Wai Hon Law, wangsiyu, Wenhao Hu, wenhao.hu, William D. Irons, Yan Facai (颜发才), Yanbo Liang, Yihong Wang, Yilei (Dolee) Yang, Yong Tang, Yuan (Terry) Tang + # Release 1.8.0 ## Major Features And Improvements @@ -404,14 +463,6 @@ answered questions, and were part of inspiring discussions. # Release 1.4.0 -## Major Features And Improvements -* `tf.keras` is now part of the core TensorFlow API. -* [`tf.data`](http://tensorflow.org/programmers_guide/datasets) is now part of - the core TensorFlow API. - * The API is now subject to backwards compatibility guarantees. - -# Release 1.4.0 - ## Major Features And Improvements * `tf.keras` is now part of the core TensorFlow API. * [`tf.data`](http://tensorflow.org/programmers_guide/datasets) is now part of diff --git a/configure.py b/configure.py index bde7af8c0e..ada342a50a 100644 --- a/configure.py +++ b/configure.py @@ -1397,6 +1397,10 @@ def set_grpc_build_flags(): write_to_bazelrc('build --define grpc_no_ares=true') +def set_build_strip_flag(): + write_to_bazelrc('build --strip=always') + + def set_windows_build_flags(): if is_windows(): # The non-monolithic build is not supported yet @@ -1519,6 +1523,7 @@ def main(): set_grpc_build_flags() set_cc_opt_flags(environ_cp) + set_build_strip_flag() set_windows_build_flags() if get_var( diff --git a/tensorflow/BUILD b/tensorflow/BUILD index a73c4ca3aa..6d134dbb80 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -475,7 +475,7 @@ tf_cc_shared_object( # excludes all but a subset of function names. # On MacOS, the linker does not support version_script, but has an # an "-exported_symbols_list" command. -z defs disallows undefined -# symbols in object files and -s strips the output. +# symbols in object files. 
tf_cc_shared_object( name = "libtensorflow.so", @@ -489,7 +489,6 @@ tf_cc_shared_object( "//tensorflow:windows_msvc": [], "//conditions:default": [ "-z defs", - "-s", "-Wl,--version-script", # This line must be directly followed by the version_script.lds file "$(location //tensorflow/c:version_script.lds)", ], @@ -515,7 +514,6 @@ tf_cc_shared_object( "//tensorflow:windows_msvc": [], "//conditions:default": [ "-z defs", - "-s", "-Wl,--version-script", # This line must be directly followed by the version_script.lds file "$(location //tensorflow:tf_version_script.lds)", ], diff --git a/tensorflow/c/generate-pc.sh b/tensorflow/c/generate-pc.sh index 02a6a58b61..7184ad68fb 100755 --- a/tensorflow/c/generate-pc.sh +++ b/tensorflow/c/generate-pc.sh @@ -15,10 +15,12 @@ # ============================================================================== TF_PREFIX='/usr/local' +LIBDIR='lib' usage() { echo "Usage: $0 OPTIONS" echo -e "-p, --prefix\tset installation prefix (default: /usr/local)" + echo -e "-l, --libdir\tset lib directory (default: lib)" echo -e "-v, --version\tset TensorFlow version" echo -e "-h, --help\tdisplay this message" } @@ -26,7 +28,7 @@ usage() { [ $# == 0 ] && usage && exit 0 # read the options -ARGS=$(getopt -o p:v:h --long prefix:,version:,help -n $0 -- "$@") +ARGS=$(getopt -o p:l:v:h --long prefix:,libdir:,version:,help -n $0 -- "$@") eval set -- "$ARGS" # extract options and their arguments into variables. @@ -38,6 +40,11 @@ while true ; do "") shift 2 ;; *) TF_PREFIX=$2 ; shift 2 ;; esac ;; + -l|--libdir) + case "$2" in + "") shift 2 ;; + *) LIBDIR=$2 ; shift 2 ;; + esac ;; -v|--version) case "$2" in "") shift 2 ;; @@ -55,7 +62,7 @@ echo "Generating pkgconfig file for TensorFlow $TF_VERSION in $TF_PREFIX" cat << EOF > tensorflow.pc prefix=${TF_PREFIX} exec_prefix=\${prefix} -libdir=\${exec_prefix}/lib +libdir=\${exec_prefix}/${LIBDIR} includedir=\${prefix}/include Name: TensorFlow diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc index 52c177212a..35a01e0341 100644 --- a/tensorflow/cc/gradients/math_grad.cc +++ b/tensorflow/cc/gradients/math_grad.cc @@ -38,6 +38,7 @@ REGISTER_NO_GRADIENT_OP("NotEqual"); REGISTER_NO_GRADIENT_OP("LogicalAnd"); REGISTER_NO_GRADIENT_OP("LogicalOr"); REGISTER_NO_GRADIENT_OP("LogicalNot"); +REGISTER_NO_GRADIENT_OP("Floor"); // Conjugate helper function returns the conjugate of an Output if it // is complex valued. 
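(For reference, a minimal invocation sketch of the updated `tensorflow/c/generate-pc.sh` with its new `-l/--libdir` option; the `lib64` value and the version string below are illustrative assumptions, not taken from the patch.)

```bash
# Hypothetical usage: generate tensorflow.pc with libraries under ${prefix}/lib64
./tensorflow/c/generate-pc.sh --prefix=/usr/local --libdir=lib64 --version=1.9.0
# The emitted tensorflow.pc then contains: libdir=${exec_prefix}/lib64
```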
diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 0cb3132e94..c73482d5f4 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -255,6 +255,53 @@ Status LRNGradHelper(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("LRN", LRNGradHelper); +Status SoftplusGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + auto dx = internal::SoftplusGrad(scope, grad_inputs[0], op.input(0)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("Softplus", SoftplusGradHelper); + +Status SoftsignGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + auto dx = internal::SoftsignGrad(scope, grad_inputs[0], op.input(0)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("Softsign", SoftsignGradHelper); + +Status FractionalAvgPoolGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + bool overlapping; + TF_RETURN_IF_ERROR( + GetNodeAttr(op.output(0).node()->attrs(), "overlapping", &overlapping)); + auto dx = internal::FractionalAvgPoolGrad( + scope, Shape(scope, op.input(0), Shape::OutType(DT_INT64)), + grad_inputs[0], op.output(1), op.output(2), + internal::FractionalAvgPoolGrad::Overlapping(overlapping)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("FractionalAvgPool", FractionalAvgPoolGradHelper); + +Status FractionalMaxPoolGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + bool overlapping; + TF_RETURN_IF_ERROR( + GetNodeAttr(op.output(0).node()->attrs(), "overlapping", &overlapping)); + auto dx = internal::FractionalMaxPoolGrad( + scope, op.input(0), op.output(0), grad_inputs[0], op.output(1), + op.output(2), internal::FractionalMaxPoolGrad::Overlapping(overlapping)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("FractionalMaxPool", FractionalMaxPoolGradHelper); + } // anonymous namespace } // namespace ops } // namespace tensorflow diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index c4eba7ecb0..b4d457a9d1 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -28,6 +28,8 @@ namespace { using ops::BiasAdd; using ops::Conv2D; using ops::Elu; +using ops::FractionalAvgPool; +using ops::FractionalMaxPool; using ops::L2Loss; using ops::LogSoftmax; using ops::LRN; @@ -41,6 +43,8 @@ using ops::Relu; using ops::Relu6; using ops::Selu; using ops::Softmax; +using ops::Softplus; +using ops::Softsign; class NNGradTest : public ::testing::Test { protected: @@ -71,22 +75,30 @@ class NNGradTest : public ::testing::Test { EXPECT_LT(max_error, 1e-3); } - // Sets tensor with random values, ensuring that the max value is largest by - // a reasonable amount. - // This is an issue for MaxPool, MaxPoolV2 and MaxPool3D, in which - // perturbations by the numeric gradient computation in the gradient checker - // can change the max value if values are too close together. + // Sets tensor with random values, ensuring that every pair of elements are at + // least a reasonable amount apart. 
+ // This is an issue for max pooling operations, in which perturbations by the + // numeric gradient computation in the gradient checker can change the max + // value if a pool has values that are too close together. template - void SetRandomValuesWithBumpedMax(Tensor* tensor) { + void SetRandomValuesForMaxPooling(Tensor* tensor) { auto tensor_flat = tensor->flat(); - tensor_flat.setRandom(); - int32 max_index = 0; - for (size_t i = 1; i < tensor->NumElements(); i++) { - if (tensor_flat(i) > tensor_flat(max_index)) { - max_index = i; - } + // First set the array to an increasing sequence of values spaced + // a reasonable amount apart + T cur = 0; + for (size_t i = 0; i < tensor->NumElements(); i++) { + tensor_flat(i) = cur; + cur += 5e-2; + } + // Fischer-Yates shuffle the array + for (size_t i = tensor->NumElements() - 1; i >= 1; i--) { + // j <- random integer 0 <= j <= i + size_t j = random::New64() % (i + 1); + // swap values at i, j + T tmp = tensor_flat(i); + tensor_flat(i) = tensor_flat(j); + tensor_flat(j) = tmp; } - tensor_flat(max_index) += 1e-2; } Scope scope_; @@ -189,7 +201,7 @@ TEST_F(NNGradTest, MaxPoolGradHelper) { const std::vector strides{1, 2, 2, 1}; auto y = MaxPool(scope_, x, ksize, strides, "VALID"); Tensor x_init_value = Tensor(DT_FLOAT, x_shape); - SetRandomValuesWithBumpedMax(&x_init_value); + SetRandomValuesForMaxPooling(&x_init_value); RunTest(x, x_init_value, y, y_shape); } @@ -202,7 +214,7 @@ TEST_F(NNGradTest, MaxPoolGradV2Helper) { Tensor strides = test::AsTensor({1, 2, 2, 1}, {4}); auto y = MaxPoolV2(scope_, x, ksize, strides, "VALID"); Tensor x_init_value = Tensor(DT_FLOAT, x_shape); - SetRandomValuesWithBumpedMax(&x_init_value); + SetRandomValuesForMaxPooling(&x_init_value); RunTest(x, x_init_value, y, y_shape); } @@ -215,7 +227,7 @@ TEST_F(NNGradTest, MaxPool3DGradHelper) { const std::vector strides{1, 3, 3, 3, 1}; auto y = MaxPool3D(scope_, x, ksize, strides, "VALID"); Tensor x_init_value = Tensor(DT_FLOAT, x_shape); - SetRandomValuesWithBumpedMax(&x_init_value); + SetRandomValuesForMaxPooling(&x_init_value); RunTest(x, x_init_value, y, y_shape); } @@ -248,5 +260,45 @@ TEST_F(NNGradTest, LRN){ RunTest(x, x_shape, y, x_shape); } +TEST_F(NNGradTest, SoftplusGrad) { + TensorShape shape({3, 7}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + auto y = Softplus(scope_, x); + RunTest(x, shape, y, shape); +} + +TEST_F(NNGradTest, SoftsignGrad) { + TensorShape shape({3, 7}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + auto y = Softsign(scope_, x); + RunTest(x, shape, y, shape); +} + +TEST_F(NNGradTest, FractionalAvgPoolGradHelper) { + TensorShape x_shape({1, 3, 7, 1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + // Force consistent pooling regions for unit testing. + auto y = FractionalAvgPool( + scope_, x, {1, 1.2, 1.9, 1}, + FractionalAvgPool::Deterministic(true).Overlapping(true).Seed(1).Seed2( + 2)); + TensorShape y_shape({1, 2, 3, 1}); + RunTest(x, x_shape, y.output, y_shape); +} + +TEST_F(NNGradTest, FractionalMaxPoolGradHelper) { + TensorShape x_shape({1, 3, 7, 1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + // Force consistent pooling regions for unit testing. 
+ auto y = FractionalMaxPool( + scope_, x, {1, 1.2, 1.9, 1}, + FractionalMaxPool::Deterministic(true).Overlapping(true).Seed(1).Seed2( + 2)); + Tensor x_init_value = Tensor(DT_FLOAT, x_shape); + SetRandomValuesForMaxPooling(&x_init_value); + TensorShape y_shape({1, 2, 3, 1}); + RunTest(x, x_init_value, y.output, y_shape); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden index 6e050cf564..6641d45e83 100644 --- a/tensorflow/compiler/aot/codegen_test_h.golden +++ b/tensorflow/compiler/aot/codegen_test_h.golden @@ -56,9 +56,9 @@ namespace bar { // // Memory stats: // arg bytes total: 104 -// arg bytes aligned: 128 +// arg bytes aligned: 192 // temp bytes total: 126 -// temp bytes aligned: 224 +// temp bytes aligned: 320 class MyClass : public tensorflow::XlaCompiledCpuFunction { public: // Number of input arguments for the compiled computation. diff --git a/tensorflow/compiler/aot/embedded_protocol_buffers.h b/tensorflow/compiler/aot/embedded_protocol_buffers.h index ebfe4806c2..4e194a6aba 100644 --- a/tensorflow/compiler/aot/embedded_protocol_buffers.h +++ b/tensorflow/compiler/aot/embedded_protocol_buffers.h @@ -71,7 +71,7 @@ struct ProtobufToEmbed { const ::tensorflow::protobuf::MessageLite* message; }; -// Embeds a a sequence of protocol buffers into an object file. +// Embeds a sequence of protocol buffers into an object file. // // `target_triple` is the target triple for the target architecture for the // generated object file. diff --git a/tensorflow/compiler/aot/runtime.h b/tensorflow/compiler/aot/runtime.h index d085864f00..d1a669ceb1 100644 --- a/tensorflow/compiler/aot/runtime.h +++ b/tensorflow/compiler/aot/runtime.h @@ -25,8 +25,8 @@ namespace tensorflow { namespace tfcompile { namespace runtime { -// Align to 32-bytes, to mimic tensorflow::Allocator::kAllocatorAlignment. -static constexpr size_t kAlign = 32; +// Align to 64-bytes, to mimic tensorflow::Allocator::kAllocatorAlignment. +static constexpr size_t kAlign = 64; // aligned_buffer_bytes returns the sum of each size in `sizes`, skipping -1 // values. There are `n` entries in `sizes`. Each buffer is aligned to kAlign diff --git a/tensorflow/compiler/aot/runtime_test.cc b/tensorflow/compiler/aot/runtime_test.cc index 6d603a02eb..06ec623eb2 100644 --- a/tensorflow/compiler/aot/runtime_test.cc +++ b/tensorflow/compiler/aot/runtime_test.cc @@ -24,7 +24,7 @@ namespace runtime { namespace { TEST(Runtime, AlignmentValue) { - // We've chosen 32 byte alignment for the tfcompile runtime to mimic the + // We've chosen 64 byte alignment for the tfcompile runtime to mimic the // regular tensorflow allocator, which was chosen to play nicely with Eigen. // The tfcompile runtime also has a requirement that comes from the xla // generated code, on the relation: buffer_size >= 16 ? 
2 * sizeof(void*) : 8 @@ -39,13 +39,13 @@ TEST(Runtime, AlignedBufferBytes) { EXPECT_EQ(aligned_buffer_bytes(sizesA, 1), 0); static constexpr intptr_t sizesB[1] = {3}; - EXPECT_EQ(aligned_buffer_bytes(sizesB, 1), 32); + EXPECT_EQ(aligned_buffer_bytes(sizesB, 1), 64); static constexpr intptr_t sizesC[1] = {32}; - EXPECT_EQ(aligned_buffer_bytes(sizesC, 1), 32); + EXPECT_EQ(aligned_buffer_bytes(sizesC, 1), 64); static constexpr intptr_t sizesD[7] = {1, -1, 32, -1, 64, 2, 3}; - EXPECT_EQ(aligned_buffer_bytes(sizesD, 7), 192); + EXPECT_EQ(aligned_buffer_bytes(sizesD, 7), 320); } void* add_ptr(void* base, uintptr_t delta) { @@ -101,11 +101,11 @@ TEST(Runtime, MallocFreeContiguousBuffers) { EXPECT_NE(base, nullptr); EXPECT_EQ(bufD[0], add_ptr(base, 0)); EXPECT_EQ(bufD[1], nullptr); - EXPECT_EQ(bufD[2], add_ptr(base, 32)); + EXPECT_EQ(bufD[2], add_ptr(base, 64)); EXPECT_EQ(bufD[3], nullptr); - EXPECT_EQ(bufD[4], add_ptr(base, 64)); - EXPECT_EQ(bufD[5], add_ptr(base, 128)); - EXPECT_EQ(bufD[6], add_ptr(base, 160)); + EXPECT_EQ(bufD[4], add_ptr(base, 128)); + EXPECT_EQ(bufD[5], add_ptr(base, 192)); + EXPECT_EQ(bufD[6], add_ptr(base, 256)); for (int i = 0; i < 7; ++i) { const intptr_t size = sizesD[i]; if (size != -1) { diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index d82922a359..1067b38f93 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -178,6 +178,7 @@ cc_library( ":runtime_matmul", ":runtime_matmul_mkl", ":runtime_single_threaded_conv2d", + ":runtime_single_threaded_fft", ":runtime_single_threaded_matmul", "@llvm//:execution_engine", "@llvm//:core", @@ -516,7 +517,6 @@ cc_library( deps = [ "//tensorflow/compiler/xla:executable_run_options", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/core:framework", "//tensorflow/core:framework_lite", "//third_party/eigen3", ], @@ -578,6 +578,22 @@ cc_library( ], ) +cc_library( + name = "runtime_single_threaded_fft", + srcs = [ + "runtime_fft_impl.h", + "runtime_single_threaded_fft.cc", + ], + hdrs = ["runtime_single_threaded_fft.h"], + copts = runtime_copts(), + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/core:framework_lite", + "//third_party/eigen3", + ], +) + cc_library( name = "runtime_single_threaded_matmul", srcs = ["runtime_single_threaded_matmul.cc"], diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc index 215405f680..54c52bc08f 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc @@ -51,6 +51,8 @@ extern const char* const kEigenConvF16SymbolName = extern const char* const kEigenConvF32SymbolName = "__xla_cpu_runtime_EigenConvF32"; extern const char* const kEigenFftSymbolName = "__xla_cpu_runtime_EigenFft"; +extern const char* const kEigenSingleThreadedFftSymbolName = + "__xla_cpu_runtime_EigenSingleThreadedFft"; extern const char* const kEigenSingleThreadedMatMulF16SymbolName = "__xla_cpu_runtime_EigenSingleThreadedMatMulF16"; extern const char* const kEigenSingleThreadedMatMulF32SymbolName = diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h index 1dce6efa5c..aa0e967123 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h @@ -52,6 +52,7 @@ extern const char* const 
kMKLSingleThreadedMatMulF64SymbolName; extern const char* const kEigenConvF16SymbolName; extern const char* const kEigenConvF32SymbolName; extern const char* const kEigenFftSymbolName; +extern const char* const kEigenSingleThreadedFftSymbolName; extern const char* const kEigenSingleThreadedMatMulF16SymbolName; extern const char* const kEigenSingleThreadedMatMulF32SymbolName; extern const char* const kEigenSingleThreadedMatMulF64SymbolName; diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 2c20be155f..758b8c62b4 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -1172,7 +1172,13 @@ Status IrEmitter::HandleFft(HloInstruction* fft) { {int8_ptr_type, int8_ptr_type, int8_ptr_type, int32_type, int32_type, int64_type, int64_type, int64_type, int64_type}, /*isVarArg=*/false); - const char* fn_name = runtime::kEigenFftSymbolName; + + bool multi_threaded_eigen = + hlo_module_config_.debug_options().xla_cpu_multi_thread_eigen(); + const char* fn_name = multi_threaded_eigen + ? runtime::kEigenFftSymbolName + : runtime::kEigenSingleThreadedFftSymbolName; + llvm::Function* fft_func = llvm::cast( module_->getOrInsertFunction(fn_name, fft_type)); fft_func->setCallingConv(llvm::CallingConv::C); diff --git a/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h b/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h index 984cb0616e..0bf693edd0 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h +++ b/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h @@ -21,8 +21,6 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/framework/numeric_types.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/platform/types.h" // 'tensorflow' namespace is used so that int64 and other types don't require @@ -71,11 +69,9 @@ void EigenFftR2C(const EigenDevice& device, complex64* out, float* operand, in_dims[0] = input_batch; Eigen::DSizes out_dims; out_dims[0] = input_batch; - TensorShape temp_shape{input_batch}; for (int i = 0; i < FFTRank; i++) { in_dims[i + 1] = fft_shape[i]; out_dims[i + 1] = i == FFTRank - 1 ? fft_shape[i] / 2 + 1 : fft_shape[i]; - temp_shape.AddDim(fft_shape[i]); } const Eigen::TensorMap, Eigen::Aligned> @@ -88,8 +84,8 @@ void EigenFftR2C(const EigenDevice& device, complex64* out, float* operand, const auto axes = Eigen::ArrayXi::LinSpaced(FFTRank, 1, FFTRank); // Compute the full FFT using a temporary tensor. - Tensor temp(DataTypeToEnum::v(), temp_shape); - auto full_fft = temp.flat_inner_dims(); + Eigen::Tensor full_fft(in_dims); + const Eigen::DSizes zero_start_indices; full_fft.device(device) = input.template fft(axes); @@ -112,11 +108,9 @@ void EigenFftC2R(const EigenDevice& device, float* out, complex64* operand, in_dims[0] = input_batch; Eigen::DSizes out_dims; out_dims[0] = input_batch; - TensorShape temp_shape{input_batch}; for (int i = 0; i < FFTRank; i++) { in_dims[i + 1] = i == FFTRank - 1 ? fft_shape[i] / 2 + 1 : fft_shape[i]; out_dims[i + 1] = fft_shape[i]; - temp_shape.AddDim(fft_shape[i]); } const Eigen::TensorMap, Eigen::Aligned> @@ -129,8 +123,7 @@ void EigenFftC2R(const EigenDevice& device, float* out, complex64* operand, // region we will slice from input given fft_shape. 
We slice input to // fft_shape on its inner-most dimensions, except the last (which we // slice to fft_shape[-1] / 2 + 1). - Tensor temp(DataTypeToEnum::v(), temp_shape); - auto full_fft = temp.flat_inner_dims(); + Eigen::Tensor full_fft(out_dims); // Calculate the starting point and range of the source of // negative frequency part. @@ -179,7 +172,6 @@ template void EigenFftWithRank(const EigenDevice& device, void* out, void* operand, int32 fft_type, int64 input_batch, int64 fft_length0, int64 fft_length1, int64 fft_length2) { - CHECK(::xla::FftType_IsValid(fft_type)) << fft_type; switch (fft_type) { case ::xla::FftType::FFT: EigenFftC2C( @@ -204,7 +196,8 @@ void EigenFftWithRank(const EigenDevice& device, void* out, void* operand, input_batch, fft_length0, fft_length1, fft_length2); break; default: - LOG(FATAL) << "Unsupported FFT type: " << fft_type; + // Unsupported FFT type + abort(); } } @@ -230,7 +223,8 @@ void EigenFftImpl(const EigenDevice& device, void* out, void* operand, fft_length1, fft_length2); break; default: - LOG(FATAL) << "Unsupported FFT rank " << fft_rank; + // Unsupported FFT rank + abort(); } } diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc new file mode 100644 index 0000000000..2613ddb127 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc @@ -0,0 +1,32 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h" + +#include "tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h" +#include "tensorflow/core/platform/dynamic_annotations.h" +#include "tensorflow/core/platform/types.h" + +using tensorflow::int32; +using tensorflow::int64; + +TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenSingleThreadedFft( + const void* run_options_ptr, void* out, void* operand, int32 fft_type, + int32 fft_rank, int64 input_batch, int64 fft_length0, int64 fft_length1, + int64 fft_length2) { + tensorflow::xla::EigenFftImpl(Eigen::DefaultDevice(), out, operand, fft_type, + fft_rank, input_batch, fft_length0, fft_length1, + fft_length2); +} diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h new file mode 100644 index 0000000000..dcd133d012 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h @@ -0,0 +1,31 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_SINGLE_THREADED_FFT_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_SINGLE_THREADED_FFT_H_ + +#include "tensorflow/core/platform/types.h" + +extern "C" { + +extern void __xla_cpu_runtime_EigenSingleThreadedFft( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, void* out, + void* operand, tensorflow::int32 fft_type, tensorflow::int32 fft_rank, + tensorflow::int64 input_batch, tensorflow::int64 fft_length0, + tensorflow::int64 fft_length1, tensorflow::int64 fft_length2); + +} // extern "C" + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_SINGLE_THREADED_FFT_H_ diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index 8d8c5e4c44..c4c90515ac 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -38,6 +38,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h" #include "tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h" +#include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h" #include "tensorflow/compiler/xla/service/cpu/windows_compatibility.h" #include "tensorflow/compiler/xla/types.h" @@ -202,6 +203,7 @@ bool RegisterKnownJITSymbols() { REGISTER_CPU_RUNTIME_SYMBOL(MKLSingleThreadedMatMulF64); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32); + REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedFft); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64); diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h index d3bc47e61e..2515222cf2 100644 --- a/tensorflow/compiler/xla/service/pattern_matcher.h +++ b/tensorflow/compiler/xla/service/pattern_matcher.h @@ -204,7 +204,7 @@ class LayoutPattern { // Modifies the pattern to match only if the layout equals the given proto. // The layout must outlive the returned pattern. constexpr LayoutPattern> EqualTo( - const Layout* layout) const { + const ::xla::Layout* layout) const { return LayoutPattern>( LayoutPatternEqualImpl(impl_, layout), matched_layout_); } diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.cc b/tensorflow/compiler/xla/service/tuple_simplifier.cc index e536c8afbf..77bdcc9de0 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier.cc +++ b/tensorflow/compiler/xla/service/tuple_simplifier.cc @@ -30,10 +30,17 @@ limitations under the License. 
namespace xla { +TupleSimplifier::TupleSimplifier(bool exclude_entry_computation) : + exclude_entry_computation_(exclude_entry_computation) {} + StatusOr TupleSimplifier::Run(HloModule* module) { // Initially add all GTE and Tuple instructions to the worklist. std::queue worklist; for (auto* computation : module->computations()) { + if (exclude_entry_computation_ && + computation == module->entry_computation()) { + continue; + } for (auto* instruction : computation->instructions()) { if (instruction->opcode() == HloOpcode::kTuple || instruction->opcode() == HloOpcode::kGetTupleElement) { diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.h b/tensorflow/compiler/xla/service/tuple_simplifier.h index e5e9b10b5b..7509501883 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier.h +++ b/tensorflow/compiler/xla/service/tuple_simplifier.h @@ -27,13 +27,20 @@ namespace xla { // the module. class TupleSimplifier : public HloPassInterface { public: - TupleSimplifier() {} + TupleSimplifier() : TupleSimplifier(/*exclude_entry_computation=*/false) {} + explicit TupleSimplifier(bool exclude_entry_computation); ~TupleSimplifier() override {} tensorflow::StringPiece name() const override { return "tuple-simplifier"; } // Run tuple simplification on the given computation. Returns whether the // computation was changed. StatusOr Run(HloModule* module) override; + + private: + // When set, this pipeline stage will perform optimization of all computations + // apart from the module's entry computation. This is used by Graphcore's + // backend. + bool exclude_entry_computation_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/tuple_simplifier_test.cc b/tensorflow/compiler/xla/service/tuple_simplifier_test.cc index ca9ae91281..d3635eae81 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/tuple_simplifier_test.cc @@ -42,6 +42,12 @@ class TupleSimplifierTest : public HloTestBase { TF_ASSERT_OK(changed_status.status()); EXPECT_EQ(change_expected, changed_status.ValueOrDie()); } + void Run(HloModule* module, bool change_expected, bool exclude_entry) { + TupleSimplifier simplifier(exclude_entry); + auto changed_status = simplifier.Run(module); + TF_ASSERT_OK(changed_status.status()); + EXPECT_EQ(change_expected, changed_status.ValueOrDie()); + } const Shape scalar_shape_ = ShapeUtil::MakeShape(F32, {}); const Shape tuple_shape_ = ShapeUtil::MakeTupleShape( @@ -211,5 +217,76 @@ TEST_F(TupleSimplifierTest, IncompatibleTuples) { EXPECT_THAT(computation->root_instruction(), tuple); } +TEST_F(TupleSimplifierTest, CanExcludeEntryComputation) { + // Verify that the root computation can be excluded + auto module = CreateNewModule(); + + HloInstruction* p0; + HloInstruction* p1; + HloComputation* c0; + HloComputation* c1; + HloComputation* entry; + + { + HloComputation::Builder builder(TestName() + "_1"); + p0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape_, "param")); + HloInstruction* gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p0, 0)); + HloInstruction* gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p0, 1)); + HloInstruction* gte2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p0, 2)); + + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1, gte2})); + + c0 = module->AddEmbeddedComputation(builder.Build()); + } + { + HloComputation::Builder builder(TestName() + "_2"); + p1 = 
builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape_, "param")); + HloInstruction* gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p1, 0)); + HloInstruction* gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p1, 1)); + HloInstruction* gte2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p1, 2)); + + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1, gte2})); + + c1 = module->AddEmbeddedComputation(builder.Build()); + } + { + HloComputation::Builder builder(TestName() + "_Entry"); + HloInstruction* tuple_param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape_, "param")); + HloInstruction* call0 = builder.AddInstruction( + HloInstruction::CreateCall(tuple_shape_, {tuple_param}, c0)); + HloInstruction* call1 = builder.AddInstruction( + HloInstruction::CreateCall(tuple_shape_, {tuple_param}, c1)); + HloInstruction* gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, call0, 0)); + HloInstruction* gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, call1, 1)); + HloInstruction* tuple0 = + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); + HloInstruction* gte2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, tuple0, 0)); + HloInstruction* gte3 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, tuple0, 1)); + + builder.AddInstruction(HloInstruction::CreateTuple({gte2, gte3})); + + entry = module->AddEntryComputation(builder.Build()); + } + + Run(module.get(), /*change_expected=*/true, /*exclude_entry=*/ true); + + EXPECT_THAT(c0->root_instruction(), p0); + EXPECT_THAT(c1->root_instruction(), p1); + EXPECT_THAT(entry->instruction_count(), 9); +} + } // namespace } // namespace xla diff --git a/tensorflow/contrib/autograph/__init__.py b/tensorflow/contrib/autograph/__init__.py index 637e49c082..dbdbad8f4c 100644 --- a/tensorflow/contrib/autograph/__init__.py +++ b/tensorflow/contrib/autograph/__init__.py @@ -23,6 +23,7 @@ from __future__ import print_function # TODO(mdan): Bring only the relevant symbols to the top level. 
from tensorflow.contrib.autograph import utils +from tensorflow.contrib.autograph import operators from tensorflow.contrib.autograph.impl.api import convert from tensorflow.contrib.autograph.impl.api import converted_call from tensorflow.contrib.autograph.impl.api import do_not_convert @@ -43,6 +44,8 @@ _allowed_symbols = [ 'do_not_convert', 'to_code', 'to_graph', + # Overloaded operators + 'operators', # Special functions and directives 'set_element_type', 'set_loop_options', diff --git a/tensorflow/contrib/cmake/tf_c.cmake b/tensorflow/contrib/cmake/tf_c.cmake index bda5e26f43..2e0a2fcef4 100644 --- a/tensorflow/contrib/cmake/tf_c.cmake +++ b/tensorflow/contrib/cmake/tf_c.cmake @@ -37,13 +37,15 @@ add_dependencies( tf_core_lib tf_protos_cc) -add_library(tf_c_python_api OBJECT - "${tensorflow_source_dir}/tensorflow/c/python_api.cc" - "${tensorflow_source_dir}/tensorflow/c/python_api.h" -) -add_dependencies( - tf_c_python_api - tf_c - tf_core_lib - tf_core_framework - tf_protos_cc) +if(tensorflow_BUILD_PYTHON_BINDINGS) + add_library(tf_c_python_api OBJECT + "${tensorflow_source_dir}/tensorflow/c/python_api.cc" + "${tensorflow_source_dir}/tensorflow/c/python_api.h" + ) + add_dependencies( + tf_c_python_api + tf_c + tf_core_lib + tf_core_framework + tf_protos_cc) +endif() diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake index f73da0b8ab..6c90cf398c 100644 --- a/tensorflow/contrib/cmake/tf_cc_ops.cmake +++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake @@ -155,7 +155,7 @@ if (WIN32) set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/pywrap_tensorflow_internal.lib") endif() else (WIN32) - set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so") + set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal${CMAKE_SHARED_LIBRARY_SUFFIX}") endif (WIN32) add_custom_target(tf_extension_ops) diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index a0c3ddd28b..9244604489 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -715,7 +715,7 @@ if(WIN32) endif() else() add_custom_command(TARGET pywrap_tensorflow_internal POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal${CMAKE_SHARED_LIBRARY_SUFFIX} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.so) endif() @@ -832,7 +832,6 @@ add_custom_command(TARGET tf_python_build_pip_package POST_BUILD add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/contrib/testing/python/framework/util_test.py ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/testing/python/framework/) - add_custom_command(TARGET tf_python_build_pip_package POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/tools/pip_package/README ${CMAKE_CURRENT_BINARY_DIR}/tf_python/) diff --git a/tensorflow/contrib/cmake/tools/create_def_file.py b/tensorflow/contrib/cmake/tools/create_def_file.py index cffe069aa3..4f957f1e0b 100644 --- a/tensorflow/contrib/cmake/tools/create_def_file.py +++ b/tensorflow/contrib/cmake/tools/create_def_file.py @@ -44,7 +44,8 @@ UNDNAME = "undname.exe" DUMPBIN = "dumpbin.exe" # Exclude if matched -EXCLUDE_RE = re.compile(r"RTTI|deleting destructor|::internal::") 
+EXCLUDE_RE = re.compile(r"RTTI|deleting destructor|::internal::|Internal|" + r"python_op_gen_internal|grappler") # Include if matched before exclude INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|" @@ -56,6 +57,10 @@ INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|" r"tensorflow::ops::internal::Enter|" r"tensorflow::strings::internal::AppendPieces|" r"tensorflow::strings::internal::CatPieces|" + r"tensorflow::errors::Internal|" + r"tensorflow::Tensor::CopyFromInternal|" + r"tensorflow::kernel_factory::" + r"OpKernelRegistrar::InitInternal|" r"tensorflow::io::internal::JoinPathImpl") # Include if matched after exclude @@ -64,7 +69,7 @@ INCLUDE_RE = re.compile(r"^(TF_\w*)$|" r"tensorflow::|" r"functor::|" r"\?nsync_|" - r"perftools::gputools") + r"stream_executor::") # We want to identify data members explicitly in the DEF file, so that no one # can implicitly link against the DLL if they use one of the variables exported diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py index 45760a29ee..795f1993ba 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py @@ -151,16 +151,24 @@ class SinhArcsinhBijectorTest(test.TestCase): self.assertAllClose(y, bijector.forward(x).eval(), rtol=1e-4, atol=0.) self.assertAllClose(x, bijector.inverse(y).eval(), rtol=1e-4, atol=0.) - # Do the numpy calculation in float128 to avoid inf/nan. - y_float128 = np.float128(y) - self.assertAllClose( - np.log(np.cosh( - np.arcsinh(y_float128) / tailweight - skewness) / np.sqrt( - y_float128**2 + 1)) - - np.log(tailweight), - bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), - rtol=1e-4, - atol=0.) + # On IBM PPC systems, longdouble (np.float128) is same as double except that it can have more precision. + # Type double being of 8 bytes, can't hold square of max of float64 (which is also 8 bytes) and + # below test fails due to overflow error giving inf. So this check avoids that error by skipping square + # calculation and corresponding assert. + + if np.amax(y) <= np.sqrt(np.finfo(np.float128).max) and \ + np.fabs(np.amin(y)) <= np.sqrt(np.fabs(np.finfo(np.float128).min)): + + # Do the numpy calculation in float128 to avoid inf/nan. + y_float128 = np.float128(y) + self.assertAllClose( + np.log(np.cosh( + np.arcsinh(y_float128) / tailweight - skewness) / np.sqrt( + y_float128**2 + 1)) - + np.log(tailweight), + bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), + rtol=1e-4, + atol=0.) 
self.assertAllClose( -bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), bijector.forward_log_det_jacobian(x, event_ndims=0).eval(), diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py index d7909dd5a2..adf92c27ea 100644 --- a/tensorflow/contrib/eager/python/datasets.py +++ b/tensorflow/contrib/eager/python/datasets.py @@ -106,7 +106,8 @@ class Iterator(iterator_ops.EagerIterator, checkpointable.CheckpointableBase): target_device=target, buffer_size=10, container="", - shared_name=_generate_shared_name("function_buffer_resource")) + shared_name=_generate_shared_name( + "contrib_eager_iterator_function_buffer_resource")) self._buffer_resource_deleter = resource_variable_ops.EagerResourceDeleter( # pylint: disable=line-too-long handle=self._buffer_resource_handle, handle_device=self._device) diff --git a/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb index 4fe3a0e3f3..5749f22ac5 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb @@ -68,7 +68,7 @@ "# simply construct the object. Most layers take as a first argument the number\n", "# of output dimensions / channels.\n", "layer = tf.keras.layers.Dense(100)\n", - "# The number of input dimensionss is often unnecessary, as it can be inferred\n", + "# The number of input dimensions is often unnecessary, as it can be inferred\n", "# the first time the layer is used, but it can be provided if you want to \n", "# specify it manually, which is useful in some complex models.\n", "layer = tf.keras.layers.Dense(10, input_shape=(None, 5))" @@ -267,7 +267,7 @@ " * `build`, where you know the shapes of the input tensors and can do the rest of the initialization\n", " * `call`, where you do the forward computation\n", "\n", - "Note that you don't have to wait until `build` is called to create your variables, you can also create them in `__init__`. However, the advantage of creating them in `build` is that it enables late variable creation based on the shape of the inputs the layer will operate on. On the other hand, creating variables in `__init__` would mean that shapes requires to create the variables will need to be explicitly specified." + "Note that you don't have to wait until `build` is called to create your variables, you can also create them in `__init__`. However, the advantage of creating them in `build` is that it enables late variable creation based on the shape of the inputs the layer will operate on. On the other hand, creating variables in `__init__` would mean that shapes required to create the variables will need to be explicitly specified." ] }, { diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index 84a413c791..05bcdac2ca 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -346,7 +346,8 @@ def sequence_numeric_column( key, shape=(1,), default_value=0., - dtype=dtypes.float32): + dtype=dtypes.float32, + normalizer_fn=None): """Returns a feature column that represents sequences of numeric data. 
Example: @@ -370,6 +371,12 @@ def sequence_numeric_column( default_value: A single value compatible with `dtype` that is used for padding the sparse data into a dense `Tensor`. dtype: The type of values. + normalizer_fn: If not `None`, a function that can be used to normalize the + value of the tensor after `default_value` is applied for parsing. + Normalizer function takes the input `Tensor` as its argument, and returns + the output `Tensor`. (e.g. lambda x: (x - 3.0) / 4.2). Please note that + even though the most common use case of this function is normalization, it + can be used for any kind of Tensorflow transformations. Returns: A `_SequenceNumericColumn`. @@ -383,12 +390,16 @@ def sequence_numeric_column( if not (dtype.is_integer or dtype.is_floating): raise ValueError('dtype must be convertible to float. ' 'dtype: {}, key: {}'.format(dtype, key)) + if normalizer_fn is not None and not callable(normalizer_fn): + raise TypeError( + 'normalizer_fn must be a callable. Given: {}'.format(normalizer_fn)) return _SequenceNumericColumn( key, shape=shape, default_value=default_value, - dtype=dtype) + dtype=dtype, + normalizer_fn=normalizer_fn) def _assert_all_equal_and_return(tensors, name=None): @@ -407,7 +418,7 @@ class _SequenceNumericColumn( fc._SequenceDenseColumn, collections.namedtuple( '_SequenceNumericColumn', - ['key', 'shape', 'default_value', 'dtype'])): + ['key', 'shape', 'default_value', 'dtype', 'normalizer_fn'])): """Represents sequences of numeric data.""" @property @@ -419,7 +430,10 @@ class _SequenceNumericColumn( return {self.key: parsing_ops.VarLenFeature(self.dtype)} def _transform_feature(self, inputs): - return inputs.get(self.key) + input_tensor = inputs.get(self.key) + if self.normalizer_fn is not None: + input_tensor = self.normalizer_fn(input_tensor) + return input_tensor @property def _variable_shape(self): diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index ee74cf56dc..45d7b74046 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -28,6 +28,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import test from tensorflow.python.training import monitored_session @@ -947,6 +948,7 @@ class SequenceNumericColumnTest(test.TestCase): self.assertEqual((1,), a.shape) self.assertEqual(0., a.default_value) self.assertEqual(dtypes.float32, a.dtype) + self.assertIsNone(a.normalizer_fn) def test_shape_saved_as_tuple(self): a = sfc.sequence_numeric_column('aaa', shape=[1, 2]) @@ -965,6 +967,10 @@ class SequenceNumericColumnTest(test.TestCase): ValueError, 'dtype must be convertible to float'): sfc.sequence_numeric_column('aaa', dtype=dtypes.string) + def test_normalizer_fn_must_be_callable(self): + with self.assertRaisesRegexp(TypeError, 'must be a callable'): + sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable') + def test_get_sequence_dense_tensor(self): sparse_input = sparse_tensor.SparseTensorValue( # example 0, values [[0.], [1]] @@ -985,6 +991,41 @@ class SequenceNumericColumnTest(test.TestCase): self.assertAllEqual( expected_dense_tensor, 
dense_tensor.eval(session=sess)) + def test_get_sequence_dense_tensor_with_normalizer_fn(self): + + def _increment_two(input_sparse_tensor): + return sparse_ops.sparse_add( + input_sparse_tensor, + sparse_tensor.SparseTensor(((0, 0), (1, 1)), (2.0, 2.0), (2, 2)) + ) + + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + + # Before _increment_two: + # [[0.], [1.]], + # [[10.], [0.]], + # After _increment_two: + # [[2.], [1.]], + # [[10.], [2.]], + expected_dense_tensor = [ + [[2.], [1.]], + [[10.], [2.]], + ] + numeric_column = sfc.sequence_numeric_column( + 'aaa', normalizer_fn=_increment_two) + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + def test_get_sequence_dense_tensor_with_shape(self): """Tests get_sequence_dense_tensor with shape !=(1,).""" sparse_input = sparse_tensor.SparseTensorValue( diff --git a/tensorflow/contrib/ffmpeg/__init__.py b/tensorflow/contrib/ffmpeg/__init__.py index daba965a98..484ffee3e7 100644 --- a/tensorflow/contrib/ffmpeg/__init__.py +++ b/tensorflow/contrib/ffmpeg/__init__.py @@ -28,7 +28,6 @@ from __future__ import print_function from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_audio from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video from tensorflow.contrib.ffmpeg.ffmpeg_ops import encode_audio -from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py index 020b5c99c6..b1b5126d9e 100644 --- a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py +++ b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py @@ -21,7 +21,6 @@ from __future__ import print_function from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py -from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py from tensorflow.contrib.util import loader from tensorflow.python.framework import ops from tensorflow.python.platform import resource_loader diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index 10d1ecc738..dc49383c5c 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -119,14 +119,13 @@ from tensorflow.python.framework.smart_cond import smart_cond from tensorflow.python.framework.smart_cond import smart_constant_value from tensorflow.python.framework.tensor_spec import BoundedTensorSpec from tensorflow.python.framework.tensor_spec import TensorSpec -from tensorflow.python.ops.array_ops import broadcast_to from tensorflow.python.ops.init_ops import convolutional_delta_orthogonal from tensorflow.python.ops.init_ops import convolutional_orthogonal_1d from tensorflow.python.ops.init_ops import convolutional_orthogonal_2d from tensorflow.python.ops.init_ops import convolutional_orthogonal_3d from tensorflow.python.util.all_util import remove_undocumented -_allowed_symbols = ['nest', 'broadcast_to'] +_allowed_symbols = ['nest'] _nest_allowed_symbols = [ 'assert_same_structure', 'is_sequence', diff --git 
a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py index 65cb94b5a4..a955e21b72 100644 --- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py +++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py @@ -301,8 +301,8 @@ class FusedConv2DBiasActivationTest(test.TestCase): conv = tensors[i] value = values[i] ref_value = ref_values[i] - print("expected = ", ref_value) - print("actual = ", value) + tf_logging.info("expected = ", ref_value) + tf_logging.info("actual = ", value) tol = 1e-5 if value.dtype == np.float16: tol = 1e-3 @@ -843,7 +843,8 @@ class FusedConvInt8Tests(test.TestCase): vertical_stride, padding_type) output_width = CalculateConvolvedOutputDim(input_width, filter_width, horizontal_stride, padding_type) - print("output_height=", output_height, ", output_width=", output_width) + tf_logging.info("output_height=", output_height, ", output_width=", + output_width) side_input, _, _ = gen_array_ops.quantize_v2( random_ops.random_uniform( @@ -880,8 +881,8 @@ class FusedConvInt8Tests(test.TestCase): with self.test_session( use_gpu=True, config=NoMemoryOptimizationConfig()) as sess: actual_y, expected_y = sess.run([actual, expected]) - print("actual_y = ", actual_y) - print("expected_y = ", expected_y) + tf_logging.info("actual_y = ", actual_y) + tf_logging.info("expected_y = ", expected_y) self.assertTrue(np.array_equal(actual_y, expected_y)) def testFusedConvInt8(self): diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c index 6a5d982dc8..2e5c84704f 100644 --- a/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c +++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c @@ -19,7 +19,7 @@ limitations under the License. #include "hexagon_controller.h" -#include +#include #include #include "adspmsgd.h" diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh index 436c3e1d4c..840015a7fa 100755 --- a/tensorflow/contrib/lite/download_dependencies.sh +++ b/tensorflow/contrib/lite/download_dependencies.sh @@ -30,9 +30,7 @@ if [ ! -f $BZL_FILE_PATH ]; then fi EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" -# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once -# the archive has been propagated in mirror.bazel.build. 
-GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" NEON_2_SSE_URL="https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip" diff --git a/tensorflow/contrib/lite/examples/minimal/minimal.cc b/tensorflow/contrib/lite/examples/minimal/minimal.cc index 106e3b0270..8b0ace96cc 100644 --- a/tensorflow/contrib/lite/examples/minimal/minimal.cc +++ b/tensorflow/contrib/lite/examples/minimal/minimal.cc @@ -38,7 +38,7 @@ using namespace tflite; int main(int argc, char *argv[]) { if(argc != 2) { - fprintf(stderr, "Usage: %s \n"); + fprintf(stderr, "minimal \n"); return 1; } const char* filename = argv[1]; diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md index bb2e615eac..965273f0f0 100644 --- a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md +++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md @@ -128,7 +128,6 @@ TensorFlow operation not listed above are likely unsupported. Notably, the following common ops are not supported at the moment: * [tf.depth_to_space](https://www.tensorflow.org/api_docs/python/tf/depth_to_space) -* [tf.gather](https://www.tensorflow.org/api_docs/python/tf/gather) * [tf.image.resize_bilinear](https://www.tensorflow.org/api_docs/python/tf/image/resize_bilinear) * [tf.tanh](https://www.tensorflow.org/api_docs/python/tf/tanh) @@ -306,6 +305,19 @@ Options { } ``` +**GATHER** + +``` +Inputs { + 0: params tensor + 1: indices tensor + 2: axis tensor (optional) +} +Outputs { + 0: a tensor with same type as the params tensor. +} +``` + **GREATER** ``` diff --git a/tensorflow/contrib/lite/java/ovic/README.md b/tensorflow/contrib/lite/java/ovic/README.md index 5efa70987e..26349347fa 100644 --- a/tensorflow/contrib/lite/java/ovic/README.md +++ b/tensorflow/contrib/lite/java/ovic/README.md @@ -2,7 +2,7 @@ This folder contains building code for track one of the [Low Power ImageNet Recognition Challenge workshop at CVPR 2018.](https://rebootingcomputing.ieee.org/home/sitemap/14-lpirc/80-low-power-image-recognition-challenge-lpirc-2018) -## Pre-requesits +## Pre-requisite Follow the steps [here](https://www.tensorflow.org/mobile/tflite/demo_android) to install Tensorflow, Bazel, and the Android NDK and SDK. @@ -49,7 +49,7 @@ Once you have a submission that follows the instructions from the [competition s You can call the validator binary below to verify that your model fits the format requirements. This often helps you to catch size mismatches (e.g. output should be [1, 1001] instead of [1,1,1,1001]). 
Let say the submission file is located at `/path/to/my_model.lite`, then call: ```sh -bazel build --cxxopt--std=c++11 //tensorflow/contrib/lite/java/ovic:ovic_validator --cxxopt=-Wno-all +bazel build --cxxopt=--std=c++11 //tensorflow/contrib/lite/java/ovic:ovic_validator --cxxopt=-Wno-all bazel-bin/tensorflow/contrib/lite/java/ovic/ovic_validator /path/to/my_model.lite ``` diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index a2f192bbc2..1908f7fa6c 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1934,7 +1934,7 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims, // The quantization of the input, output arrays is as follows: // - The input activations are quantized as uint8 on the interval // [-1, 127/128]. -// The rationale for that is that that is the natural interval for output +// The rationale for that is that is the natural interval for output // activations (see next point) and these need to be concatenated together. // We could accommodate different ranges by re-scaling, but we empirically // found that setting the input activations range to be [-1, 127/128] in the @@ -1999,7 +1999,7 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims, // However, for a fixed-point implementation in 16-bit integers, using 5 // integer bits to represent the [-16, 16] range would leave only 11 // fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive -// representable values. Notice that that is higher than the +// representable values. Notice that is higher than the // worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic. // Using [-8, 8] thus seems like the better compromise overall, enjoying // an increment of 2.4e-4 between representable values and a worst-case diff --git a/tensorflow/contrib/lite/python/interpreter.py b/tensorflow/contrib/lite/python/interpreter.py index 9400e757b9..fd90823425 100644 --- a/tensorflow/contrib/lite/python/interpreter.py +++ b/tensorflow/contrib/lite/python/interpreter.py @@ -55,7 +55,7 @@ class Interpreter(object): elif model_content and not model_path: self._interpreter = ( _interpreter_wrapper.InterpreterWrapper_CreateWrapperCPPFromBuffer( - model_content, len(model_content))) + model_content)) if not self._interpreter: raise ValueError( 'Failed to create model from {} bytes'.format(len(model_content))) diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc index f705551fcb..b283551c45 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc @@ -397,9 +397,14 @@ InterpreterWrapper* InterpreterWrapper::CreateWrapperCPPFromFile( } InterpreterWrapper* InterpreterWrapper::CreateWrapperCPPFromBuffer( - const char* data, size_t len) { + PyObject* data) { + char * buf = nullptr; + Py_ssize_t length; + if (PY_TO_CPPSTRING(data, &buf, &length) == -1) { + return nullptr; + } std::unique_ptr model = - tflite::FlatBufferModel::BuildFromBuffer(data, len); + tflite::FlatBufferModel::BuildFromBuffer(buf, length); return model ? 
new InterpreterWrapper(std::move(model)) : nullptr; } diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h index b0ed7c4559..cbeb53bee7 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h @@ -40,8 +40,7 @@ class InterpreterWrapper { static InterpreterWrapper* CreateWrapperCPPFromFile(const char* model_path); // SWIG caller takes ownership of pointer. - static InterpreterWrapper* CreateWrapperCPPFromBuffer(const char* data, - size_t len); + static InterpreterWrapper* CreateWrapperCPPFromBuffer(PyObject* data); ~InterpreterWrapper(); bool AllocateTensors(); diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index 0913cd2c5c..88dda7290b 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -34,6 +34,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from six import PY3 + from google.protobuf import text_format as _text_format from google.protobuf.message import DecodeError from tensorflow.contrib.lite.python import lite_constants as constants @@ -54,6 +56,7 @@ from tensorflow.python.framework.importer import import_graph_def from tensorflow.python.ops.variables import global_variables_initializer from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import tag_constants +# from tensorflow.python.util.all_util import remove_undocumented class TocoConverter(object): @@ -203,6 +206,12 @@ class TocoConverter(object): except (_text_format.ParseError, DecodeError): try: print("Ignore 'tcmalloc: large alloc' warnings.") + + if not isinstance(file_content, str): + if PY3: + file_content = file_content.decode('utf-8') + else: + file_content = file_content.encode('utf-8') _text_format.Merge(file_content, graph_def) except (_text_format.ParseError, DecodeError): raise ValueError( @@ -382,3 +391,5 @@ def _freeze_graph(sess, output_tensors): output_arrays) else: return sess.graph_def + +# remove_undocumented(__name__) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index e33b430937..5c7fa09891 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -178,7 +178,7 @@ ArrayDataType ConvertDataType(tensorflow::DataType dtype) { else if (dtype == DT_STRING) return ArrayDataType::kString; else - LOG(INFO) << "Unsupported data type in placehoder op: " << dtype; + LOG(INFO) << "Unsupported data type in placeholder op: " << dtype; return ArrayDataType::kNone; } diff --git a/tensorflow/contrib/lite/toco/toco_port.cc b/tensorflow/contrib/lite/toco/toco_port.cc index 1b21c8bc60..de76fd4032 100644 --- a/tensorflow/contrib/lite/toco/toco_port.cc +++ b/tensorflow/contrib/lite/toco/toco_port.cc @@ -20,6 +20,12 @@ limitations under the License. 
#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) +namespace std { +double round(double x) { return ::round(x); } +} // namespace std +#endif + namespace toco { namespace port { void CopyToBuffer(const string& src, char* dest) { diff --git a/tensorflow/contrib/lite/toco/toco_port.h b/tensorflow/contrib/lite/toco/toco_port.h index 5c019cb2bf..17f82b9dd7 100644 --- a/tensorflow/contrib/lite/toco/toco_port.h +++ b/tensorflow/contrib/lite/toco/toco_port.h @@ -34,6 +34,24 @@ limitations under the License. #define TFLITE_PROTO_NS google::protobuf #endif +#ifdef __ANDROID__ +#include +namespace std { + +template +std::string to_string(T value) +{ + std::ostringstream os ; + os << value ; + return os.str() ; +} + +#ifdef __ARM_ARCH_7A__ +double round(double x); +#endif +} +#endif + namespace toco { namespace port { diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh index e8c6edd7ba..a28fc3a87f 100755 --- a/tensorflow/contrib/makefile/compile_nsync.sh +++ b/tensorflow/contrib/makefile/compile_nsync.sh @@ -270,7 +270,7 @@ for arch in $archs; do PLATFORM_LDFLAGS=-pthread MKDEP=${CC} -M -std=c++11 PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \ - ../../platform/c++11/src/per_thread_waiter.cc \ + ../../platform/posix/src/per_thread_waiter.c \ ../../platform/c++11/src/yield.cc \ ../../platform/c++11/src/time_rep_timespec.cc \ ../../platform/c++11/src/nsync_panic.cc diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh index eff9081e35..48953e2e38 100755 --- a/tensorflow/contrib/makefile/download_dependencies.sh +++ b/tensorflow/contrib/makefile/download_dependencies.sh @@ -27,9 +27,7 @@ if [ ! -f $BZL_FILE_PATH ]; then fi EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" -# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once -# the archive has been propagated in mirror.bazel.build. -GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 2ed99d50a4..a6be2084aa 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -2503,7 +2503,7 @@ def _compute_recall_at_precision(tp, fp, fn, precision, name): name: An optional variable_scope name. Returns: - The recall at a the given `precision`. + The recall at a given `precision`. 
""" precisions = math_ops.div(tp, tp + fp + _EPSILON) tf_index = math_ops.argmin( diff --git a/tensorflow/contrib/mpi_collectives/kernels/ring.h b/tensorflow/contrib/mpi_collectives/kernels/ring.h index 1d56d588bc..c001615d3f 100644 --- a/tensorflow/contrib/mpi_collectives/kernels/ring.h +++ b/tensorflow/contrib/mpi_collectives/kernels/ring.h @@ -129,7 +129,7 @@ cudaStream_t CudaStreamForMPI(); * has the fully accumulated Segment 1; and so on. The scatter-reduce is * complete. * - * Next, the allgather distributes these fully accumululated chunks across all + * Next, the allgather distributes these fully accumulated chunks across all * nodes. Communication proceeds in the same ring, once again in N-1 steps. At * the ith step, node j will send chunk (j - i + 1) and receive chunk (j - i). * For example, at the first iteration, the following transfers will occur: diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py index 21bf3f5313..915e6504e1 100644 --- a/tensorflow/contrib/opt/python/training/adamax_test.py +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -224,8 +224,10 @@ class AdaMaxOptimizerTest(test.TestCase): var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0), + rtol=1e-2) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1), + rtol=1e-2) if use_resource: self.assertEqual("var0_%d/AdaMax:0" % (i,), opt.get_slot(var=var0, name="m").name) diff --git a/tensorflow/contrib/opt/python/training/model_average_optimizer.py b/tensorflow/contrib/opt/python/training/model_average_optimizer.py index a7c97a1da2..b6b10e500b 100644 --- a/tensorflow/contrib/opt/python/training/model_average_optimizer.py +++ b/tensorflow/contrib/opt/python/training/model_average_optimizer.py @@ -62,7 +62,7 @@ class ModelAverageCustomGetter(object): """ def __init__(self, worker_device): - """Create a new `ElasticAverageCustomGetter`. + """Create a new `ModelAverageCustomGetter`. Args: worker_device: String. Name of the `worker` job. 
diff --git a/tensorflow/contrib/periodic_resample/BUILD b/tensorflow/contrib/periodic_resample/BUILD index 6ca7fe8b6e..aad1ca04c5 100644 --- a/tensorflow/contrib/periodic_resample/BUILD +++ b/tensorflow/contrib/periodic_resample/BUILD @@ -6,12 +6,13 @@ exports_files(["LICENSE"]) load( "//tensorflow:tensorflow.bzl", - "py_test", + "tf_cc_test", "tf_gen_op_libs", "tf_custom_op_library", "tf_custom_op_py_library", "tf_gen_op_wrapper_py", ) +load("//tensorflow:tensorflow.bzl", "py_test") cc_library( name = "all_ops", @@ -84,6 +85,23 @@ py_test( ":init_py", "//tensorflow/contrib/util:util_py", "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradient_checker", + ], +) + +tf_cc_test( + name = "periodic_resample_op_cc_test", + size = "small", + srcs = [ + "ops/array_ops_test.cc", + ], + deps = [ + ":all_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_proto", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", ], ) diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc index e18923c8aa..514689cf45 100644 --- a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc +++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc @@ -22,4 +22,9 @@ namespace tensorflow { REGISTER_KERNEL_BUILDER(Name("PeriodicResample").Device(DEVICE_CPU), PeriodicResampleOp); + +REGISTER_KERNEL_BUILDER(Name("PeriodicResampleOpGrad") + .Device(DEVICE_CPU), + PeriodicResampleOpGrad); + } // namespace tensorflow diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h index 3ab588c458..42fba81a5c 100644 --- a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h +++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h @@ -25,92 +25,202 @@ #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/util/work_sharder.h" namespace { -template -IndexT compute_input_index( - IndexVecT* target_dimensions, const IndexT& output_index, - const IndexVecT& original_dimensions, const int& adjustable_dimension, - const std::vector& dimension_ceiling, - const std::vector& cumulative_dimensions, IndexT* result, - std::vector* output_indices, const int& rank) { - *result = 0; - output_indices->clear(); +// Computes input tensor index for given output index during forward +// propagation through periodic_resample operation. +class InputIndexer { + public: + InputIndexer(const std::vector& output_dimensions, + const tensorflow::TensorShape& input_shape, + int adjustable_dimension) + : output_dimensions_(output_dimensions), + adjustable_dimension_(adjustable_dimension), + rank_(input_shape.dims()), + linear_output_index_(0), + linear_input_index_(0), + adjustable_dimension_carriage_sum_(0) { + auto input_dimensions = TensorShapeToVector(input_shape); + // factors by which input_dimensions increases/decreases w.r.t. 
+ // output_dimensions + dimension_ceiling_ = + ComputeDimensionCeiling(output_dimensions, input_dimensions); + cumulative_dimensions_ = ComputeCumulativeDimensions(); + + output_indices_.resize(output_dimensions_.size()); + input_indices_.resize(output_dimensions_.size()); + + // Compute index_factors + index_factors_.resize(rank_); + tensorflow::int64 last_index_factor = 1; + for (auto r = rank_ - 1; r >= 0; --r) { + index_factors_[r] = last_index_factor; + last_index_factor *= input_dimensions[r]; + } + } + + tensorflow::int64 linear_input_index() const { return linear_input_index_; } + + void MoveToOutputIndex(tensorflow::int64 output_index); + void IncrementOutputIndex(); + + private: + void RecomputeInputAdjustableDimensionIndex() { + tensorflow::int64 index = adjustable_dimension_carriage_sum_; + index *= output_dimensions_[adjustable_dimension_]; + index += output_indices_[adjustable_dimension_]; + input_indices_[adjustable_dimension_] = index; + } + + std::vector TensorShapeToVector( + const tensorflow::TensorShape& tensor_shape); + + std::vector ComputeDimensionCeiling( + const std::vector& output_dimensions, + const std::vector& input_dimensions); + + std::vector ComputeCumulativeDimensions(); + + const std::vector output_dimensions_; + std::vector dimension_ceiling_; + std::vector index_factors_; + std::vector cumulative_dimensions_; + std::vector output_indices_; + std::vector input_indices_; + + const int adjustable_dimension_; + const int rank_; + tensorflow::int64 linear_output_index_; + tensorflow::int64 linear_input_index_; + tensorflow::int64 adjustable_dimension_carriage_sum_; +}; + +void InputIndexer::MoveToOutputIndex(tensorflow::int64 output_index) { + linear_output_index_ = output_index; + linear_input_index_ = 0; // un-rasterize the output index auto last_reduced_i = output_index; - for (auto r = rank - 1; r >= 0; --r) { - (*output_indices)[r] = last_reduced_i % (*target_dimensions)[r]; + for (auto r = rank_ - 1; r >= 0; --r) { + output_indices_[r] = last_reduced_i % output_dimensions_[r]; last_reduced_i = - (last_reduced_i - (*output_indices)[r]) / (*target_dimensions)[r]; + (last_reduced_i - output_indices_[r]) / output_dimensions_[r]; } + tensorflow::int64 carriage_sum = 0; + for (int qi = 0; qi < rank_; ++qi) { + if (qi == adjustable_dimension_) continue; + carriage_sum += cumulative_dimensions_[qi] * + (output_indices_[qi] % dimension_ceiling_[qi]); + } + adjustable_dimension_carriage_sum_ = carriage_sum; + // rasterize the input index - IndexT last_index_factor = 1; - for (auto r = rank - 1; r >= 0; --r) { - IndexT index = 0; - if (r != adjustable_dimension) - index = (*output_indices)[r] / dimension_ceiling[r]; - else { - for (int qi = 0; qi < rank; ++qi) { - if (qi == adjustable_dimension) continue; - index += cumulative_dimensions[qi] * - ((*output_indices)[qi] % dimension_ceiling[qi]); - } - index *= (*target_dimensions)[adjustable_dimension]; - index += (*output_indices)[r]; + for (auto r = rank_ - 1; r >= 0; --r) { + if (r != adjustable_dimension_) { + input_indices_[r] = output_indices_[r] / dimension_ceiling_[r]; + } else { + RecomputeInputAdjustableDimensionIndex(); } - *result += last_index_factor * index; - last_index_factor *= original_dimensions[r]; } + for (auto r = rank_ - 1; r >= 0; --r) { + linear_input_index_ += index_factors_[r] * input_indices_[r]; + } +} + +void InputIndexer::IncrementOutputIndex() { + linear_output_index_++; + for (auto r = rank_ - 1; r >= 0; --r) { + auto old_carriage_sum_increment = + cumulative_dimensions_[r] * + 
(output_indices_[r] % dimension_ceiling_[r]); + output_indices_[r] = (output_indices_[r] + 1) % output_dimensions_[r]; + if (r != adjustable_dimension_) { + auto new_input_index = output_indices_[r] / dimension_ceiling_[r]; + linear_input_index_ += + (new_input_index - input_indices_[r]) * index_factors_[r]; + + input_indices_[r] = new_input_index; + + auto new_carriage_sum_increment = + cumulative_dimensions_[r] * + (output_indices_[r] % dimension_ceiling_[r]); - return *result; + adjustable_dimension_carriage_sum_ = adjustable_dimension_carriage_sum_ - + old_carriage_sum_increment + + new_carriage_sum_increment; + } + + if (output_indices_[r] != 0) { + // No more carries to higher indices. + break; + } + } + auto old_adjustable_dimension_input_index = + input_indices_[adjustable_dimension_]; + RecomputeInputAdjustableDimensionIndex(); + linear_input_index_ += (input_indices_[adjustable_dimension_] - + old_adjustable_dimension_input_index) * + index_factors_[adjustable_dimension_]; } -template // both types are needed here b/c IndexVecT and - // InputDataT are not related - void - fill_periodic_tensor( - tensorflow::OpKernelContext* context, - const IndexVecT& desired_shape, - const tensorflow::Tensor& input_tensor) { - // input is a strided array (last index is fastest, C-ordered) - auto input = input_tensor.flat(); - const int rank = input_tensor.dims(); - // original and target dimensions - std::vector original_dimensions(rank), - target_dimensions(rank); - tensorflow::int64 total_size(input_tensor.NumElements()), new_sliced_size(1); - // factors by which original_dimensions increases/decreases w.r.t. - // target_dimensions - std::vector dimension_ceiling(rank), - cumulative_dimensions(rank); - // index of adjustable dimension - int adjustable_dimension; - tensorflow::TensorShape output_shape; +std::vector InputIndexer::TensorShapeToVector( + const tensorflow::TensorShape& tensor_shape) { + std::vector result(tensor_shape.dims()); + int count = 0; + for (const auto dim_info : tensor_shape) { + result[count] = dim_info.size; + ++count; + } + return result; +} - // requires that the rank of the input tensor and length of the desired shape - // are equal - OP_REQUIRES(context, rank == desired_shape.size(), - tensorflow::errors::InvalidArgument( - "periodic_resample expects the rank of the input tensor, ", - rank, ", to be the same as the length of the desired shape, ", - desired_shape.size(), ".")); +std::vector InputIndexer::ComputeDimensionCeiling( + const std::vector& output_dimensions, + const std::vector& input_dimensions) { + std::vector dimension_ceiling(input_dimensions.size()); + for (size_t i = 0; i < input_dimensions.size(); ++i) { + dimension_ceiling[i] = (output_dimensions[i] + input_dimensions[i] - 1) / + input_dimensions[i]; + } + return dimension_ceiling; +} - bool found = false; - const auto& input_tensor_shape = input_tensor.shape(); +std::vector InputIndexer::ComputeCumulativeDimensions() { + std::vector cumulative_dimensions(rank_); + int count = 0; + for (int i = 0; i < rank_; ++i) { + if (count == 0) { + cumulative_dimensions[count] = 1; + } else { + cumulative_dimensions[count] = + cumulative_dimensions[count - 1] * dimension_ceiling_[count - 1]; + } + ++count; + } + return cumulative_dimensions; +} +template +void process_desired_shape(tensorflow::OpKernelContext* context, + const tensorflow::TensorShape& input_tensor_shape, + const IndexVecT& desired_shape, + int* adjustable_dimension, + std::vector* target_dimensions, + tensorflow::int64* output_size) { + 
tensorflow::int64 new_sliced_size = 1; + bool found = false; + const int rank = input_tensor_shape.dims(); for (int i = 0; i < rank; ++i) { - // if (desired_shape(i) < 1) { if (desired_shape[i] < 1) { // only one index can be adjustable OP_REQUIRES(context, !found, tensorflow::errors::InvalidArgument( "periodic_resample expects only " "one index to be marked as adjustable.")); - adjustable_dimension = i; + *adjustable_dimension = i; found = true; } else { OP_REQUIRES( @@ -122,9 +232,8 @@ template +void +do_periodic_resample_op(tensorflow::OpKernelContext* context, + const tensorflow::TensorShape& original_shape, + const tensorflow::PartialTensorShape& desired_shape, + const tensorflow::Tensor& source_tensor) { + const int rank = source_tensor.dims(); + + // requires that the rank of the input tensor and length of the desired shape + // are equal + OP_REQUIRES(context, rank == desired_shape.dims(), + tensorflow::errors::InvalidArgument( + "periodic_resample expects the rank of the input tensor, ", + rank, ", to be the same as the length of the desired shape, ", + desired_shape.dims(), ".")); + + std::vector target_dimensions(rank); + tensorflow::int64 new_size = 0; + // index of adjustable dimension + int adjustable_dimension = 0; + process_desired_shape(context, original_shape, desired_shape.dim_sizes(), + &adjustable_dimension, &target_dimensions, &new_size); // ensure that the new dimension is greater than zero OP_REQUIRES(context, target_dimensions[adjustable_dimension] > 0, @@ -160,11 +293,14 @@ template allocate_output(0, output_shape, &output_tensor)); auto output = output_tensor->flat(); - // memory is allocated for these variables outside the inner loop for - // efficiency (although, I could create a separate class scope for - // this purpose instead) - tensorflow::int64 result = 0; - std::vector output_indices(target_dimensions.size()); + // input is a strided array (last index is fastest, C-ordered) + auto input = source_tensor.flat(); // Fill output tensor with periodically resampled input tensor values - for (tensorflow::int64 output_index = 0; output_index < new_size; - ++output_index) { - output(output_index) = input(compute_input_index( - &target_dimensions, output_index, original_dimensions, - adjustable_dimension, dimension_ceiling, cumulative_dimensions, &result, - &output_indices, rank)); - } + InputIndexer input_indexer(target_dimensions, original_shape, + adjustable_dimension); + + auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads()); + auto fill_output_tensor = [&input_indexer, &output, &input]( + tensorflow::int64 start, tensorflow::int64 limit) { + InputIndexer local_indexer(input_indexer); + local_indexer.MoveToOutputIndex(start); + for (tensorflow::int64 output_index = start; output_index < limit; + ++output_index) { + if (mode == Mode::kForward) { + output(output_index) = input(local_indexer.linear_input_index()); + } else { + output(local_indexer.linear_input_index()) = input(output_index); + } + local_indexer.IncrementOutputIndex(); + } + }; + ::tensorflow::Shard(worker_threads.num_threads, worker_threads.workers, + new_size, costPerFillIndex, fill_output_tensor); } +#define DATA_TYPE_SWITCH(data_type, context, CASE) \ + switch (data_type) { \ + CASE(float) \ + CASE(double) \ + CASE(tensorflow::int32) \ + CASE(tensorflow::int64) \ + default: \ + context->CtxFailure(__FILE__, __LINE__, \ + tensorflow::errors::InvalidArgument( \ + "Unsuppored tensor elements type")); \ + break; \ + } + void create_output_tensor( tensorflow::OpKernelContext* 
context, const tensorflow::Tensor& input_tensor, const tensorflow::DataType& input_tensor_type, - const tensorflow::PartialTensorShape& desired_shape_tensor) { - auto desired_shape = desired_shape_tensor.dim_sizes(); - - // obligatory type switch - switch (input_tensor_type) { - case tensorflow::DataTypeToEnum::value: - fill_periodic_tensor(context, desired_shape, input_tensor); + const tensorflow::PartialTensorShape& desired_shape) { +#define CASE(type) \ + case tensorflow::DataTypeToEnum::value: \ + do_periodic_resample_op( \ + context, input_tensor.shape(), desired_shape, input_tensor); \ break; - case tensorflow::DataTypeToEnum::value: - fill_periodic_tensor(context, desired_shape, input_tensor); - break; - case tensorflow::DataTypeToEnum::value: - fill_periodic_tensor(context, desired_shape, - input_tensor); - break; - case tensorflow::DataTypeToEnum::value: - fill_periodic_tensor(context, desired_shape, - input_tensor); + + DATA_TYPE_SWITCH(input_tensor_type, context, CASE); +#undef CASE +} + +void create_grad_tensor(tensorflow::OpKernelContext* context, + const tensorflow::Tensor& grad_tensor, + const tensorflow::DataType& grad_tensor_type, + const tensorflow::TensorShape& original_shape, + const tensorflow::PartialTensorShape& desired_shape) { +#define CASE(type) \ + case tensorflow::DataTypeToEnum::value: \ + do_periodic_resample_op( \ + context, original_shape, desired_shape, grad_tensor); \ break; - default:; - } + + DATA_TYPE_SWITCH(grad_tensor_type, context, CASE); +#undef CASE } } // namespace @@ -238,4 +400,25 @@ class PeriodicResampleOp : public tensorflow::OpKernel { tensorflow::PartialTensorShape desired_shape; }; +class PeriodicResampleOpGrad : public tensorflow::OpKernel { + public: + explicit PeriodicResampleOpGrad(tensorflow::OpKernelConstruction* context) + : tensorflow::OpKernel(context) { + OP_REQUIRES_OK(context, + context->GetAttr("original_shape", &original_shape)); + OP_REQUIRES_OK(context, context->GetAttr("desired_shape", &desired_shape)); + } + + void Compute(tensorflow::OpKernelContext* context) override { + const tensorflow::Tensor& grad_tensor = context->input(0); + const tensorflow::DataType grad_tensor_type = context->input_dtype(0); + create_grad_tensor(context, grad_tensor, grad_tensor_type, original_shape, + desired_shape); + } + + private: + tensorflow::TensorShape original_shape; + tensorflow::PartialTensorShape desired_shape; +}; + #endif // TENSORFLOW_KERNELS_PERIODICRESAMPLE_OP_H_ diff --git a/tensorflow/contrib/periodic_resample/ops/array_ops.cc b/tensorflow/contrib/periodic_resample/ops/array_ops.cc index 82bd796956..fd38cd09b4 100644 --- a/tensorflow/contrib/periodic_resample/ops/array_ops.cc +++ b/tensorflow/contrib/periodic_resample/ops/array_ops.cc @@ -26,7 +26,42 @@ REGISTER_OP("PeriodicResample") .Input("values: T") .Attr("shape: shape") .Output("output: T") - .SetShapeFn(shape_inference::ExplicitShape) + .SetShapeFn([](shape_inference::InferenceContext* c) { + tensorflow::PartialTensorShape desired_shape; + TF_RETURN_IF_ERROR(c->GetAttr("shape", &desired_shape)); + shape_inference::ShapeHandle input_tensor_shape = c->input(0); + shape_inference::DimensionHandle num_input_elements = + c->NumElements(input_tensor_shape); + shape_inference::ShapeHandle result_shape_handle; + if (!shape_inference::InferenceContext::ValueKnown(num_input_elements)) { + TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape( + desired_shape, &result_shape_handle)); + } else { + const int rank = c->Rank(input_tensor_shape); + std::vector target_dimensions(rank); 
+ tensorflow::int64 new_sliced_size = 1; + int adjustable_dimension = 0; + for (int i = 0; i < rank; ++i) { + if (desired_shape.dim_size(i) < 1) { + adjustable_dimension = i; + } else { + target_dimensions[i] = desired_shape.dim_size(i); + new_sliced_size *= target_dimensions[i]; + } + } + target_dimensions[adjustable_dimension] = + shape_inference::InferenceContext::Value( + num_input_elements) / new_sliced_size; + tensorflow::TensorShape result_shape; + for (int i = 0; i < rank; ++i) { + result_shape.AddDim(target_dimensions[i]); + } + TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape( + result_shape, &result_shape_handle)); + } + c->set_output(0, result_shape_handle); + return Status::OK(); + }) .Doc(R"doc( Periodically resample elements of a tensor to conform to `shape`. @@ -101,4 +136,20 @@ output: Periodically resampled tensor that has dimensions specified as in )doc"); + +REGISTER_OP("PeriodicResampleOpGrad") + .Attr("T: numbertype") + .Input("grad: T") + .Attr("original_shape: shape") + .Attr("desired_shape: shape") + .Output("grad_values: T") + .SetShapeFn([](shape_inference::InferenceContext* c) { + tensorflow::TensorShape original_shape; + TF_RETURN_IF_ERROR(c->GetAttr("original_shape", &original_shape)); + shape_inference::ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape(original_shape, &s)); + c->set_output(0, s); + return Status::OK(); +}); + } // namespace tensorflow diff --git a/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc b/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc new file mode 100644 index 0000000000..43b7c1799f --- /dev/null +++ b/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc @@ -0,0 +1,41 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/shape_inference_testutil.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +TEST(ArrayOpsTest, PeriodicResample_ShapeFn) { + ShapeInferenceTestOp op("PeriodicResample"); + // Case 1: output shape can be fully inferreed. + PartialTensorShape shape({4, 4, -1}); + TensorShapeProto shape_proto; + shape.AsProto(&shape_proto); + + TF_ASSERT_OK(NodeDefBuilder("test", "PeriodicResample") + .Input({"values", 0, DT_INT32}) + .Attr("shape", shape_proto) + .Finalize(&op.node_def)); + INFER_OK(op, "[2,2,4]", "[4,4,1]"); + // Case 2: output shape can not be inferred - report desired shape. 
+ INFER_OK(op, "[2,2,?]", "[4,4,?]"); +} + +} // end namespace tensorflow diff --git a/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py index a25de55e18..31a6fe1d94 100644 --- a/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py +++ b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py @@ -21,8 +21,11 @@ from __future__ import print_function import numpy from tensorflow.contrib.periodic_resample import periodic_resample +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import variables from tensorflow.python.platform import googletest @@ -93,7 +96,6 @@ class PeriodicResampleTest(test_util.TensorFlowTestCase): def testPeriodicResampleErrors(self): input_tensor = numpy.zeros(shape=[1, 2, 2, 4]) with self.test_session(): - variables.global_variables_initializer().run() with self.assertRaisesWithPredicateMatch( errors_impl.InvalidArgumentError, 'Dimension 3 input tensor has size 4, desired shape has size 1'): @@ -103,6 +105,29 @@ class PeriodicResampleTest(test_util.TensorFlowTestCase): '4, to be the same as the length of the desired shape, 3'): periodic_resample(input_tensor, [None, 4, 4]).eval() + def testPeriodicResampleGradient(self): + desired_shape = numpy.array([4, 4, None]) + result_shape = (4, 4, 1) + input_shape = (2, 2, 4) + with self.test_session() as sess: + x = array_ops.placeholder(dtypes.float32, shape=input_shape) + output = periodic_resample(x, desired_shape) + error = gradient_checker.compute_gradient_error( + x, input_shape, output, result_shape) + self.assertLess(error, 1e-4) + + def testPeriodicResampleShapeInference(self): + with self.test_session() as sess: + # Case 1: output shape can be fully inferreed. + x = array_ops.placeholder(dtypes.float32, shape=(2, 2, 4)) + output = periodic_resample(x, [4, 4, None]) + self.assertEqual(output.shape, [4, 4, 1]) + # Case 2: output shape can not be inferred - report desired shape. 
+ x = array_ops.placeholder(dtypes.float32, shape=(2, 2, None)) + output = periodic_resample(x, [4, 4, None]) + self.assertTrue(output.shape.is_compatible_with([4, 4, None])) + self.assertEqual(output.shape[2].value, None) + if __name__ == '__main__': googletest.main() diff --git a/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py index 348623d8f8..470e300ccb 100644 --- a/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py +++ b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py @@ -21,11 +21,17 @@ from __future__ import print_function # pylint: disable=unused-import from tensorflow.contrib.periodic_resample.python.ops import gen_periodic_resample_op -from tensorflow.contrib.periodic_resample.python.ops.gen_periodic_resample_op import periodic_resample +from tensorflow.contrib.periodic_resample.python.ops.gen_periodic_resample_op import periodic_resample, periodic_resample_op_grad from tensorflow.contrib.util import loader +from tensorflow.python.framework import ops from tensorflow.python.platform import resource_loader # pylint: enable=unused-import _periodic_resample_op = loader.load_op_library( resource_loader.get_path_to_datafile('_periodic_resample_op.so')) + +@ops.RegisterGradient("PeriodicResample") +def _periodic_resample_grad_cc(op, grad): + return periodic_resample_op_grad( + grad, op.inputs[0].shape, op.get_attr('shape')) diff --git a/tensorflow/contrib/predictor/contrib_estimator_predictor.py b/tensorflow/contrib/predictor/contrib_estimator_predictor.py index b7a98c68e2..af3b2ad1b5 100644 --- a/tensorflow/contrib/predictor/contrib_estimator_predictor.py +++ b/tensorflow/contrib/predictor/contrib_estimator_predictor.py @@ -34,7 +34,8 @@ class ContribEstimatorPredictor(predictor.Predictor): prediction_input_fn, input_alternative_key=None, output_alternative_key=None, - graph=None): + graph=None, + config=None): """Initialize a `ContribEstimatorPredictor`. Args: @@ -48,6 +49,7 @@ class ContribEstimatorPredictor(predictor.Predictor): multi-headed models. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. """ self._graph = graph or ops.Graph() with self._graph.as_default(): @@ -58,6 +60,7 @@ class ContribEstimatorPredictor(predictor.Predictor): checkpoint_path = saver.latest_checkpoint(estimator.model_dir) self._session = monitored_session.MonitoredSession( session_creator=monitored_session.ChiefSessionCreator( + config=config, checkpoint_filename_with_path=checkpoint_path)) input_alternative_key = ( diff --git a/tensorflow/contrib/predictor/core_estimator_predictor.py b/tensorflow/contrib/predictor/core_estimator_predictor.py index d78d94c269..a725072e72 100644 --- a/tensorflow/contrib/predictor/core_estimator_predictor.py +++ b/tensorflow/contrib/predictor/core_estimator_predictor.py @@ -51,7 +51,8 @@ class CoreEstimatorPredictor(predictor.Predictor): estimator, serving_input_receiver_fn, output_key=None, - graph=None): + graph=None, + config=None): """Initialize a `CoreEstimatorPredictor`. Args: @@ -62,6 +63,7 @@ class CoreEstimatorPredictor(predictor.Predictor): `None`, then `DEFAULT_SERVING_SIGNATURE_DEF_KEY` is used. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. 
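Stepping back to the `RegisterGradient` hook added earlier in this patch: once `PeriodicResample` has a registered gradient, `tf.gradients` (and therefore optimizers) can differentiate through it instead of failing with a missing-gradient lookup error. A rough TF 1.x usage sketch, assuming the compiled op library is importable (illustrative only, not part of the diff):

```python
import tensorflow as tf
from tensorflow.contrib.periodic_resample import periodic_resample

x = tf.placeholder(tf.float32, shape=(2, 2, 4))
y = periodic_resample(x, [4, 4, None])
# With the gradient registered, this resolves to PeriodicResampleOpGrad.
dx, = tf.gradients(tf.reduce_sum(y), [x])
```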
""" self._graph = graph or ops.Graph() with self._graph.as_default(): @@ -71,6 +73,7 @@ class CoreEstimatorPredictor(predictor.Predictor): checkpoint_dir = estimator.model_dir self._session = monitored_session.MonitoredSession( session_creator=monitored_session.ChiefSessionCreator( + config=config, checkpoint_dir=checkpoint_dir)) feed_tensor_info = signature_def.inputs diff --git a/tensorflow/contrib/predictor/predictor_factories.py b/tensorflow/contrib/predictor/predictor_factories.py index 6e77e934fe..f275bc15ad 100644 --- a/tensorflow/contrib/predictor/predictor_factories.py +++ b/tensorflow/contrib/predictor/predictor_factories.py @@ -30,7 +30,8 @@ def from_contrib_estimator(estimator, prediction_input_fn, input_alternative_key=None, output_alternative_key=None, - graph=None): + graph=None, + config=None): """Constructs a `Predictor` from a `tf.contrib.learn.Estimator`. Args: @@ -44,6 +45,7 @@ def from_contrib_estimator(estimator, multi-headed models. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. Returns: An initialized `Predictor`. @@ -62,13 +64,15 @@ def from_contrib_estimator(estimator, prediction_input_fn, input_alternative_key=input_alternative_key, output_alternative_key=output_alternative_key, - graph=graph) + graph=graph, + config=config) def from_estimator(estimator, serving_input_receiver_fn, output_key=None, - graph=None): + graph=None, + config=None): """Constructs a `Predictor` from a `tf.python.estimator.Estimator`. Args: @@ -79,6 +83,7 @@ def from_estimator(estimator, `None`, then `DEFAULT_SERVING_SIGNATURE_DEF_KEY` is used. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. Returns: An initialized `Predictor`. @@ -93,14 +98,19 @@ def from_estimator(estimator, 'tf.contrib.learn.Estimator. You likely want to call ' 'from_contrib_estimator.') return core_estimator_predictor.CoreEstimatorPredictor( - estimator, serving_input_receiver_fn, output_key=output_key, graph=graph) + estimator, + serving_input_receiver_fn, + output_key=output_key, + graph=graph, + config=config) def from_saved_model(export_dir, signature_def_key=None, signature_def=None, tags=None, - graph=None): + graph=None, + config=None): """Constructs a `Predictor` from a `SavedModel` on disk. Args: @@ -115,6 +125,7 @@ def from_saved_model(export_dir, `SignatureDef`. Defaults to `DEFAULT_TAGS`. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. Returns: An initialized `Predictor`. 
@@ -128,4 +139,5 @@ def from_saved_model(export_dir, signature_def_key=signature_def_key, signature_def=signature_def, tags=tags, - graph=graph) + graph=graph, + config=config) diff --git a/tensorflow/contrib/predictor/predictor_factories_test.py b/tensorflow/contrib/predictor/predictor_factories_test.py index 578d9424b2..a2ef1dc3af 100644 --- a/tensorflow/contrib/predictor/predictor_factories_test.py +++ b/tensorflow/contrib/predictor/predictor_factories_test.py @@ -20,6 +20,7 @@ from __future__ import print_function from tensorflow.contrib.predictor import predictor_factories from tensorflow.contrib.predictor import testing_common +from tensorflow.core.protobuf import config_pb2 from tensorflow.python.platform import test MODEL_DIR_NAME = 'contrib/predictor/test_export_dir' @@ -41,6 +42,11 @@ class PredictorFactoriesTest(test.TestCase): """Test loading from_saved_model with tags.""" predictor_factories.from_saved_model(self._export_dir, tags='serve') + def testFromSavedModelWithSessionConfig(self): + """Test loading from_saved_model with session config.""" + predictor_factories.from_saved_model( + self._export_dir, config=config_pb2.ConfigProto()) + def testFromSavedModelWithBadTags(self): """Test that loading fails for bad tags.""" bad_tags_regex = ('.*? could not be found in SavedModel') @@ -53,6 +59,13 @@ class PredictorFactoriesTest(test.TestCase): predictor_factories.from_contrib_estimator( estimator, input_fn, output_alternative_key='sum') + def testFromContribEstimatorWithSessionConfig(self): + estimator = testing_common.get_arithmetic_estimator(core=False) + input_fn = testing_common.get_arithmetic_input_fn(core=False) + predictor_factories.from_contrib_estimator( + estimator, input_fn, output_alternative_key='sum', + config=config_pb2.ConfigProto()) + def testFromContribEstimatorWithCoreEstimatorRaises(self): estimator = testing_common.get_arithmetic_estimator(core=True) input_fn = testing_common.get_arithmetic_input_fn(core=True) @@ -64,6 +77,12 @@ class PredictorFactoriesTest(test.TestCase): input_fn = testing_common.get_arithmetic_input_fn(core=True) predictor_factories.from_estimator(estimator, input_fn) + def testFromCoreEstimatorWithSessionConfig(self): + estimator = testing_common.get_arithmetic_estimator(core=True) + input_fn = testing_common.get_arithmetic_input_fn(core=True) + predictor_factories.from_estimator( + estimator, input_fn, config=config_pb2.ConfigProto()) + def testFromCoreEstimatorWithContribEstimatorRaises(self): estimator = testing_common.get_arithmetic_estimator(core=False) input_fn = testing_common.get_arithmetic_input_fn(core=False) diff --git a/tensorflow/contrib/predictor/saved_model_predictor.py b/tensorflow/contrib/predictor/saved_model_predictor.py index 0dbca0f813..95da6d04ed 100644 --- a/tensorflow/contrib/predictor/saved_model_predictor.py +++ b/tensorflow/contrib/predictor/saved_model_predictor.py @@ -121,7 +121,8 @@ class SavedModelPredictor(predictor.Predictor): input_names=None, output_names=None, tags=None, - graph=None): + graph=None, + config=None): """Initialize a `CoreEstimatorPredictor`. Args: @@ -142,6 +143,7 @@ class SavedModelPredictor(predictor.Predictor): the correct `SignatureDef`. Defaults to `DEFAULT_TAGS`. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. Raises: ValueError: If more than one of signature_def_key OR signature_def OR (input_names AND output_names) is specified. 
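To show what the new `config` argument buys callers, here is an illustrative sketch (not part of the patch; the export directory is hypothetical): a `ConfigProto` can now be threaded through any of the factory functions down to the underlying session.

```python
from tensorflow.contrib.predictor import predictor_factories
from tensorflow.core.protobuf import config_pb2

session_config = config_pb2.ConfigProto(
    allow_soft_placement=True,
    intra_op_parallelism_threads=2)

# The same keyword works for from_estimator and from_contrib_estimator.
predictor = predictor_factories.from_saved_model(
    '/tmp/my_export_dir',  # hypothetical path
    config=session_config)
```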
@@ -152,7 +154,7 @@ class SavedModelPredictor(predictor.Predictor): self._graph = graph or ops.Graph() with self._graph.as_default(): - self._session = session.Session() + self._session = session.Session(config=config) loader.load(self._session, tags.split(','), export_dir) if input_names is None: diff --git a/tensorflow/contrib/quantize/README.md b/tensorflow/contrib/quantize/README.md index c83623ec94..27a933c0f9 100644 --- a/tensorflow/contrib/quantize/README.md +++ b/tensorflow/contrib/quantize/README.md @@ -6,7 +6,7 @@ inference. The details of the transformation implemented in this package is described here [1]. This is done using the -[fake quantization op](https://www.tensorflow.org/versions/r0.12/api_docs/python/array_ops/fake_quantization). +[fake quantization op](https://www.tensorflow.org/api_guides/python/array_ops#Fake_quantization). Literature has shown that fixed point networks provide comparable performance to floating point networks [2]. This is achieved by modeling the quantization diff --git a/tensorflow/contrib/slim/python/slim/evaluation_test.py b/tensorflow/contrib/slim/python/slim/evaluation_test.py index 94fc12ca81..3d0308aaf3 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation_test.py +++ b/tensorflow/contrib/slim/python/slim/evaluation_test.py @@ -26,7 +26,6 @@ import time import numpy as np from tensorflow.contrib.framework.python.ops import variables as variables_lib -from tensorflow.contrib.metrics.python.ops import metric_ops from tensorflow.contrib.slim.python.slim import evaluation from tensorflow.contrib.training.python.training import evaluation as evaluation_lib from tensorflow.core.protobuf import saver_pb2 @@ -37,6 +36,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import metrics from tensorflow.python.ops import variables from tensorflow.python.platform import flags from tensorflow.python.platform import gfile @@ -89,8 +89,8 @@ class EvaluationTest(test.TestCase): self._predictions, self._scale = TestModel(self._inputs) def testFinalOpsOnEvaluationLoop(self): - value_op, update_op = metric_ops.streaming_accuracy(self._predictions, - self._labels) + value_op, update_op = metrics.accuracy( + labels=self._labels, predictions=self._predictions) init_op = control_flow_ops.group(variables.global_variables_initializer(), variables.local_variables_initializer()) # Create checkpoint and log directories: @@ -136,9 +136,10 @@ class EvaluationTest(test.TestCase): self.assertTrue(obj.hook_was_run) def _create_names_to_metrics(self, predictions, labels): - accuracy0, update_op0 = metric_ops.streaming_accuracy(predictions, labels) - accuracy1, update_op1 = metric_ops.streaming_accuracy(predictions + 1, - labels) + accuracy0, update_op0 = metrics.accuracy( + labels=labels, predictions=predictions) + accuracy1, update_op1 = metrics.accuracy( + labels=labels, predictions=predictions + 1) names_to_values = {'Accuracy': accuracy0, 'Another_accuracy': accuracy1} names_to_updates = {'Accuracy': update_op0, 'Another_accuracy': update_op1} @@ -198,8 +199,8 @@ class EvaluationTest(test.TestCase): predictions_limited = input.limit_epochs(self._predictions, num_epochs=1) labels_limited = input.limit_epochs(self._labels, num_epochs=1) - value_op, update_op = metric_ops.streaming_accuracy( - predictions_limited, labels_limited) + value_op, update_op = metrics.accuracy( + labels=labels_limited, 
predictions=predictions_limited) init_op = control_flow_ops.group(variables.global_variables_initializer(), variables.local_variables_initializer()) @@ -260,8 +261,8 @@ class SingleEvaluationTest(test.TestCase): self._prepareCheckpoint(checkpoint_path) # Next, determine the metric to evaluate: - value_op, update_op = metric_ops.streaming_accuracy(self._predictions, - self._labels) + value_op, update_op = metrics.accuracy( + labels=self._labels, predictions=self._predictions) # Run the evaluation and verify the results: accuracy_value = evaluation.evaluate_once( @@ -276,8 +277,8 @@ class SingleEvaluationTest(test.TestCase): self._prepareCheckpoint(checkpoint_path) # Next, determine the metric to evaluate: - value_op, update_op = metric_ops.streaming_accuracy(self._predictions, - self._labels) + value_op, update_op = metrics.accuracy( + labels=self._labels, predictions=self._predictions) dumping_root = os.path.join(self.get_temp_dir(), 'tfdbg_dump_dir') dumping_hook = hooks.DumpingDebugHook(dumping_root, log_usage=False) diff --git a/tensorflow/contrib/summary/summary.py b/tensorflow/contrib/summary/summary.py index 99ced53e11..d22b80ac88 100644 --- a/tensorflow/contrib/summary/summary.py +++ b/tensorflow/contrib/summary/summary.py @@ -21,6 +21,7 @@ from @{tf.summary.merge_all} to @{tf.summary.FileWriter}. To use with eager execution enabled, write your code as follows: +```python global_step = tf.train.get_or_create_global_step() summary_writer = tf.contrib.summary.create_file_writer( train_dir, flush_millis=10000) @@ -30,9 +31,11 @@ with summary_writer.as_default(), tf.contrib.summary.always_record_summaries(): tf.contrib.summary.scalar("loss", my_loss) # In this case every call to tf.contrib.summary.scalar will generate a record # ... +``` To use it with graph execution, write your code as follows: +```python global_step = tf.train.get_or_create_global_step() summary_writer = tf.contrib.summary.create_file_writer( train_dir, flush_millis=10000) @@ -53,7 +56,7 @@ with tf.Session(...) as sess: while not_done_training: sess.run([train_op, tf.contrib.summary.all_summary_ops()]) # ... 
- +``` """ from __future__ import absolute_import diff --git a/tensorflow/contrib/tensor_forest/client/eval_metrics.py b/tensorflow/contrib/tensor_forest/client/eval_metrics.py index e893e1d1c8..d8236a0a6f 100644 --- a/tensorflow/contrib/tensor_forest/client/eval_metrics.py +++ b/tensorflow/contrib/tensor_forest/client/eval_metrics.py @@ -21,10 +21,10 @@ import numpy as np from tensorflow.contrib import losses from tensorflow.contrib.learn.python.learn.estimators import prediction_key -from tensorflow.contrib.metrics.python.ops import metric_ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import metrics from tensorflow.python.ops import nn INFERENCE_PROB_NAME = prediction_key.PredictionKey.PROBABILITIES @@ -38,12 +38,13 @@ def _top_k_generator(k): targets = math_ops.to_int32(targets) if targets.get_shape().ndims > 1: targets = array_ops.squeeze(targets, axis=[1]) - return metric_ops.streaming_mean(nn.in_top_k(probabilities, targets, k)) + return metrics.mean(nn.in_top_k(probabilities, targets, k)) return _top_k def _accuracy(predictions, targets, weights=None): - return metric_ops.streaming_accuracy(predictions, targets, weights=weights) + return metrics.accuracy( + labels=targets, predictions=predictions, weights=weights) def _r2(probabilities, targets, weights=None): @@ -53,7 +54,7 @@ def _r2(probabilities, targets, weights=None): squares_residuals = math_ops.reduce_sum( math_ops.square(targets - probabilities), 0) score = 1 - math_ops.reduce_sum(squares_residuals / squares_total) - return metric_ops.streaming_mean(score, weights=weights) + return metrics.mean(score, weights=weights) def _squeeze_and_onehot(targets, depth): @@ -62,7 +63,7 @@ def _squeeze_and_onehot(targets, depth): def _sigmoid_entropy(probabilities, targets, weights=None): - return metric_ops.streaming_mean( + return metrics.mean( losses.sigmoid_cross_entropy(probabilities, _squeeze_and_onehot( targets, @@ -71,7 +72,7 @@ def _sigmoid_entropy(probabilities, targets, weights=None): def _softmax_entropy(probabilities, targets, weights=None): - return metric_ops.streaming_mean( + return metrics.mean( losses.sparse_softmax_cross_entropy(probabilities, math_ops.to_int32(targets)), weights=weights) @@ -82,7 +83,7 @@ def _predictions(predictions, unused_targets, **unused_kwargs): def _class_log_loss(probabilities, targets, weights=None): - return metric_ops.streaming_mean( + return metrics.mean( losses.log_loss(probabilities, _squeeze_and_onehot(targets, array_ops.shape(probabilities)[1])), @@ -90,34 +91,36 @@ def _class_log_loss(probabilities, targets, weights=None): def _precision(predictions, targets, weights=None): - return metric_ops.streaming_precision(predictions, targets, weights=weights) + return metrics.precision( + labels=targets, predictions=predictions, weights=weights) def _precision_at_thresholds(predictions, targets, weights=None): - return metric_ops.streaming_precision_at_thresholds( - array_ops.slice(predictions, [0, 1], [-1, 1]), - targets, - np.arange( - 0, 1, 0.01, dtype=np.float32), + return metrics.precision_at_thresholds( + labels=targets, + predictions=array_ops.slice(predictions, [0, 1], [-1, 1]), + thresholds=np.arange(0, 1, 0.01, dtype=np.float32), weights=weights) def _recall(predictions, targets, weights=None): - return metric_ops.streaming_recall(predictions, targets, weights=weights) + return metrics.recall( + labels=targets, predictions=predictions, weights=weights) def _recall_at_thresholds(predictions, targets, 
weights=None): - return metric_ops.streaming_recall_at_thresholds( - array_ops.slice(predictions, [0, 1], [-1, 1]), - targets, - np.arange( - 0, 1, 0.01, dtype=np.float32), + return metrics.recall_at_thresholds( + labels=targets, + predictions=array_ops.slice(predictions, [0, 1], [-1, 1]), + thresholds=np.arange(0, 1, 0.01, dtype=np.float32), weights=weights) def _auc(probs, targets, weights=None): - return metric_ops.streaming_auc(array_ops.slice(probs, [0, 1], [-1, 1]), - targets, weights=weights) + return metrics.auc( + labels=targets, + predictions=array_ops.slice(probs, [0, 1], [-1, 1]), + weights=weights) _EVAL_METRICS = { diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest.py b/tensorflow/contrib/tensor_forest/python/tensor_forest.py index 7a35a70bbe..6f62cd11a9 100644 --- a/tensorflow/contrib/tensor_forest/python/tensor_forest.py +++ b/tensorflow/contrib/tensor_forest/python/tensor_forest.py @@ -295,7 +295,7 @@ def get_epoch_variable(): # A simple container to hold the training variables for a single tree. -class TreeTrainingVariables(object): +class TreeVariables(object): """Stores tf.Variables for training a single random tree. Uses tf.get_variable to get tree-specific names so that this can be used @@ -303,7 +303,7 @@ class TreeTrainingVariables(object): then relies on restoring that model to evaluate). """ - def __init__(self, params, tree_num, training): + def __init__(self, params, tree_num, training, tree_config='', tree_stat=''): if (not hasattr(params, 'params_proto') or not isinstance(params.params_proto, _params_proto.TensorForestParams)): @@ -315,27 +315,28 @@ class TreeTrainingVariables(object): # TODO(gilberth): Manually shard this to be able to fit it on # multiple machines. self.stats = stats_ops.fertile_stats_variable( - params, '', self.get_tree_name('stats', tree_num)) + params, tree_stat, self.get_tree_name('stats', tree_num)) self.tree = model_ops.tree_variable( - params, '', self.stats, self.get_tree_name('tree', tree_num)) + params, tree_config, self.stats, self.get_tree_name('tree', tree_num)) def get_tree_name(self, name, num): return '{0}-{1}'.format(name, num) -class ForestTrainingVariables(object): +class ForestVariables(object): """A container for a forests training data, consisting of multiple trees. - Instantiates a TreeTrainingVariables object for each tree. We override the + Instantiates a TreeVariables object for each tree. We override the __getitem__ and __setitem__ function so that usage looks like this: - forest_variables = ForestTrainingVariables(params) + forest_variables = ForestVariables(params) ... forest_variables.tree ... """ def __init__(self, params, device_assigner, training=True, - tree_variables_class=TreeTrainingVariables): + tree_variables_class=TreeVariables, + tree_configs=None, tree_stats=None): self.variables = [] # Set up some scalar variables to run through the device assigner, then # we can use those to colocate everything related to a tree. 
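The metric rewrites above all follow the same pattern: the deprecated `tf.contrib.metrics.streaming_*` helpers take `(predictions, labels)` positionally, while the core `tf.metrics` equivalents are called with keyword arguments and `labels` first. A small self-contained sketch of the migration (not part of the patch):

```python
import tensorflow as tf

labels = tf.constant([1, 0, 1, 1])
predictions = tf.constant([1, 0, 0, 1])

# Replacement for metric_ops.streaming_accuracy(predictions, labels):
value_op, update_op = tf.metrics.accuracy(labels=labels,
                                          predictions=predictions)

with tf.Session() as sess:
  sess.run(tf.local_variables_initializer())  # metric state lives in local vars
  sess.run(update_op)
  print(sess.run(value_op))  # 0.75
```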
@@ -347,7 +348,13 @@ class ForestTrainingVariables(object): for i in range(params.num_trees): with ops.device(self.device_dummies[i].device): - self.variables.append(tree_variables_class(params, i, training)) + kwargs = {} + if tree_configs is not None: + kwargs.update(dict(tree_config=tree_configs[i])) + if tree_stats is not None: + kwargs.update(dict(tree_stat=tree_stats[i])) + self.variables.append(tree_variables_class( + params, i, training, **kwargs)) def __setitem__(self, t, val): self.variables[t] = val @@ -361,9 +368,11 @@ class RandomForestGraphs(object): def __init__(self, params, + tree_configs=None, + tree_stats=None, device_assigner=None, variables=None, - tree_variables_class=TreeTrainingVariables, + tree_variables_class=TreeVariables, tree_graphs=None, training=True): self.params = params @@ -371,9 +380,10 @@ class RandomForestGraphs(object): device_assigner or framework_variables.VariableDeviceChooser()) logging.info('Constructing forest with params = ') logging.info(self.params.__dict__) - self.variables = variables or ForestTrainingVariables( + self.variables = variables or ForestVariables( self.params, device_assigner=self.device_assigner, training=training, - tree_variables_class=tree_variables_class) + tree_variables_class=tree_variables_class, + tree_configs=tree_configs, tree_stats=tree_stats) tree_graph_class = tree_graphs or RandomTreeGraphs self.trees = [ tree_graph_class(self.variables[i], self.params, i) diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py b/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py index bbe627b157..1c9c81827e 100644 --- a/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py +++ b/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py @@ -18,10 +18,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from google.protobuf.json_format import ParseDict +from tensorflow.contrib.decision_trees.proto import generic_tree_model_pb2 as _tree_proto from tensorflow.contrib.tensor_forest.python import tensor_forest from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util +from tensorflow.python.ops import resources +from tensorflow.python.ops import variables from tensorflow.python.platform import googletest @@ -110,6 +114,47 @@ class TensorForestTest(test_util.TensorFlowTestCase): self.assertTrue(isinstance(paths, ops.Tensor)) self.assertTrue(isinstance(var, ops.Tensor)) + def testInfrenceFromRestoredModel(self): + input_data = [[-1., 0.], [-1., 2.], # node 1 + [1., 0.], [1., -2.]] # node 2 + expected_prediction = [[0.0, 1.0], [0.0, 1.0], + [0.0, 1.0], [0.0, 1.0]] + hparams = tensor_forest.ForestHParams( + num_classes=2, + num_features=2, + num_trees=1, + max_nodes=1000, + split_after_samples=25).fill() + tree_weight = {'decisionTree': + {'nodes': + [{'binaryNode': + {'rightChildId': 2, + 'leftChildId': 1, + 'inequalityLeftChildTest': + {'featureId': {'id': '0'}, + 'threshold': {'floatValue': 0}}}}, + {'leaf': {'vector': + {'value': [{'floatValue': 0.0}, + {'floatValue': 1.0}]}}, + 'nodeId': 1}, + {'leaf': {'vector': + {'value': [{'floatValue': 0.0}, + {'floatValue': 1.0}]}}, + 'nodeId': 2}]}} + restored_tree_param = ParseDict(tree_weight, + _tree_proto.Model()).SerializeToString() + graph_builder = tensor_forest.RandomForestGraphs(hparams, + [restored_tree_param]) + probs, paths, var = graph_builder.inference_graph(input_data) + 
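The per-tree plumbing added above reduces to a small keyword-forwarding pattern: a keyword is passed only when the caller actually supplied per-tree data, so `TreeVariables` keeps its empty-string defaults otherwise. A standalone sketch of just that pattern (illustrative names, not the real classes):

```python
def tree_kwargs(i, tree_configs=None, tree_stats=None):
  kwargs = {}
  if tree_configs is not None:
    kwargs['tree_config'] = tree_configs[i]
  if tree_stats is not None:
    kwargs['tree_stat'] = tree_stats[i]
  return kwargs

print(tree_kwargs(0))                             # {} -> defaults apply
print(tree_kwargs(1, tree_configs=['t0', 't1']))  # {'tree_config': 't1'}
```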
self.assertTrue(isinstance(probs, ops.Tensor)) + self.assertTrue(isinstance(paths, ops.Tensor)) + self.assertTrue(isinstance(var, ops.Tensor)) + with self.test_session(): + variables.global_variables_initializer().run() + resources.initialize_resources(resources.shared_resources()).run() + self.assertEquals(probs.eval().shape, (4, 2)) + self.assertEquals(probs.eval().tolist(), expected_prediction) + def testTrainingConstructionClassificationSparse(self): input_data = sparse_tensor.SparseTensor( indices=[[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]], diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index b7b26cfb1c..da4dd5a14c 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -91,8 +91,11 @@ void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, if (!subgraph_node_ids.count(edge->src()->id()) && !edge->src()->IsSource() && !edge->IsControlEdge()) { incoming_edges->insert(edge); + VLOG(2) << "INCOMING " << edge->src()->name() << " -> " << node->name() + << " Y, "; } else { - VLOG(2) << node->name() << " -> " << edge->src()->name() << " N, "; + VLOG(2) << "INCOMING " << edge->src()->name() << " -> " << node->name() + << " N, "; } } } @@ -106,10 +109,12 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph, for (const tensorflow::Edge* edge : node->out_edges()) { if (!subgraph_node_ids.count(edge->dst()->id()) && !edge->dst()->IsSink() && !edge->IsControlEdge()) { - VLOG(2) << node->name() << " -> " << edge->dst()->name() << " Y, "; + VLOG(2) << "OUTGOING " << node->name() << " -> " << edge->dst()->name() + << " Y, "; outgoing_edges->insert(edge); } else { - VLOG(2) << node->name() << " -> " << edge->dst()->name() << " N, "; + VLOG(2) << "OUTGOING " << node->name() << " -> " << edge->dst()->name() + << " N, "; } } } @@ -181,29 +186,27 @@ struct ConvertGraphParams { static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) { GetSubGraphIncomingEdges(p->graph, p->subgraph_node_ids, &p->subgraph_incoming_edges); + + std::set> unique_tensors; + // Add only unique input source nodes. 
If the output of an outside node is shared + // between multiple nodes inside the engine, only one edge should be created. for (const tensorflow::Edge* edge : p->subgraph_incoming_edges) { - p->subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); - } - auto output_name_to_index_map = BuildTensorNameMap(p->output_names); - std::set<std::pair<int, int>> subgraph_outputs_set; - // Collect outputs referenced from output_names - for (int node_id : p->subgraph_node_ids) { - tensorflow::Node* node = p->graph.FindNodeId(node_id); - if (output_name_to_index_map.count(node->name())) { - for (int index : output_name_to_index_map.at(node->name())) { - subgraph_outputs_set.insert({node_id, index}); - } - } + unique_tensors.insert({edge->src()->id(), edge->src_output()}); } + p->subgraph_inputs.insert(p->subgraph_inputs.begin(), unique_tensors.begin(), + unique_tensors.end()); GetSubGraphOutgoingEdges(p->graph, p->subgraph_node_ids, &p->subgraph_outgoing_edges); + unique_tensors.clear(); + // Similar to above, if multiple outside nodes are sharing the output of an + // internal node, only one output port should be created and shared between + // outputs. for (const tensorflow::Edge* edge : p->subgraph_outgoing_edges) { - subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); + unique_tensors.insert({edge->src()->id(), edge->src_output()}); } - p->subgraph_outputs.reserve(subgraph_outputs_set.size()); + p->subgraph_outputs.reserve(unique_tensors.size()); p->subgraph_outputs.insert(p->subgraph_outputs.begin(), - subgraph_outputs_set.begin(), - subgraph_outputs_set.end()); + unique_tensors.begin(), unique_tensors.end()); return tensorflow::Status::OK(); } @@ -225,7 +228,6 @@ tensorflow::Status GetCalibNode(ConvertGraphParams* params) { for (auto in_edge : params->subgraph_incoming_edges) { // loop over incoming edges and // attach them to calib node - // tensorflow::Node* src_node = in_edge->src(); auto src_output = in_edge->src_output(); auto dst_node = in_edge->dst(); auto dst_input = in_edge->dst_input(); @@ -257,19 +259,24 @@ tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { for (size_t i = 0; i < params->subgraph_inputs.size(); ++i) { subgraph_edge_to_input_map.insert({params->subgraph_inputs.at(i), i}); } + std::set<std::pair<int, int>> unique_tensors; for (const tensorflow::Edge* edge : params->subgraph_incoming_edges) { std::pair<int, int> old_src = {edge->src()->id(), edge->src_output()}; + if (unique_tensors.count(old_src)) continue; + unique_tensors.insert(old_src); int new_src_output = subgraph_edge_to_input_map.at(old_src); params->graph.AddEdge(edge->src(), edge->src_output(), trt_node, new_src_output); + VLOG(1) << "Wire " << edge->src()->name() << ":" << edge->src_output() + << " -> " << trt_node->name() << ":" << new_src_output; params->graph.RemoveEdge(edge); } - - VLOG(2) << "new wiring edges: " << trt_node->in_edges().size(); - for (const tensorflow::Edge* edge : trt_node->in_edges()) { - VLOG(2) << edge->src()->name() << " port: " << edge->src_output(); + if (VLOG_IS_ON(2)) { + VLOG(2) << "new edge count: " << trt_node->in_edges().size(); + for (const tensorflow::Edge* edge : trt_node->in_edges()) { + VLOG(2) << edge->src()->name() << " port: " << edge->src_output(); + } } - TF_RETURN_IF_ERROR(status); // Re-map outgoing edges to use the new TRT node instead of the orig subgraph @@ -283,6 +290,8 @@ tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { int new_src_output = subgraph_edge_to_output_map.at(old_src); TF_RETURN_IF_ERROR(params->graph.UpdateEdge( trt_node,
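The de-duplication introduced above keys subgraph endpoints by `(node_id, output_port)`, so a tensor consumed by several nodes inside the engine is wired to a single engine port. A minimal Python sketch of the idea (ordering follows a sorted set, roughly like the `std::set` used here; not part of the patch):

```python
def unique_engine_ports(edges):
  """edges: iterable of (src_node_id, src_output_port, dst_node_id)."""
  unique = {(src, port) for src, port, _dst in edges}
  # One engine port per unique source tensor, in sorted order.
  return {tensor: i for i, tensor in enumerate(sorted(unique))}

edges = [(1, 0, 10), (1, 0, 11), (2, 1, 10)]  # tensor 1:0 feeds two inner nodes
print(unique_engine_ports(edges))  # {(1, 0): 0, (2, 1): 1}
```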
new_src_output, edge->dst(), edge->dst_input())); + VLOG(1) << "Wire " << trt_node->name() << ":" << new_src_output << " -> " + << edge->dst()->name() << ":" << edge->dst_input(); } // Remove the original subgraph for (int node_id : params->subgraph_node_ids) { @@ -317,9 +326,12 @@ tensorflow::Status ConvertCalibGraphToInferGraph( tensorflow::GraphConstructorOptions(), graph_def, &graph)); // get calib nodes std::vector calib_nodes; - for (auto node : graph.op_nodes()) { + std::vector topo_order; + tensorflow::GetPostOrder(graph, &topo_order); + for (auto rit = topo_order.rbegin(); rit != topo_order.rend(); ++rit) { + auto node = *rit; if (node->type_string() == "TRTCalibOp") { - VLOG(1) << "Found Calib Node"; + VLOG(1) << "Found Calib Node " << node->name(); calib_nodes.push_back(node); } } diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 96e0700862..4e4d295538 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -362,10 +362,11 @@ void ReorderCKtoKC(const TRT_ShapedWeights& iweights, break; } case tensorflow::DataType::DT_HALF: { - Reorder2({k, c}, static_cast(iweights.GetValues()), - istrides, static_cast( - const_cast(oweights->GetValues())), - ostrides); + Reorder2( + {k, c}, static_cast(iweights.GetValues()), + istrides, + static_cast(const_cast(oweights->GetValues())), + ostrides); break; } default: @@ -1179,9 +1180,9 @@ tensorflow::Status BinaryTensorOpTensor( CHECK_EQ_TYPE(tensor_r->getType(), dtype); auto op_pair = ops.find(node_def.op()); if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + - " not supported at: " + - node_def.name()); + return tensorflow::errors::Unimplemented( + "binary op: " + node_def.op() + + " not supported at: " + node_def.name()); nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( *const_cast(tensor_l), @@ -2138,9 +2139,7 @@ void Converter::register_op_converters() { } } // namespace -tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams& s) { - return tensorflow::errors::Unimplemented("Not implemented yet"); -} + tensorflow::Status ConvertCalibrationNodeToEngineNode( tensorflow::Graph& graph, tensorflow::Node* c_node) { const auto ndef = c_node->def(); @@ -2164,9 +2163,23 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( for (auto n : graph.op_nodes()) { node_maps.insert({n->name(), n}); } + std::set subgraph_ids; + for (const auto internal_node : segment_nodes) { + subgraph_ids.insert(node_maps.at(internal_node)->id()); + } + if (VLOG_IS_ON(2)) { + string node_names = StrCat(c_node->name(), " segment nodes= "); + + for (const auto& node_name : segment_nodes) { + StrAppend(&node_names, node_name, ", "); + } + VLOG(2) << node_names; + } + VLOG(1) << "Output Nodes:"; std::vector out_types; std::vector out_edges; + for (auto& i : output_nodes) { auto node_port = tensorflow::str_util::Split(i, ":"); VLOG(1) << " " << i << " in graph " << node_maps.count(i); @@ -2186,18 +2199,24 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( out_types.push_back(out_node->output_type(0)); } for (auto out_edge : out_node->out_edges()) { + if (subgraph_ids.count(out_edge->dst()->id())) + continue; // skip internal edges; if (out_edge->src_output() == port) { out_edges.push_back(out_edge); - break; + VLOG(1) << "OUTPUT EDGE " << out_edge->src()->name() << ":" + << out_edge->src_output() << " -> " << out_edge->dst()->name() + << 
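On the traversal change above: for an acyclic graph, reversing a depth-first post-order yields a topological order, so producers are handled before the nodes that consume them, which is presumably why the calibration nodes are now collected this way. A compact sketch on an adjacency-list graph (illustrative, not part of the patch):

```python
def post_order(graph, roots):
  visited, order = set(), []
  def dfs(node):
    if node in visited:
      return
    visited.add(node)
    for succ in graph.get(node, []):
      dfs(succ)
    order.append(node)  # emitted only after all successors
  for root in roots:
    dfs(root)
  return order

g = {'a': ['b', 'c'], 'b': ['d'], 'c': ['d'], 'd': []}
print(list(reversed(post_order(g, ['a']))))  # ['a', 'c', 'b', 'd']
```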
":" << out_edge->dst_input(); } } } else { LOG(WARNING) << " couldn't find output node " << out_node_name; } } - VLOG(1) << "Input Nodes:"; - for (auto& i : input_names) { - VLOG(1) << " " << i << " in graph " << node_maps.count(i); + if (VLOG_IS_ON(1)) { + VLOG(1) << c_node->name() << " Input Nodes:"; + for (auto& i : input_names) { + VLOG(1) << " Input " << i << " in graph " << node_maps.count(i); + } } auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance(); auto resmgr = trt_rm->getManager("TRTCalibOps"); @@ -2231,14 +2250,24 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( calib_res->builder_ = nullptr; tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); std::vector income_edges; + income_edges.resize(c_node->num_inputs()); for (const auto in_edge : c_node->in_edges()) { auto src = in_edge->src(); int dest_port = in_edge->dst_input(); - income_edges.emplace_back(src->name(), in_edge->src_output(), - c_node->input_type(dest_port)); + VLOG(1) << "Incoming connection " << src->name() << ":" + << in_edge->src_output() << " -> " << c_node->name() << ":" + << dest_port; + income_edges.at(dest_port) = {src->name(), in_edge->src_output(), + c_node->input_type(dest_port)}; } tensorflow::gtl::ArraySlice input_list( income_edges); + if (VLOG_IS_ON(2)) { + for (const auto& inp : input_list) { + VLOG(2) << " Input from inputlist " << inp.node << ":" << inp.index << " " + << tensorflow::DataTypeString(inp.data_type); + } + } op_builder.Input(input_list); tensorflow::NodeDef engine_node; const char* engine_plan_data = static_cast(engine_plan->data()); @@ -2255,13 +2284,26 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( } auto trt_engine_node = graph.AddNode(engine_node, &status); TF_RETURN_IF_ERROR(status); - for (size_t i = 0; i < out_edges.size(); i++) { - VLOG(1) << "Connecting trt_engine_node output " << i << " with " - << out_edges.at(i)->dst()->name() << " port " - << out_edges.at(i)->dst_input(); - TF_RETURN_IF_ERROR(graph.UpdateEdge(trt_engine_node, i, - out_edges.at(i)->dst(), - out_edges.at(i)->dst_input())); + std::map port_map; + for (size_t t = 0; t < output_nodes.size(); t++) { + port_map.insert({output_nodes.at(t), t}); + } + for (auto& i : out_edges) { + string s(i->src()->name()); + if (i->src_output()) StrAppend(&s, ":", i->src_output()); + int out_port = port_map.at(s); + VLOG(1) << "Connecting " << trt_engine_node->name() << ":" << out_port + << " -> " << i->dst()->name() << ":" << i->dst_input(); + TF_RETURN_IF_ERROR( + graph.UpdateEdge(trt_engine_node, out_port, i->dst(), i->dst_input())); + } + for (const auto ed : trt_engine_node->in_edges()) { + VLOG(1) << "In Edge " << ed->src()->name() << ":" << ed->src_output() + << " -> " << ed->dst()->name() << ":" << ed->dst_input(); + } + for (const auto ed : trt_engine_node->out_edges()) { + VLOG(1) << "Out Edge " << ed->src()->name() << ":" << ed->src_output() + << " -> " << ed->dst()->name() << ":" << ed->dst_input(); } VLOG(1) << "Segment nodes:"; for (auto& i : segment_nodes) { @@ -2332,6 +2374,7 @@ tensorflow::Status ConvertSubgraph( std::vector* output_names, std::vector* output_dtypes, const string& engine_name) { + std::set added_tensors; for (const std::pair& input : s.input_inds) { VLOG(2) << "parsing input. 
Node id= " << input.first; int node_id = input.first; @@ -2374,7 +2417,6 @@ tensorflow::Status ConvertSubgraph( auto op_info = op_info_vec.at(shape_inference_output_idx); tensorflow::DataType tf_dtype = op_info.dtype(); - input_dtypes->push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); auto type_status = ConvertDType(tf_dtype, &dtype); @@ -2410,8 +2452,10 @@ tensorflow::Status ConvertSubgraph( if (output_idx != 0) { input_tensor_name = StrCat(node_name, ":", output_idx); } - + if (added_tensors.count(input_tensor_name)) continue; + added_tensors.insert(input_tensor_name); input_names->push_back(input_tensor_name); + input_dtypes->push_back(tf_dtype); nvinfer1::ITensor* input_tensor = converter.network()->addInput( input_tensor_name.c_str(), dtype, input_dim_pseudo_chw); @@ -2435,6 +2479,7 @@ tensorflow::Status ConvertSubgraph( // Gather output metadata int trt_engine_op_output_idx = 0; + added_tensors.clear(); for (const std::pair& output : s.output_inds) { int node_id = output.first; int output_idx = output.second; @@ -2451,6 +2496,8 @@ tensorflow::Status ConvertSubgraph( if (output_idx != 0) tensorflow::strings::StrAppend(&tensor_name, ":", output_idx); VLOG(2) << "Output tensor name: " << tensor_name; + if (added_tensors.count(tensor_name)) continue; + added_tensors.insert(tensor_name); output_names->push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); if (!tensor_or_weights.is_tensor()) { diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py index 2e472a2805..d879170b68 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets.py @@ -166,11 +166,21 @@ def StreamingFilesDataset(files, return remote_iterator.get_next() def MapFn(unused_input): - return functional_ops.remote_call( + if isinstance(source_dataset.output_types, dtypes.DType): + output_types = [source_dataset.output_types] + elif isinstance(source_dataset.output_types, (list, tuple)): + output_types = source_dataset.output_types + else: + raise ValueError('source dataset has invalid output types') + remote_calls = functional_ops.remote_call( args=[source_handle], - Tout=[dtypes.string], + Tout=output_types, f=LoadingFunc, - target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job)[0] + target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job) + if len(remote_calls) == 1: + return remote_calls[0] + else: + return remote_calls with ops.device('/job:%s' % worker_job): output_dataset = dataset_ops.Dataset.range(2).repeat().map( diff --git a/tensorflow/contrib/tpu/python/tpu/datasets_test.py b/tensorflow/contrib/tpu/python/tpu/datasets_test.py index 918cf0ed8e..b58d05eac5 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets_test.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets_test.py @@ -26,6 +26,8 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import readers +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_shape from tensorflow.python.lib.io import python_io from tensorflow.python.platform import test from tensorflow.python.training import server_lib @@ -162,6 +164,30 @@ class DatasetsTest(test.TestCase): self.assertEqual(set(all_contents), set(retrieved_values)) + def testArbitraryReaderFuncFromDatasetGenerator(self): + + def my_generator(): + yield (1, [1] * 10) + + def 
gen_dataset(dummy): + return dataset_ops.Dataset.from_generator( + my_generator, (dtypes.int64, dtypes.int64), + (tensor_shape.TensorShape([]), tensor_shape.TensorShape([10]))) + + dataset = datasets.StreamingFilesDataset( + dataset_ops.Dataset.range(10), filetype=gen_dataset) + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = self._sess.run(get_next) + + self.assertIsInstance(retrieved_values, (list, tuple)) + self.assertEqual(len(retrieved_values), 2) + self.assertEqual(retrieved_values[0], 1) + self.assertItemsEqual(retrieved_values[1], [1] * 10) + def testUnexpectedFiletypeString(self): with self.assertRaises(ValueError): datasets.StreamingFilesDataset( diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index d89633199d..b1c224a345 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -699,7 +699,9 @@ cc_library( srcs = ["platform/stacktrace_handler.cc"], hdrs = ["platform/stacktrace_handler.h"], deps = [ + ":abi", ":lib_platform", + ":stacktrace", ], ) @@ -3089,6 +3091,8 @@ cc_library( # we now need at least "str_util". ":lib", ":lib_platform", + ":stacktrace_handler", + ":test_lite", "//tensorflow/core/platform/default/build_config:test_lite_main", ], alwayslink = 1, @@ -3569,7 +3573,10 @@ tf_cc_tests_gpu( tf_cc_test_mkl( name = "mkl_runtime_tests", size = "small", - srcs = ["common_runtime/mkl_cpu_allocator_test.cc"], + srcs = [ + "common_runtime/mkl_cpu_allocator_test.cc", + "common_runtime/mkl_threadpool_device_test.cc", + ], linkstatic = 1, deps = [ ":core", diff --git a/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt b/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt index cbe76de415..985f09312f 100644 --- a/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt @@ -4,6 +4,10 @@ op { description: < 0`, limit of the split of the result. +END + } + summary: "Split elements of `source` based on `sep` into a `SparseTensor`." + description: <2<><>3"` and +sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty +string, consecutive whitespace are regarded as a single separator, and the +result will contain no empty strings at the startor end if the string has +leading or trailing whitespace. + +Note that the above mentioned behavior matches python's str.split. +END +} diff --git a/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt new file mode 100644 index 0000000000..0e8576fb01 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StringSplitV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index 8f2a419756..9cda17867b 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -86,7 +86,7 @@ BFCAllocator::Chunk* BFCAllocator::ChunkFromHandle(ChunkHandle h) { return &(chunks_[h]); } -bool BFCAllocator::Extend(size_t rounded_bytes) { +bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { size_t available_bytes = memory_limit_ - total_region_allocated_bytes_; // Rounds available_bytes down to the nearest multiple of kMinAllocationSize. 
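The StringSplitV2 documentation above states that the semantics follow Python's `str.split`, which makes them easy to sanity-check directly (illustrative only, not part of the patch):

```python
# sep given: consecutive delimiters delimit empty strings.
print("1<>2<><>3".split("<>"))      # ['1', '2', '', '3']
# sep omitted: runs of whitespace collapse, no leading/trailing empties.
print("  hello   world ".split())   # ['hello', 'world']
```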
available_bytes = (available_bytes / kMinAllocationSize) * kMinAllocationSize; @@ -108,7 +108,7 @@ bool BFCAllocator::Extend(size_t rounded_bytes) { // Try allocating. size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes); - void* mem_addr = suballocator_->Alloc(32, bytes); + void* mem_addr = suballocator_->Alloc(alignment, bytes); if (mem_addr == nullptr && !started_backpedal_) { // Only backpedal once. started_backpedal_ = true; @@ -119,7 +119,7 @@ bool BFCAllocator::Extend(size_t rounded_bytes) { while (mem_addr == nullptr) { bytes = RoundedBytes(bytes * kBackpedalFactor); if (bytes < rounded_bytes) break; - mem_addr = suballocator_->Alloc(32, bytes); + mem_addr = suballocator_->Alloc(alignment, bytes); } } @@ -261,7 +261,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment, } // Try to extend - if (Extend(rounded_bytes)) { + if (Extend(unused_alignment, rounded_bytes)) { ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes); if (ptr != nullptr) { return ptr; diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h index ba5a3eea3a..52aedb1e9c 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.h +++ b/tensorflow/core/common_runtime/bfc_allocator.h @@ -305,7 +305,8 @@ class BFCAllocator : public VisitableAllocator { // Try to add a new memory region that can satisfy an allocation of // 'rounded_bytes' bytes. Returns true on success and false on // failure. - bool Extend(size_t rounded_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_); + bool Extend(size_t alignment, size_t rounded_bytes) + EXCLUSIVE_LOCKS_REQUIRED(lock_); // Returns a pointer to an underlying allocated chunk of size // 'rounded_bytes'. diff --git a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc index c21a1ea9f2..9028e6298c 100644 --- a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc +++ b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc @@ -102,9 +102,25 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelTest) { EXPECT_EQ(2, shape.dim(0).size()); EXPECT_EQ(1, shape.dim(1).size()); if (node->name() == y->name()) { +#ifdef INTEL_MKL + // if MKL is used, it goes through various additional + // graph rewrite pass. In TF, everytime a graph pass + // happens, "constant" nodes are allocated + // and deallocated. Each allocation calls the + // (FindChunkPtr of BFCAllocator), + // which increments the value of AllocationId. + // Thus AllocationId becomes more than 3 and 4 if + // MKL is used. Now they are 9 and 10 for MKL. + EXPECT_EQ(19, cm->AllocationId(node, 0)); +#else EXPECT_EQ(21, cm->AllocationId(node, 0)); +#endif } else { +#ifdef INTEL_MKL + EXPECT_EQ(20, cm->AllocationId(node, 0)); +#else EXPECT_EQ(22, cm->AllocationId(node, 0)); +#endif } } EXPECT_LE(0, cm->MaxExecutionTime(node)); diff --git a/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc b/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc new file mode 100644 index 0000000000..5d583a8360 --- /dev/null +++ b/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc @@ -0,0 +1,53 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifdef INTEL_MKL + +#include "tensorflow/core/common_runtime/threadpool_device.h" + +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/session_options.h" + +namespace tensorflow { + +#ifdef _OPENMP +TEST(MKLThreadPoolDeviceTest, TestOmpDefaults) { + SessionOptions options; + unsetenv("OMP_NUM_THREADS"); + + ThreadPoolDevice* tp = new ThreadPoolDevice( + options, "/device:CPU:0", Bytes(256), DeviceLocality(), cpu_allocator()); + + const int ht = port::NumHyperthreadsPerCore(); + EXPECT_EQ(omp_get_max_threads(), (port::NumSchedulableCPUs() + ht - 1) / ht); +} + +TEST(MKLThreadPoolDeviceTest, TestOmpPreSets) { + SessionOptions options; + setenv("OMP_NUM_THREADS", "314", 1); + + ThreadPoolDevice* tp = new ThreadPoolDevice( + options, "/device:CPU:0", Bytes(256), DeviceLocality(), cpu_allocator()); + + EXPECT_EQ(omp_get_max_threads(), 314); +} +#endif // _OPENMP + +} // namespace tensorflow + +#endif // INTEL_MKL diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc index 21912236d0..a5d31b75c7 100644 --- a/tensorflow/core/common_runtime/process_util.cc +++ b/tensorflow/core/common_runtime/process_util.cc @@ -16,8 +16,10 @@ limitations under the License. #include "tensorflow/core/common_runtime/process_util.h" #ifdef INTEL_MKL +#ifdef _OPENMP #include -#endif +#endif // _OPENMP +#endif // INTEL_MKL #include #include "tensorflow/core/lib/core/threadpool.h" @@ -57,7 +59,10 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) { // MKL library executes ops in parallel using OMP threads // Set inter_op conservatively to avoid thread oversubscription that could // lead to severe perf degradations and OMP resource exhaustion - const int mkl_intra_op = omp_get_max_threads(); + int mkl_intra_op = 1; +#ifdef _OPENMP + mkl_intra_op = omp_get_max_threads(); +#endif // _OPENMP CHECK_GE(mkl_intra_op, 1); const int32 mkl_inter_op = std::max( (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2); @@ -68,7 +73,7 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) { #else // Default to using the number of cores available in the process. return port::NumSchedulableCPUs(); -#endif +#endif // INTEL_MKL } thread::ThreadPool* NewThreadPoolFromSessionOptions( diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc index f7a07fe503..74a87215e1 100644 --- a/tensorflow/core/common_runtime/threadpool_device.cc +++ b/tensorflow/core/common_runtime/threadpool_device.cc @@ -31,7 +31,11 @@ limitations under the License. 
#include "tensorflow/core/public/session_options.h" #ifdef INTEL_MKL +#ifdef _OPENMP +#include +#endif #include "tensorflow/core/common_runtime/mkl_cpu_allocator.h" +#include "tensorflow/core/platform/cpu_info.h" #endif namespace tensorflow { @@ -43,7 +47,26 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options, : LocalDevice(options, Device::BuildDeviceAttributes( name, DEVICE_CPU, memory_limit, locality)), allocator_(allocator), - scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {} + scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) { +#ifdef INTEL_MKL +#ifdef _OPENMP + const char* user_omp_threads = getenv("OMP_NUM_THREADS"); + if (user_omp_threads == nullptr) { + // OMP_NUM_THREADS controls MKL's intra-op parallelization + // Default to available physical cores + const int mkl_intra_op = port::NumSchedulableCPUs(); + const int ht = port::NumHyperthreadsPerCore(); + omp_set_num_threads((mkl_intra_op + ht - 1) / ht); + } else { + uint64 user_val = 0; + if (strings::safe_strtou64(user_omp_threads, &user_val)) { + // Superflous but triggers OpenMP loading + omp_set_num_threads(user_val); + } + } +#endif // _OPENMP +#endif // INTEL_MKL +} ThreadPoolDevice::~ThreadPoolDevice() {} diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc index 1cea1b1462..770a0fcf14 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc @@ -147,7 +147,9 @@ MasterService::Stub::Stub( } MasterService::AsyncService::AsyncService() { - for (int i = 0; i < 10; ++i) { + int method_len = sizeof(grpcMasterService_method_names) / + sizeof(grpcMasterService_method_names[0]); + for (int i = 0; i < method_len; ++i) { AddMethod(new ::grpc::internal::RpcServiceMethod( grpcMasterService_method_names[i], ::grpc::internal::RpcMethod::NORMAL_RPC, nullptr)); diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc index 89f83f9f24..a8508d2d4f 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/rpc/grpc_session.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/util/device_name_utils.h" namespace tensorflow { @@ -50,9 +51,14 @@ Status TestCluster::MakeTestCluster(const SessionOptions& options, int n, } for (int i = 0; i < n; ++i) { + string server_file = + strings::StrCat(testing::TensorFlowSrcRoot(), + "/core/distributed_runtime/rpc/grpc_testlib_server"); + if (!options.env->FileExists(server_file).ok()) { + return errors::Internal("Could not find grpc_testlib_server"); + } const std::vector argv( - {strings::StrCat(testing::TensorFlowSrcRoot(), - "/core/distributed_runtime/rpc/grpc_testlib_server"), + {server_file, /* see grpc_testlib_server.cc for flags */ tf_jobs, "--tf_job=localhost", strings::StrCat("--tf_task=", i), strings::StrCat("--num_cpus=", num_cpus), diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h index 2c87156dca..2bb4d32d57 100644 --- a/tensorflow/core/framework/allocator.h +++ b/tensorflow/core/framework/allocator.h @@ -67,13 +67,8 @@ struct AllocatorStats { // device memory. 
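The OpenMP defaults introduced above are plain ceiling division: when `OMP_NUM_THREADS` is unset, intra-op threads default to roughly the physical core count, and `process_util.cc` derives the inter-op pool size from it. A sketch of the arithmetic with stand-in values (not actual `port::` queries; not part of the patch):

```python
def ceil_div(a, b):
  return (a + b - 1) // b

logical_cpus = 16        # stand-in for port::NumSchedulableCPUs()
ht_per_core = 2          # stand-in for port::NumHyperthreadsPerCore()

intra_op = ceil_div(logical_cpus, ht_per_core)        # OMP default: 8
inter_op = max(ceil_div(logical_cpus, intra_op), 2)   # at least 2:  2
print(intra_op, inter_op)
```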
class Allocator { public: -#ifdef EIGEN_VECTORIZE_AVX512 // Align to 64 byte boundary. static constexpr size_t kAllocatorAlignment = 64; -#else - // Align to 32 byte boundary. - static constexpr size_t kAllocatorAlignment = 32; -#endif virtual ~Allocator(); diff --git a/tensorflow/core/framework/op_gen_lib.cc b/tensorflow/core/framework/op_gen_lib.cc index 3d7920a6e2..4b56d807df 100644 --- a/tensorflow/core/framework/op_gen_lib.cc +++ b/tensorflow/core/framework/op_gen_lib.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/framework/op_gen_lib.h" +#include #include #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/lib/core/errors.h" diff --git a/tensorflow/core/framework/remote_fused_graph_execute_info.proto b/tensorflow/core/framework/remote_fused_graph_execute_info.proto index eb689ec1e6..10072724d2 100644 --- a/tensorflow/core/framework/remote_fused_graph_execute_info.proto +++ b/tensorflow/core/framework/remote_fused_graph_execute_info.proto @@ -5,7 +5,7 @@ option cc_enable_arenas = true; option java_outer_classname = "RemoteFusedGraphExecuteInfoProto"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -//add go_package externally +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework"; import "tensorflow/core/framework/graph.proto"; import "tensorflow/core/framework/tensor_shape.proto"; import "tensorflow/core/framework/types.proto"; diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc index b613effd18..80e168df97 100644 --- a/tensorflow/core/framework/tensor_test.cc +++ b/tensorflow/core/framework/tensor_test.cc @@ -1147,29 +1147,29 @@ TEST(Tensor, FailureToAllocate) { // On the alignment. // -// As of 2015/8, tensorflow::Tensor allocates its buffer with 32-byte +// As of 2018/5, tensorflow::Tensor allocates its buffer with 64-byte // alignment. Tensor::tensor/flat/vec/matrix methods requires the // buffer satisfies Eigen::Aligned (e.g., 16-bytes aligned usually, -// and 32-bytes for AVX). Tensor::Slice requires the caller to ensure -// its result is aligned if the caller intends to use those methods. -// In this test case, we simply make sure each slice is 32-byte -// aligned: sizeof(float) * 4 * 2 = 32. +// 32-bytes for AVX, and 64-bytes for AVX512). Tensor::Slice requires +// the caller to ensure its result is aligned if the caller intends +// to use those methods. In this test case, we simply make sure each +// slice is 64-byte aligned: sizeof(float) * 4 * 36 = 576. 576 % 64 = 0. TEST(Tensor, Slice_Basic) { Tensor saved; { // General - Tensor x(DT_FLOAT, TensorShape({10, 4, 34})); + Tensor x(DT_FLOAT, TensorShape({10, 4, 36})); // Fills in known values. for (int i = 0; i < 10; ++i) { x.Slice(i, i + 1).flat().setConstant(i * 1.f); } // A simple slice along dim0. 
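The arithmetic in the updated test comment is worth spelling out: with the allocator now always 64-byte aligned, a dim-0 slice of a float tensor stays aligned only if its byte stride is a multiple of 64, which is why the test shape moves from 34 to 36 in the last dimension. A small check (not part of the patch):

```python
sizeof_float = 4
print(sizeof_float * 4 * 36)         # 576, and 576 % 64 == 0 -> slices stay aligned
print((sizeof_float * 4 * 36) % 64)  # 0
print((sizeof_float * 4 * 34) % 64)  # 32 -> the old shape only guaranteed 32 bytes
```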
Tensor y = x.Slice(4, 8); - EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 4, 34}))); + EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 4, 36}))); auto tx = x.tensor(); auto ty = y.tensor(); for (int i = 0; i < 4; ++i) { for (int j = 0; j < 4; ++j) { - for (int k = 0; k < 34; ++k) { + for (int k = 0; k < 36; ++k) { EXPECT_EQ(ty(i, j, k), 4.0 + i); EXPECT_EQ(&tx(4 + i, j, k), &ty(i, j, k)); } @@ -1186,7 +1186,7 @@ TEST(Tensor, Slice_Basic) { auto tz = z.tensor(); EXPECT_EQ(1, z.dim_size(0)); for (int j = 0; j < 4; ++j) { - for (int k = 0; k < 34; ++k) { + for (int k = 0; k < 36; ++k) { EXPECT_EQ(tz(0, j, k), 6.0); } } @@ -1198,16 +1198,16 @@ TEST(Tensor, Slice_Basic) { EXPECT_EQ(1, saved.dim_size(0)); auto tsaved = saved.tensor(); for (int j = 0; j < 4; ++j) { - for (int k = 0; k < 34; ++k) { + for (int k = 0; k < 36; ++k) { EXPECT_EQ(tsaved(0, j, k), 6.0); } } } { // Empty - Tensor x(DT_FLOAT, TensorShape({10, 0, 34})); + Tensor x(DT_FLOAT, TensorShape({10, 0, 36})); x.flat().setRandom(); Tensor y = x.Slice(4, 8); - EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 0, 34}))); + EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 0, 36}))); } { diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 72a13d4da7..b9667998d6 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2691,14 +2691,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // If Op has been specifically assigned to a non-CPU device, then No. if (!n->assigned_device_name().empty() && - !str_util::StrContains(n->assigned_device_name(),kCPUDeviceSubStr)) { + !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) { result = false; reason = "Op has been assigned a runtime device that is not CPU."; } // If user has specifically assigned this op to a non-CPU device, then No. if (!n->def().device().empty() && - !str_util::StrContains(n->def().device(),kCPUDeviceSubStr)) { + !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) { result = false; reason = "User has assigned a device that is not CPU."; } @@ -2865,9 +2865,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass { return false; } - // If the depth_radius of LRN is not 2, then MKL DNN takes unoptimized - // path. The unoptimized path is slow. Thus we dont rewrite the node - // and use default Eigen. But for depth_radius=2, MKL DNN optimized + // If the depth_radius of LRN is not 2, then MKL DNN takes unoptimized + // path. The unoptimized path is slow. Thus we dont rewrite the node + // and use default Eigen. But for depth_radius=2, MKL DNN optimized // path is taken, i.e., eigen node is rewritten by MKl DNN node. static bool LrnRewrite(const Node* n) { CHECK_NOTNULL(n); @@ -2876,13 +2876,13 @@ class MklLayoutRewritePass : public GraphOptimizationPass { CHECK_EQ(GetNodeAttr(n->def(), "depth_radius", &depth_radius).ok(), true); // if the depth_radius of LRN is not 2, don't rewrite the node by MKL DNN - // and use eigen node instead + // and use eigen node instead if (depth_radius == 2) { return true; } VLOG(1) << "LrnRewrite: The model sets depth_radius as not 2 which" << "case is not optimized by Intel MKL, thus using Eigen op" - << "for LRN " ; + << "for LRN "; return false; } @@ -3015,6 +3015,35 @@ class MklLayoutRewritePass : public GraphOptimizationPass { std::vector* ws_tensors, bool* are_ws_tensors_added); + // Helper function used by FixMklMetaDataEdges. 
Fixes the metadata edge + // pointed by 'e_metadata' corresponding to the data edge 'e_data' in graph + // 'g'. Returns true is fixup was done; otherwise, it returns false. + bool FixMklMetaDataEdgeIfNeeded(std::unique_ptr* g, + const Edge* e_data, const Edge* e_metadata); + + // Are the input Mkl metadata edges for node 'n' in graph 'g' correctly + // connected? If not, then fix them. This is needed because a graph may have + // some input Mkl metadata edges incorrectly setup after node merge and + // rewrite passes. This could happen because GetReversePostOrder function may + // not provide topologically sorted order if a graph contains cycles. The + // function returns true if at least one Mkl metadata edge for node 'n' was + // fixed. Otherwise, it returns false. + // + // Example: + // + // X = MklConv2D(_, _, _) + // Y = MklConv2DWithBias(_, _, _, _, _, _) + // Z = MklAdd(X, Y, DummyMklTensor, Y:1) + // + // For a graph such as shown above, note that 3rd argument of MklAdd contains + // DummyMklTensor. Actually, it should be getting the Mkl metadata from + // MklConv2D op (specifically, X:2). This incorrect plumbing could be possible + // (although rare) if the Mkl NodeMerge + NodeRewrite passes visit Z before X + // (possible if X, Y, Z are part of a loop.) This function fixes the Mkl + // metadata edges only - it does not rewrite nodes nor does it modify the Mkl + // data edges (1st and 2nd arguments of MklAdd). + bool FixMklMetaDataEdges(std::unique_ptr* g, Node* n); + // Functions specific to operators to copy attributes // We need operator-specific function to copy attributes because the framework // does not provide any generic function for it. @@ -4241,6 +4270,92 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const { return nullptr; } +/////////////////////////////////////////////////////////////////////////////// +// Post-rewrite Mkl metadata fixup pass +/////////////////////////////////////////////////////////////////////////////// +bool MklLayoutRewritePass::FixMklMetaDataEdgeIfNeeded(std::unique_ptr* g, + const Edge* e_data, const Edge* e_metadata) { + if (g == nullptr || e_data == nullptr || e_metadata == nullptr) { + return false; + } + + Node* n_data = e_data->src(); + int n_data_op_slot = e_data->src_output(); + int n_metadata_op_slot = GetTensorMetaDataIndex(n_data_op_slot, + n_data->num_outputs()); + + // If the source of meta edge is a constant node (producing dummy Mkl metadata + // tensor), then we will need to fix. + if (IsConstant(e_metadata->src())) { + Node* e_metadata_dst = e_metadata->dst(); + int e_metadata_in_slot = e_metadata->dst_input(); + CHECK_NOTNULL((*g)->AddEdge(n_data, n_metadata_op_slot, + e_metadata_dst, e_metadata_in_slot)); + + (*g)->RemoveEdge(e_metadata); + return true; + } + + return false; +} + +bool MklLayoutRewritePass::FixMklMetaDataEdges(std::unique_ptr* g, + Node* n) { + bool result = false; + + // If graph node is not Mkl node, then return. + DataType T = DT_INVALID; + if (!GetNodeAttr(n->def(), "T", &T).ok() || + !mkl_op_registry::IsMklOp(n->type_string(), T)) { + return result; + } + + // If it is Mkl node, then check if the input edges to this node that carry + // Mkl metadata are linked up correctly with the source node. + + // For Mkl nodes, we generate twice the number of input tensors (n for Mkl + // data tensors + n for Mkl metadata tensors). We need to check for correct + // connection of n metadata tensors only. 
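The check below leans on the convention that an Mkl op carries one metadata input per data input, so a node with 2n inputs pairs data slot i with a metadata slot computed by GetTensorMetaDataIndex. As an illustration only, since the real mapping depends on the tensor-ordering convention configured in mkl_util.h, the two layouts commonly considered look like this:

    // Hypothetical helpers, not the pass's own code: index of the metadata
    // tensor paired with data slot `i` for a node with `num_inputs` inputs.
    int MetaSlotContiguous(int i, int num_inputs) {  // D0 D1 ... Dn-1 M0 M1 ... Mn-1
      return i + num_inputs / 2;
    }
    int MetaSlotInterleaved(int i) {                 // D0 M0 D1 M1 ...
      return 2 * i + 1;
    }

Whichever convention is in effect, the fixup replaces a dummy-constant metadata input with an edge from the data producer's matching metadata output, as the MklAdd example in the comment above describes.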
+ int num_data_inputs = n->num_inputs() / 2; + for (int idx = 0; idx < num_data_inputs; idx++) { + // Get the edge connecting input slot with index (idx). + const Edge* e = nullptr; + TF_CHECK_OK(n->input_edge(idx, &e)); + + // If e is control edge, then skip. + if (e->IsControlEdge()) { + continue; + } + + // Check that the source node for edge 'e' is Mkl node. If it is not an Mkl + // node, then we don't need to do anything. + Node* e_src = e->src(); + if (GetNodeAttr(e_src->def(), "T", &T).ok() && + mkl_op_registry::IsMklOp(e_src->type_string(), T)) { + // Source node for edge 'e' is Mkl node. + // Destination node and destination input slot of e is node 'n' and 'idx' + // resp. + CHECK_EQ(e->dst(), n); + CHECK_EQ(e->dst_input(), idx); + + // Let's get edge that carries Mkl metadata corresponding to Mkl data edge + // 'e'. For that, let's first get the input slot of 'n' where the meta + // edge will feed the value. + int e_meta_in_slot = GetTensorMetaDataIndex(e->dst_input(), + n->num_inputs()); + const Edge* e_meta = nullptr; + TF_CHECK_OK(n->input_edge(e_meta_in_slot, &e_meta)); + + // Let's check if we need to fix this meta edge. + if (FixMklMetaDataEdgeIfNeeded(g, e, e_meta)) { + result = true; + } + } + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// // Run function for the pass /////////////////////////////////////////////////////////////////////////////// @@ -4307,6 +4422,25 @@ bool MklLayoutRewritePass::RunPass(std::unique_ptr* g) { DumpGraph("After running MklLayoutRewritePass(NodeMerge+Rewrite)", &**g); + order.clear(); + GetReversePostOrder(**g, &order); // This will give us topological sort. + for (Node* n : order) { + // If node is not an op or it cannot run on CPU device, then skip. 
+ if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) { + continue; + } + if (FixMklMetaDataEdges(g, n)) { + string node_name = n->name(); + string op_name = n->type_string(); + + VLOG(1) << "MklLayoutRewritePass: fixed metadata edges for node " + << node_name << " with op " << op_name; + result = true; + } + } + DumpGraph("After running MklLayoutRewritePass(NodeMerge+Rewrite+Fixup)", + &**g); + return result; } diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index 029cdcf94a..7645b4a7f0 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -3518,6 +3518,37 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) { "B->C:1;C->E;D->E:1;E->Z;M->C:2;N->C:3;Y->Z:1"); } +///////////////////////////////////////////////////////////////////// +// Post-rewrite fixup pass test + +TEST_F(MklLayoutPassTest, PostRewriteFixUpPass) { + InitGraph( + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'Input'}" + "node { name: 'M' op: '_MklInput'}" + "node { name: 'N' op: '_MklInput'}" + "node { name: 'C' op: '_MklConv2D'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'data_format' value { s: 'NCHW' } }" + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" + " attr { key: 'padding' value { s: 'SAME' } }" + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" + " input: ['A', 'B', 'M', 'N']}" + "node { name: 'D' op: 'Const' " + " attr { key: 'dtype' value { type: DT_UINT8 } }" + " attr { key: 'value' value { " + " tensor { dtype: DT_UINT8 tensor_shape { dim { size: 1 } } " + " int_val: 0 } } } }" + "node { name: 'E' op: '_MklAdd'" + " attr {key: 'T' value { type: DT_FLOAT } }" + " input: ['C', 'A', 'D', 'D']}"); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + "A(Input);B(Input);C(_MklConv2D);D(Const);E(_MklAdd);" + "M(_MklInput);N(_MklInput)|A->C;A->E:1;B->C:1;C->E;C:2->E:2;" + "D->E:3;M->C:2;N->C:3"); +} + ///////////////////////////////////////////////////////////////////// static void BM_MklLayoutRewritePass(int iters, int op_nodes) { diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 6749a7c571..0c02876ac5 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -610,7 +610,6 @@ class SymbolicShapeRefiner { } }; - // Compute the shape of the tensors outputed by node 'node' at output port // 'port_index' as the union of shape1 and shape2. 
ShapeHandle OutputAsUnion(const NodeDef* node, int port_index, ShapeHandle shape1, ShapeHandle shape2) { diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 1b18087cdf..8ca726df0b 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -679,6 +679,7 @@ cc_library( deps = [ ":constant_folding", ":graph_optimizer", + "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:graph_view", "//tensorflow/core/grappler:grappler_item", @@ -780,7 +781,6 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:scoped_allocator_ops_op_lib", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:op_types", "//tensorflow/core/grappler:utils", diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc index 4dde7ed1b4..03e36a7b9c 100644 --- a/tensorflow/core/grappler/optimizers/remapper.cc +++ b/tensorflow/core/grappler/optimizers/remapper.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/grappler/op_types.h" #include "tensorflow/core/grappler/optimizers/constant_folding.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/platform/logging.h" namespace tensorflow { namespace grappler { @@ -200,8 +201,7 @@ Status Remapper::Optimize(Cluster* /*cluster*/, const GrapplerItem& item, } } if (optimizable) { - VLOG(2) << "Optimizing fused batch norm node " << node.DebugString() - << std::endl; + VLOG(1) << "Optimizing fused batch norm node " << node.DebugString(); AddBatchNormNodes(optimized_graph, node); continue; } diff --git a/tensorflow/core/kernels/as_string_op.cc b/tensorflow/core/kernels/as_string_op.cc index 66c4aff3e3..a7757d1361 100644 --- a/tensorflow/core/kernels/as_string_op.cc +++ b/tensorflow/core/kernels/as_string_op.cc @@ -73,6 +73,7 @@ class AsStringOp : public OpKernel { } switch (dtype) { case DT_INT8: + case DT_INT16: case DT_INT32: strings::Appendf(&format_, "d"); break; @@ -129,6 +130,7 @@ class AsStringOp : public OpKernel { ENCODE_TYPE(DT_FLOAT, float, format_); ENCODE_TYPE(DT_DOUBLE, double, format_); ENCODE_TYPE(DT_INT8, int8, format_); + ENCODE_TYPE(DT_INT16, int16, format_); case (DT_BOOL): { const auto& input_flat = input_tensor->flat(); for (int i = 0; i < input_flat.size(); ++i) { diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc index 14d889e8e3..49b90e855b 100644 --- a/tensorflow/core/kernels/cwise_op_clip.cc +++ b/tensorflow/core/kernels/cwise_op_clip.cc @@ -33,52 +33,41 @@ class ClipOp : public OpKernel { const Tensor& in0 = ctx->input(0); const Tensor& in1 = ctx->input(1); const Tensor& in2 = ctx->input(2); + OP_REQUIRES(ctx, (in0.shape() == in1.shape() || + TensorShapeUtils::IsScalar(in1.shape())) && + (in0.shape() == in2.shape() || + TensorShapeUtils::IsScalar(in2.shape())), + errors::InvalidArgument( + "clip_value_min and clip_value_max must be either of " + "the same shape as input, or a scalar. 
", + "input shape: ", in0.shape().DebugString(), + "clip_value_min shape: ", in1.shape().DebugString(), + "clip_value_max shape: ", in2.shape().DebugString())); + + Tensor* out = nullptr; + OP_REQUIRES_OK( + ctx, ctx->forward_input_or_allocate_output({0}, 0, in0.shape(), &out)); + if (out->NumElements() == 0) return; // Nothing to do for empty output auto in0_flat = in0.flat(); auto in1_flat = in1.flat(); auto in2_flat = in2.flat(); + auto out_flat = out->flat(); const Device& d = ctx->eigen_device(); - Tensor* out = nullptr; - OP_REQUIRES_OK( - ctx, ctx->forward_input_or_allocate_output({0}, 0, in0.shape(), &out)); - auto out_flat = out->flat(); if (in1.shape() == in2.shape()) { if (in0.shape() == in1.shape()) { functor::TernaryClipOp()(d, in0_flat, in1_flat, in2_flat, out_flat); } else { - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in1.shape()), - errors::InvalidArgument( - "clip_value_min and clip_value_max must be either of " - "the same shape as input, or a scalar. ", - "input shape: ", in0.shape().DebugString(), - "clip_value_min shape: ", in1.shape().DebugString(), - "clip_value_max shape: ", in2.shape().DebugString())); functor::UnaryClipOp()(d, in0_flat, in1_flat, in2_flat, out_flat); } } else { if (in0.shape() == in1.shape()) { - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in2.shape()), - errors::InvalidArgument( - "clip_value_min and clip_value_max must be either of " - "the same shape as input, or a scalar. ", - "input shape: ", in0.shape().DebugString(), - "clip_value_min shape: ", in1.shape().DebugString(), - "clip_value_max shape: ", in2.shape().DebugString())); functor::BinaryLeftClipOp()(d, in0_flat, in1_flat, in2_flat, out_flat); } else { - OP_REQUIRES(ctx, - (in0.shape() == in2.shape() && - TensorShapeUtils::IsScalar(in1.shape())), - errors::InvalidArgument( - "clip_value_min and clip_value_max must be either of " - "the same shape as input, or a scalar. 
", - "input shape: ", in0.shape().DebugString(), - "clip_value_min shape: ", in1.shape().DebugString(), - "clip_value_max shape: ", in2.shape().DebugString())); functor::BinaryRightClipOp()(d, in0_flat, in1_flat, in2_flat, out_flat); } diff --git a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc index 9a3b2303a3..17a85d9773 100644 --- a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc @@ -57,6 +57,7 @@ struct DenseUpdate { template struct functor::DenseUpdate; \ template struct functor::DenseUpdate; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); +TF_CALL_int32(DEFINE_GPU_KERNELS); TF_CALL_int64(DEFINE_GPU_KERNELS); #undef DEFINE_GPU_KERNELS diff --git a/tensorflow/core/kernels/gather_functor.cc b/tensorflow/core/kernels/gather_functor.cc index e6fefe643b..5cd8e04927 100644 --- a/tensorflow/core/kernels/gather_functor.cc +++ b/tensorflow/core/kernels/gather_functor.cc @@ -37,6 +37,7 @@ namespace functor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) +TF_CALL_int64(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); TF_CALL_complex128(DECLARE_GPU_SPECS); diff --git a/tensorflow/core/kernels/gather_functor_gpu.cu.cc b/tensorflow/core/kernels/gather_functor_gpu.cu.cc index 39b6924d74..4563fc6353 100644 --- a/tensorflow/core/kernels/gather_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/gather_functor_gpu.cu.cc @@ -31,6 +31,7 @@ typedef Eigen::GpuDevice GPUDevice; DEFINE_GPU_SPECS_INDEX(T, int32); \ DEFINE_GPU_SPECS_INDEX(T, int64); +TF_CALL_int64(DEFINE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS); TF_CALL_complex64(DEFINE_GPU_SPECS); TF_CALL_complex128(DEFINE_GPU_SPECS); diff --git a/tensorflow/core/kernels/gather_nd_op.cc b/tensorflow/core/kernels/gather_nd_op.cc index 7e5a9e1ec5..4e53291b7f 100644 --- a/tensorflow/core/kernels/gather_nd_op.cc +++ b/tensorflow/core/kernels/gather_nd_op.cc @@ -228,6 +228,8 @@ namespace functor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) +TF_CALL_int32(DECLARE_GPU_SPECS); +TF_CALL_int64(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); TF_CALL_complex128(DECLARE_GPU_SPECS); @@ -239,6 +241,8 @@ TF_CALL_complex128(DECLARE_GPU_SPECS); // Registration of the GPU implementations. 
#define REGISTER_GATHER_ND_GPU(type) REGISTER_GATHER_ND_ALL_INDICES(GPU, type) +TF_CALL_int32(REGISTER_GATHER_ND_GPU); +TF_CALL_int64(REGISTER_GATHER_ND_GPU); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_ND_GPU); TF_CALL_complex64(REGISTER_GATHER_ND_GPU); TF_CALL_complex128(REGISTER_GATHER_ND_GPU); diff --git a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc index b03efc684f..da8d2e9e3c 100644 --- a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc @@ -119,6 +119,8 @@ struct GatherNdSlice { DEFINE_GPU_SPECS_INDEX(T, int32); \ DEFINE_GPU_SPECS_INDEX(T, int64); +TF_CALL_int32(DEFINE_GPU_SPECS); +TF_CALL_int64(DEFINE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS); TF_CALL_complex64(DEFINE_GPU_SPECS); TF_CALL_complex128(DEFINE_GPU_SPECS); diff --git a/tensorflow/core/kernels/gather_op.cc b/tensorflow/core/kernels/gather_op.cc index ef332ebee3..094504d6b9 100644 --- a/tensorflow/core/kernels/gather_op.cc +++ b/tensorflow/core/kernels/gather_op.cc @@ -153,6 +153,7 @@ TF_CALL_uint64(REGISTER_GATHER_CPU); // Registration of the GPU implementations. #define REGISTER_GATHER_GPU(type) REGISTER_GATHER_ALL_INDICES(GPU, type) +TF_CALL_int64(REGISTER_GATHER_GPU); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_GPU); TF_CALL_complex64(REGISTER_GATHER_GPU); TF_CALL_complex128(REGISTER_GATHER_GPU); diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc index 5eeb23d810..31d1b949ef 100644 --- a/tensorflow/core/kernels/mkl_concat_op.cc +++ b/tensorflow/core/kernels/mkl_concat_op.cc @@ -14,6 +14,7 @@ limitations under the License. #include #include +#include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" @@ -590,8 +591,8 @@ class MklConcatOp : public OpKernel { const int N = input_tensors.size(); // Get Tensor shapes. - std::vector input_shapes(N); - GetMklShapeList(context, "values", &input_shapes); + std::vector mkl_input_shapes(N); + GetMklShapeList(context, "values", &mkl_input_shapes); const Tensor& concat_dim_tensor = (AxisArgName == NAME_IS_CONCAT_DIM) ? MklGetInput(context, 0) @@ -610,19 +611,14 @@ class MklConcatOp : public OpKernel { int i = 0; bool invoke_eigen = false; bool are_all_mkl_inputs = true, are_all_tf_inputs = true; - const TensorShape expected_shape = input_shapes[0].IsMklTensor() - ? input_shapes[0].GetTfShape() - : input_tensors[0].shape(); + const TensorShape expected_shape = mkl_input_shapes[0].IsMklTensor() + ? mkl_input_shapes[0].GetTfShape() + : input_tensors[0].shape(); size_t expected_dims = expected_shape.dims(); if (concat_dim < 0) concat_dim = expected_dims + concat_dim; - for (auto& s : input_shapes) { - if (s == expected_shape) { - ++i; - continue; - } - + for (auto& s : mkl_input_shapes) { TensorShape s_shape = s.IsMklTensor() ? s.GetTfShape() : input_tensors[i].shape(); size_t s_dims = s_shape.dims(); @@ -665,21 +661,14 @@ class MklConcatOp : public OpKernel { // Call Eigen library if (invoke_eigen) { - TensorShapeList tf_input_shapes; - i = 0; - for (auto& s : input_shapes) { - TensorShape s_shape = - s.IsMklTensor() ? 
s.GetTfShape() : input_tensors[i].shape(); - tf_input_shapes.push_back(s_shape); - ++i; - } - CallEigenVersion(context, input_tensors, tf_input_shapes); + CallEigenVersion(context, input_tensors, mkl_input_shapes); return; } memory::dims dst_dims; + if (are_all_mkl_inputs) - dst_dims = TFShapeToMklDnnDims(input_shapes[0].GetTfShape()); + dst_dims = TFShapeToMklDnnDims(mkl_input_shapes[0].GetTfShape()); else // When all the inputs are in Tensorflow format, we don't know // what is the input data format. In that case, we just use @@ -689,26 +678,61 @@ class MklConcatOp : public OpKernel { std::vector srcs_pd; std::vector> srcs(N, MklDnnData(&cpu_engine)); int64 dst_concat_dim_size = 0; - for (int k = 0; k < N; k++) { - bool is_mkl_tensor = input_shapes[k].IsMklTensor(); - memory::dims src_dims; - - // Same comment as dst_dims for src_dims. - src_dims = (is_mkl_tensor) - ? TFShapeToMklDnnDims(input_shapes[k].GetTfShape()) - : TFShapeToMklDnnDims(input_tensors[k].shape()); - - dst_concat_dim_size += src_dims[concat_dim]; - auto src_md = - is_mkl_tensor ? input_shapes[k].GetMklLayout() : - // It does not matter what data format we use here - // (NHWC or NCHW). We just need to ensure that output - // of Concat uses same data format as input. - memory::desc(src_dims, MklDnnType(), memory::format::nchw); - - srcs[k].SetUsrMem(src_md, &input_tensors[k]); - auto src_mpd = srcs[k].GetUsrMemPrimDesc(); - srcs_pd.push_back(src_mpd); + + bool isMklReorderNeeded = false; + memory::format mkl_common_format = memory::format::any; + if (are_all_mkl_inputs) { + mkl_common_format = + FindMklCommonFormat(mkl_input_shapes, concat_dim, + &isMklReorderNeeded, &dst_concat_dim_size); + + if (!isMklReorderNeeded) { + // All MKL tensors have a same format. Reorder is not needed. + for (int k = 0; k < N; k++) { + if (input_tensors[k].NumElements() == 0) + continue; + + auto src_md = mkl_input_shapes[k].GetMklLayout(); + srcs[k].SetUsrMem(src_md, &input_tensors[k]); + auto src_mpd = srcs[k].GetUsrMemPrimDesc(); + srcs_pd.push_back(src_mpd); + } + } else { + // MKL tensors have different formats. + // Reorder them to most common format. + for (int k = 0; k < N; k++) { + if (input_tensors[k].NumElements() == 0) + continue; + + auto src_dims = TFShapeToMklDnnDims( + mkl_input_shapes[k].GetTfShape()); + auto src_md = mkl_input_shapes[k].GetMklLayout(); + srcs[k].SetUsrMem(src_md, &input_tensors[k]); + + if (src_md.data.format != mkl_common_format) + src_md = memory::desc(src_dims, MklDnnType(), + mkl_common_format); + + srcs_pd.push_back(memory::primitive_desc(src_md, cpu_engine)); + } + } + } else { // All TF inputs + for (int k = 0; k < N; k++) { + if (input_tensors[k].NumElements() == 0) + continue; + + memory::dims src_dims = TFShapeToMklDnnDims(input_tensors[k].shape()); + dst_concat_dim_size += src_dims[concat_dim]; + + // It does not matter what data format to be used (NHWC versus NCHW). + // We just need to ensure that output uses same data format as inputs. + auto src_md = + memory::desc(src_dims, MklDnnType(), memory::format::nchw); + + srcs[k].SetUsrMem(src_md, &input_tensors[k]); + auto src_mpd = srcs[k].GetUsrMemPrimDesc(); + srcs_pd.push_back(src_mpd); + } } dst_dims[concat_dim] = dst_concat_dim_size; @@ -718,25 +742,33 @@ class MklConcatOp : public OpKernel { if (are_all_mkl_inputs) { // Since we are passing a specific format for destination, // we need to have dst_dims in MklDnn order (NCHW). 
- auto orig_tf_format = input_shapes[0].GetTfDataFormat(); + auto orig_tf_format = mkl_input_shapes[0].GetTfDataFormat(); dst_dims_in_nchw = MklDnnDimsInNCHW( dst_dims, MklDnnDataFormatToTFDataFormat(orig_tf_format)); - // We will set the output in the same format as input to avoid layout - // conversions. - // Currently we are setting dst format same as input format. - // See if we can make this choice in a better way. + // Set the output format same as the most common format of inputs + // to avoid layout conversions. dst_md = memory::desc( - dst_dims_in_nchw, MklDnnType(), - (memory::format)input_shapes[0].GetMklLayout().data.format); + dst_dims_in_nchw, MklDnnType(), mkl_common_format); } else { - // Again, format does not matter here. We just need to make it same as - // input format. + // All inputs are TF tensors. + // Set the output format same as input format (nchw). dst_md = memory::desc(dst_dims, MklDnnType(), memory::format::nchw); } std::vector inputs; - for (int k = 0; k < input_tensors.size(); k++) - inputs.push_back(srcs[k].GetOpMem()); + std::vector net; + if (isMklReorderNeeded) { + for (int k = 0; k < input_tensors.size(); k++) { + if (input_tensors[k].NumElements() > 0) { + srcs[k].CheckReorderToOpMem(srcs_pd[k], &net); + } + } + } + for (int k = 0; k < input_tensors.size(); k++) { + if (input_tensors[k].NumElements() > 0) { + inputs.push_back(srcs[k].GetOpMem()); + } + } // If all inputs are in MKL format, then meaning of concat_dim needs to // change. Value of concat_dim is tied to input Tensorflow data format @@ -745,7 +777,8 @@ class MklConcatOp : public OpKernel { // But ifinput tensors are in NHWC order, then semantics need to change. // E.g., if we are concatinating over Channel (dimension 3 for NHWC), // then since MklDnn order is NCHW, concat_dim needs to be 1. 
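To make the concat_dim remapping described in the comment above concrete: TensorFlow axes of an NHWC tensor map onto MklDnn's canonical NCHW ordering, so concatenating over channels (TF axis 3) becomes axis 1 on the MklDnn side. The pass uses MklDnnShape::TfDimIdx for this; the lookup table below is only an illustration of the NHWC case:

    #include <array>
    #include <cassert>

    // NHWC axis -> position of that dimension in NCHW ordering.
    int NhwcAxisToNchw(int tf_axis) {
      constexpr std::array<int, 4> kMap = {0 /*N*/, 2 /*H*/, 3 /*W*/, 1 /*C*/};
      return kMap.at(tf_axis);
    }

    int main() {
      assert(NhwcAxisToNchw(3) == 1);  // concat over channels
      assert(NhwcAxisToNchw(1) == 2);  // concat over height
      return 0;
    }
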
- if (are_all_mkl_inputs) concat_dim = input_shapes[0].TfDimIdx(concat_dim); + if (are_all_mkl_inputs) + concat_dim = mkl_input_shapes[0].TfDimIdx(concat_dim); auto concat_pd = concat::primitive_desc(dst_md, concat_dim, srcs_pd); @@ -758,7 +791,7 @@ class MklConcatOp : public OpKernel { dnn_shape_dst.SetMklLayout(&dst_pd); dnn_shape_dst.SetElemType(MklDnnType()); dnn_shape_dst.SetTfLayout(dst_dims.size(), dst_dims_in_nchw, - input_shapes[0].GetTfDataFormat()); + mkl_input_shapes[0].GetTfDataFormat()); tf_shape_dst.AddDim((dst_pd.get_size() / sizeof(T))); } else { dnn_shape_dst.SetMklTensor(false); @@ -773,7 +806,6 @@ class MklConcatOp : public OpKernel { dst.SetUsrMem(dst_md, dst_tensor); auto concat_op = concat(concat_pd, inputs, dst.GetOpMem()); - std::vector net; net.push_back(concat_op); stream(stream::kind::eager).submit(net).wait(); } catch (mkldnn::error& e) { @@ -787,15 +819,27 @@ class MklConcatOp : public OpKernel { } void CallEigenVersion(OpKernelContext* context, const OpInputList& values, - const TensorShapeList& input_shapes) { - CHECK_EQ(values.size(), input_shapes.size()); + const MklDnnShapeList& mkl_input_shapes) { + CHECK_EQ(values.size(), mkl_input_shapes.size()); std::vector converted_values; - for (int i = 0; i < input_shapes.size(); i++) - converted_values.push_back(values[i]); + TensorShapeList tf_input_shapes; + for (int i = 0; i < mkl_input_shapes.size(); i++) { + if (mkl_input_shapes[i].IsMklTensor()) { + // do conversion from MKL to TF + Tensor tmp_tensor = + ConvertMklToTF(context, values[i], mkl_input_shapes[i]); + converted_values.push_back(tmp_tensor); + tf_input_shapes.push_back(mkl_input_shapes[i].GetTfShape()); + } else { + // no conversion since it is TF tensor already + converted_values.push_back(values[i]); + tf_input_shapes.push_back(values[i].shape()); + } + } // Call Eigen concat. - eigen_concat_op_.Compute(context, converted_values, input_shapes); + eigen_concat_op_.Compute(context, converted_values, tf_input_shapes); // Set output Mkl tensor for this op. MklDnnShape dnn_shape_output; @@ -812,6 +856,55 @@ class MklConcatOp : public OpKernel { output_tensor->flat().data(), output_tensor->flat().size() * sizeof(uint8)); } + + // This method finds the most commom format accross all MKL inputs + // Inputs: + // 1. input_shapes: shapes of input (MKL) tensors. + // 2. concat_dim: concat dimension. + // Outputs: + // 1. is_reorder_needed is set to true if inputs have difference formats + // It is set to false otherwise. + // 2. concat_dim_size is the size of concat_dim. + // Return: + // return the common MKL format. + memory::format FindMklCommonFormat(const MklDnnShapeList& input_shapes, + int concat_dim, bool* is_reorder_needed, int64* concat_dim_size) { + *is_reorder_needed = false; + *concat_dim_size = 0; + std::unordered_map occurrence_map; + if (input_shapes.size() == 0) + return memory::format::any; + + // Compute ocurrences of each format of all inputs. + for (int k=0; k ( + input_shapes[k].GetMklLayout().data.format); + occurrence_map[fmt] += 1; + } + + if (occurrence_map.size() == 1) { + // this means that all inputs have a same format + // return it with is_reorder_needed set false. + return static_cast( + input_shapes[0].GetMklLayout().data.format); + } + + // Input tensors have different formats. Thus, reorder is needed. + // We pick up the most common format to minimize the total + // number of input reorder. 
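The selection loop that completes this helper just below is a plain majority vote: pick whichever memory format occurs most often so that the fewest inputs need a reorder. A minimal standalone sketch of the same idea, using ints in place of memory::format values (the function name is invented for illustration):

    #include <unordered_map>
    #include <vector>

    // Returns the most frequent value in `formats` (ties broken arbitrarily)
    // and reports whether more than one distinct format was seen.
    int MostCommonFormat(const std::vector<int>& formats, bool* needs_reorder) {
      std::unordered_map<int, int> counts;
      for (int f : formats) ++counts[f];
      *needs_reorder = counts.size() > 1;
      int best = -1, best_count = 0;
      for (const auto& kv : counts) {
        if (kv.second > best_count) {
          best = kv.first;
          best_count = kv.second;
        }
      }
      return best;
    }
    // e.g. formats {8, 8, 1} -> returns 8 with needs_reorder = true,
    // so only the input stored in format 1 has to be reordered.
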
+ memory::format commonest_format = memory::format::any; + int max_occurrence = 0; + *is_reorder_needed = true; + for (auto item : occurrence_map) { + if (item.second > max_occurrence) { + commonest_format = static_cast(item.first); + max_occurrence = item.second; + } + } + return commonest_format; + } }; #endif diff --git a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc index c1da0ded1d..f857be6c32 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc @@ -18,6 +18,7 @@ limitations under the License. // bias. #ifdef INTEL_MKL +#ifdef INTEL_MKL_ML #define USE_EIGEN_TENSOR #define EIGEN_USE_THREADS @@ -264,4 +265,5 @@ class MklConv2DCustomBackpropBiasOp : public OpKernel { TF_CALL_float(REGISTER_CPU_KERNELS); #undef REGISTER_CPU_KERNELS } /* namespace tensorflow */ +#endif /* INTEL_MKL_ML */ #endif /* INTEL_MKL */ diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h index 279167aba2..c0dfed7d7d 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.h +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h @@ -199,13 +199,15 @@ class MklPoolingForwardOpBase : public MklPoolingOpBase { CHECK_NOTNULL(pool_params); CHECK_NOTNULL(dnn_data_input); TensorShape input_tensor_shape = input_tensor.shape(); - memory::desc input_md = + if (input_tensor.NumElements() != 0) { + memory::desc input_md = input_mkl_shape.IsMklTensor() ? input_mkl_shape.GetMklLayout() : memory::desc(TFShapeToMklDnnDimsInNCHW(input_tensor_shape, this->data_format_tf_), MklDnnType(), this->data_format_mkldnn_); - dnn_data_input->SetUsrMem(input_md, &input_tensor); + dnn_data_input->SetUsrMem(input_md, &input_tensor); + } this->InitMklPoolParameters(context, pool_params, input_mkl_shape, input_tensor_shape); } diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc index 43c5b29509..e1fc2ea128 100644 --- a/tensorflow/core/kernels/scatter_nd_op.cc +++ b/tensorflow/core/kernels/scatter_nd_op.cc @@ -292,6 +292,7 @@ TF_CALL_string(REGISTER_SCATTER_ND_CPU); REGISTER_SCATTER_ND_UPDATE_GPU(type); \ REGISTER_SCATTER_ND_GPU(type); +TF_CALL_int32(REGISTER_SCATTER_ND_ALL_GPU); // TODO(b/66916790): Support half types in ScatterNd. TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_ALL_GPU); TF_CALL_complex64(REGISTER_SCATTER_ND_ALL_GPU); @@ -306,6 +307,8 @@ TF_CALL_complex128(REGISTER_SCATTER_ND_ALL_GPU); #define REGISTER_SCATTER_ND_UPDATE_SYCL(type) \ REGISTER_SCATTER_ND_UPDATE(type, SYCL); +TF_CALL_int32(REGISTER_SCATTER_ND_ADD_SUB_SYCL); +TF_CALL_int32(REGISTER_SCATTER_ND_UPDATE_SYCL); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_ADD_SUB_SYCL); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_UPDATE_SYCL); #undef REGISTER_SCATTER_ND_ADD_SUB_SYCL @@ -576,6 +579,7 @@ namespace functor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) +TF_CALL_int32(DECLARE_GPU_SPECS); // TODO(b/66916790): Support half types in ScatterNd. 
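Several hunks in this patch widen type coverage simply by adding TF_CALL_int32 / TF_CALL_int64 lines in front of existing instantiation or registration macros. For readers unfamiliar with that style, here is a generic sketch of the pattern with invented macro names; it is not TensorFlow's actual macro machinery:

    #include <cstdint>
    #include <iostream>

    using int32 = std::int32_t;
    using int64 = std::int64_t;

    // Each CALL_* macro expands its argument once for one type, so adding a
    // single CALL_* line adds one instantiation, much like TF_CALL_int32(...).
    #define CALL_int32(M) M(int32)
    #define CALL_int64(M) M(int64)

    template <typename T>
    struct Kernel {
      static void Run(T v) { std::cout << v << "\n"; }
    };
    #define INSTANTIATE(T) template struct Kernel<T>;

    CALL_int32(INSTANTIATE)
    CALL_int64(INSTANTIATE)

    int main() {
      Kernel<int32>::Run(7);
      Kernel<int64>::Run(9);
      return 0;
    }
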
TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); diff --git a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc index a3c21edc15..08b657f4c3 100644 --- a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc @@ -170,6 +170,7 @@ struct ScatterNdFunctor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) +TF_CALL_int32(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); TF_CALL_complex128(DECLARE_GPU_SPECS); diff --git a/tensorflow/core/kernels/scoped_allocator_ops_test.cc b/tensorflow/core/kernels/scoped_allocator_ops_test.cc index bb0129fa6f..634f9ba887 100644 --- a/tensorflow/core/kernels/scoped_allocator_ops_test.cc +++ b/tensorflow/core/kernels/scoped_allocator_ops_test.cc @@ -216,8 +216,13 @@ TEST_F(ScopedAllocatorConcatOpTest, Success3) { } TEST_F(ScopedAllocatorConcatOpTest, Reshape) { - MakeOp({2, 2, 2}, DT_DOUBLE, true, "test", 120, 2); - ExecOp(DT_DOUBLE, 120, {{2, 2}, {2, 2}}); + MakeOp({2, 2, 4}, DT_DOUBLE, true, "test", 120, 2); + + // The elements of the third parameter to ExecOp must be multiples of + // Allocator::kAllocatorAlignment in size. If they are not, the backing + // tensor allocated by PrepOp will have too many elements and reshaping + // will fail. + ExecOp(DT_DOUBLE, 120, {{2, 4}, {2, 4}}); } TEST_F(ScopedAllocatorConcatOpTest, NoReshapeAttr) { diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index 7796bf3587..d65692a552 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -16,6 +16,14 @@ limitations under the License. #ifndef TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ #define TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ + +// This file requires the following include because it uses CudaAtomicMax: +// #include "tensorflow/core/util/cuda_kernel_helper.h" + +// Unfortunately we can't add the #include, since it breaks compilation for +// non-GPU targets. This only breaks in clang, because it's more strict for +// template code and CudaAtomicMax is used in template context. + // This file requires the following include because it uses CudaAtomicMax: // #include "tensorflow/core/util/cuda_kernel_helper.h" @@ -130,4 +138,4 @@ struct Highest { } // namespace functor } // namespace tensorflow -#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ +#endif // TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ diff --git a/tensorflow/core/kernels/sparse_matmul_op.cc b/tensorflow/core/kernels/sparse_matmul_op.cc index a1f9667b78..866c5dcd52 100644 --- a/tensorflow/core/kernels/sparse_matmul_op.cc +++ b/tensorflow/core/kernels/sparse_matmul_op.cc @@ -1490,7 +1490,7 @@ inline void LibxsmmSparseMatMul::Compute( #endif // TENSORFLOW_USE_LIBXSMM -// Here is a an overview of the SparseMatMul code. Note that we assume that the +// Here is an overview of the SparseMatMul code. Note that we assume that the // left matrix is sparse. // // The matrix "left" is divided into a grid with blocksize of (M, KL). Each diff --git a/tensorflow/core/kernels/string_split_op.cc b/tensorflow/core/kernels/string_split_op.cc index 4c2b312c34..26ab72f12e 100644 --- a/tensorflow/core/kernels/string_split_op.cc +++ b/tensorflow/core/kernels/string_split_op.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/str_util.h" namespace tensorflow { @@ -43,6 +44,63 @@ std::vector Split(const string& str, const string& delimiter, return char_vector; } +std::vector SplitV2(const string& str, StringPiece sep, int maxsplit) { + // This SplitV2 method matches the behavior of python's str.split: + // If sep is given, consecutive delimiters are not grouped together + // and are deemed to delimit empty strings (for example, '1,,2'.split(',') + // returns ['1', '', '2']). The sep argument may consist of multiple + // characters (for example, '1<>2<>3'.split('<>') returns ['1', '2', '3']). + // Splitting an empty string with a specified separator returns ['']. + // + // If sep is not specified or is None, a different splitting algorithm is + // applied: runs of consecutive whitespace are regarded as a single + // separator, and the result will contain no empty strings at the start or + // end if the string has leading or trailing whitespace. Consequently, + // splitting an empty string or a string consisting of just whitespace + // with a None separator returns []. + + std::vector result; + + StringPiece text(str); + if (maxsplit == 0) { + result.emplace_back(std::string(text)); + return result; + } + + if (sep.empty()) { + StringPiece token; + // Remove leading whitespaces. + str_util::RemoveLeadingWhitespace(&text); + int split = 0; + while (str_util::ConsumeNonWhitespace(&text, &token)) { + result.emplace_back(std::string(token)); + str_util::RemoveLeadingWhitespace(&text); + ++split; + if (maxsplit > 0 && split == maxsplit) { + result.emplace_back(std::string(text)); + return result; + } + } + return result; + } + auto p = std::search(text.begin(), text.end(), sep.begin(), sep.end()); + int split = 0; + while (p != text.end()) { + StringPiece token = text.substr(0, p - text.begin()); + result.emplace_back(std::string(token)); + text.remove_prefix(token.size()); + text.remove_prefix(sep.size()); + ++split; + if (maxsplit > 0 && split == maxsplit) { + result.emplace_back(std::string(text)); + return result; + } + p = std::search(text.begin(), text.end(), sep.begin(), sep.end()); + } + result.emplace_back(std::string(text)); + return result; +} + } // namespace class StringSplitOp : public OpKernel { @@ -122,6 +180,78 @@ class StringSplitOp : public OpKernel { bool skip_empty_; }; +class StringSplitV2Op : public OpKernel { + public: + explicit StringSplitV2Op(OpKernelConstruction* context) + : OpKernel(context), maxsplit_(-1) { + OP_REQUIRES_OK(context, context->GetAttr("maxsplit", &maxsplit_)); + } + + void Compute(OpKernelContext* ctx) override { + const Tensor* input_tensor; + OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor)); + OP_REQUIRES(ctx, TensorShapeUtils::IsVector(input_tensor->shape()), + errors::InvalidArgument("input must be a vector, got shape: ", + input_tensor->shape().DebugString())); + + const auto input_vec = input_tensor->vec(); + const int64 batch_size = input_vec.dimension(0); + + const Tensor* sep_tensor; + OP_REQUIRES_OK(ctx, ctx->input("sep", &sep_tensor)); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(sep_tensor->shape()), + errors::InvalidArgument("sep must be a scalar, got shape: ", + sep_tensor->shape().DebugString())); + const auto sep_vec = sep_tensor->flat(); + StringPiece sep(sep_vec(0)); + std::vector tokens; + // Guess that we'll be unpacking a 
handful of tokens per example. + static constexpr int kReserveSize = 4; + tokens.reserve(batch_size * kReserveSize); + + int64 output_size = 0; + int64 max_num_entries = 0; + std::vector num_indices(batch_size); + for (int64 i = 0; i < batch_size; ++i) { + std::vector parts = SplitV2(input_vec(i), sep, maxsplit_); + int64 n_entries = parts.size(); + num_indices[i] = n_entries; + output_size += n_entries; + max_num_entries = std::max(max_num_entries, n_entries); + tokens.insert(tokens.end(), parts.begin(), parts.end()); + } + + Tensor* sp_indices_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({output_size, 2}), + &sp_indices_t)); + Tensor* sp_tokens_t; + OP_REQUIRES_OK( + ctx, ctx->allocate_output(1, TensorShape({output_size}), &sp_tokens_t)); + Tensor* sp_shape_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output(2, TensorShape({2}), &sp_shape_t)); + + auto sp_indices = sp_indices_t->matrix(); + auto sp_tokens = sp_tokens_t->vec(); + auto sp_shape = sp_shape_t->vec(); + sp_shape(0) = batch_size; + sp_shape(1) = max_num_entries; + size_t c = 0; + for (size_t i = 0; i < batch_size; ++i) { + for (size_t j = 0; j < num_indices[i]; ++j) { + sp_indices(c, 0) = i; + sp_indices(c, 1) = j; + sp_tokens(c) = tokens[c]; + ++c; + } + } + } + + private: + int maxsplit_; +}; + REGISTER_KERNEL_BUILDER(Name("StringSplit").Device(DEVICE_CPU), StringSplitOp); +REGISTER_KERNEL_BUILDER(Name("StringSplitV2").Device(DEVICE_CPU), + StringSplitV2Op); } // namespace tensorflow diff --git a/tensorflow/core/ops/candidate_sampling_ops.cc b/tensorflow/core/ops/candidate_sampling_ops.cc index 6e4d100b04..6e589c8d1c 100644 --- a/tensorflow/core/ops/candidate_sampling_ops.cc +++ b/tensorflow/core/ops/candidate_sampling_ops.cc @@ -145,12 +145,15 @@ REGISTER_OP("ComputeAccidentalHits") int64 num_true; TF_RETURN_IF_ERROR(c->GetAttr("num_true", &num_true)); - // Validate true_classes. + // Validate true_classes, must be a matrix. ShapeHandle true_classes; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &true_classes)); DimensionHandle unused; TF_RETURN_IF_ERROR( c->WithValue(c->Dim(true_classes, 1), num_true, &unused)); + // Validate sampled_candidates, must be a vector. + ShapeHandle sampled_candidates; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &sampled_candidates)); // All three outputs are the same shape. ShapeHandle v = c->Vector(InferenceContext::kUnknownDim); diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index 15e0ca8af9..9dca5f53ce 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -218,7 +218,17 @@ REGISTER_OP("MapAndBatchDataset") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn([](shape_inference::InferenceContext* c) { + // Use index from the end to retrieve the Input shapes, + // so that to avoid guessing the length of "other_arguments". + // batch_size, num_parallel_batches, and drop_remainder are 0-D scalars. 
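To make the SplitV2 semantics added in string_split_op.cc above concrete, a few expected results, all following from the python-style str.split behavior its comment describes (illustration only, not additional test vectors from the patch):

    SplitV2("1,,2",     ",",  -1)  ->  ["1", "", "2"]   // explicit sep keeps empty strings
    SplitV2("1<>2<>3",  "<>", -1)  ->  ["1", "2", "3"]  // multi-character separator
    SplitV2("  a  b  ", "",   -1)  ->  ["a", "b"]       // empty sep: whitespace runs collapse,
                                                        //  no leading/trailing empties
    SplitV2("a,b,c",    ",",   1)  ->  ["a", "b,c"]     // maxsplit caps the number of splits
    SplitV2("",         ",",  -1)  ->  [""]             // empty input, explicit sep
    SplitV2("",         "",   -1)  ->  []               // empty input, empty sep

The StringSplitV2 kernel then packs these ragged rows into the sparse triple of indices, values, and a dense shape of [batch_size, max_tokens_in_any_row].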
+ shape_inference::ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 3), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 2), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 1), 0, &unused)); + + return shape_inference::ScalarShape(c); + }); REGISTER_OP("MapAndBatchDatasetV2") .Input("input_dataset: variant") @@ -231,7 +241,17 @@ REGISTER_OP("MapAndBatchDatasetV2") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn([](shape_inference::InferenceContext* c) { + // Use index from the end to retrieve the Input shapes, + // so that to avoid guessing the length of "other_arguments". + // batch_size, num_parallel_calls, and drop_remainder are 0-D scalars. + shape_inference::ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 3), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 2), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 1), 0, &unused)); + + return shape_inference::ScalarShape(c); + }); REGISTER_OP("PrefetchDataset") .Input("input_dataset: variant") diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc index d949e70c66..87f4991134 100644 --- a/tensorflow/core/ops/image_ops.cc +++ b/tensorflow/core/ops/image_ops.cc @@ -454,7 +454,9 @@ REGISTER_OP("DrawBoundingBoxes") DimensionHandle unused; TF_RETURN_IF_ERROR(c->WithValue(c->Dim(boxes, 2), 4, &unused)); - return shape_inference::UnchangedShapeWithRankAtLeast(c, 3); + // The rank of the input image (rank = 4) has already been restricted + // above, and the output is of the same shape as the input. 
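The shape functions added for MapAndBatchDataset and MapAndBatchDatasetV2 above index their inputs from the end because the number of other_arguments inputs is not fixed. With a hypothetical Targuments list of length 2 (the count is made up for illustration), the input layout looks like:

    // 0: input_dataset
    // 1: other_arguments[0]
    // 2: other_arguments[1]
    // 3: batch_size            == c->input(c->num_inputs() - 3)
    // 4: num_parallel_batches  == c->input(c->num_inputs() - 2)   (num_parallel_calls in V2)
    // 5: drop_remainder        == c->input(c->num_inputs() - 1)

However many other_arguments the op carries, the three trailing scalars always sit at num_inputs() - 3 through num_inputs() - 1, so WithRank(..., 0, ...) can validate them without knowing the length of Targuments.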
+ return shape_inference::UnchangedShape(c); }); // -------------------------------------------------------------------------- diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 1740fa152c..b3487122e2 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1084,7 +1084,7 @@ REGISTER_OP("UnsortedSegmentProd") .Input("segment_ids: Tindices") .Input("num_segments: Tnumsegments") .Output("output: T") - .Attr("T: realnumbertype") + .Attr("T: numbertype") .Attr("Tindices: {int32,int64}") .Attr("Tnumsegments: {int32,int64} = DT_INT32") .SetShapeFn(UnsortedSegmentReductionShapeFn); diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index fc60e807b9..41efa49ce3 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1453,6 +1453,7 @@ REGISTER_OP("QuantizedReluX") ShapeHandle unused; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc index 1d5c743a56..4423062362 100644 --- a/tensorflow/core/ops/string_ops.cc +++ b/tensorflow/core/ops/string_ops.cc @@ -78,7 +78,7 @@ REGISTER_OP("ReduceJoin") REGISTER_OP("AsString") .Input("input: T") .Output("output: string") - .Attr("T: {int32, int64, complex64, float, double, bool, int8}") + .Attr("T: {int8, int16, int32, int64, complex64, float, double, bool}") .Attr("precision: int = -1") .Attr("scientific: bool = false") .Attr("shortest: bool = false") @@ -134,6 +134,24 @@ REGISTER_OP("StringSplit") return Status::OK(); }); +REGISTER_OP("StringSplitV2") + .Input("input: string") + .Input("sep: string") + .Output("indices: int64") + .Output("values: string") + .Output("shape: int64") + .Attr("maxsplit: int = -1") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + + c->set_output(0, c->Matrix(InferenceContext::kUnknownDim, 2)); + c->set_output(1, c->Vector(InferenceContext::kUnknownDim)); + c->set_output(2, c->Vector(2)); + return Status::OK(); + }); + REGISTER_OP("StringStrip") .Input("input: string") .Output("output: string") diff --git a/tensorflow/core/platform/cpu_info.cc b/tensorflow/core/platform/cpu_info.cc index 99de364042..e9da3d8e32 100644 --- a/tensorflow/core/platform/cpu_info.cc +++ b/tensorflow/core/platform/cpu_info.cc @@ -344,5 +344,28 @@ int CPUModelNum() { #endif } +int CPUIDNumSMT() { +#ifdef PLATFORM_IS_X86 + // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration + // https://software.intel.com/en-us/articles/intel-sdm (Vol 3A) + // Section: Detecting Hardware Multi-threads Support and Topology + // Uses CPUID Leaf 11 to enumerate system topology on Intel x86 architectures + // Other cases not supported + uint32 eax, ebx, ecx, edx; + // Check if system supports Leaf 11 + GETCPUID(eax, ebx, ecx, edx, 0, 0); + if (eax >= 11) { + // 1) Leaf 11 available? 
CPUID.(EAX=11, ECX=0):EBX != 0 + // 2) SMT_Mask_Width = CPUID.(EAX=11, ECX=0):EAX[4:0] if CPUID.(EAX=11, + // ECX=0):ECX[15:8] is 1 + GETCPUID(eax, ebx, ecx, edx, 11, 0); + if (ebx != 0 && ((ecx & 0xff00) >> 8) == 1) { + return 1 << (eax & 0x1f); // 2 ^ SMT_Mask_Width + } + } +#endif // PLATFORM_IS_X86 + return 0; +} + } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/cpu_info.h b/tensorflow/core/platform/cpu_info.h index b5be7e8b54..175c9ae8b1 100644 --- a/tensorflow/core/platform/cpu_info.h +++ b/tensorflow/core/platform/cpu_info.h @@ -35,6 +35,10 @@ namespace port { // software can change it dynamically. int NumSchedulableCPUs(); +// Returns an estimate of the number of hyperthreads per physical core +// on the CPU +int NumHyperthreadsPerCore(); + // Mostly ISA related features that we care about enum CPUFeature { // Do not change numeric assignments. @@ -107,6 +111,9 @@ int CPUModelNum(); // Returns nominal core processor cycles per second of each processor. double NominalCPUFrequency(); +// Returns num of hyperthreads per physical core +int CPUIDNumSMT(); + } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index ae81f9b5b3..a319ccbdbe 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -71,6 +71,8 @@ def pyx_library( name = filename + "_cython_translation", srcs = [filename], outs = [filename.split(".")[0] + ".cpp"], + # Optionally use PYTHON_BIN_PATH on Linux platforms so that python 3 + # works. Windows has issues with cython_binary so skip PYTHON_BIN_PATH. cmd = "PYTHONHASHSEED=0 $(location @cython//:cython_binary) --cplus $(SRCS) --output-file $(OUTS)", tools = ["@cython//:cython_binary"] + pxd_srcs, ) diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc index 72c12318ca..ff4b4436bb 100644 --- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc +++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc @@ -115,18 +115,17 @@ class LibHDFS { const char* kLibHdfsDso = "libhdfs.so"; #endif char* hdfs_home = getenv("HADOOP_HDFS_HOME"); - if (hdfs_home == nullptr) { - status_ = errors::FailedPrecondition( - "Environment variable HADOOP_HDFS_HOME not set"); - return; - } - string path = io::JoinPath(hdfs_home, "lib", "native", kLibHdfsDso); - status_ = TryLoadAndBind(path.c_str(), &handle_); - if (!status_.ok()) { - // try load libhdfs.so using dynamic loader's search path in case - // libhdfs.so is installed in non-standard location - status_ = TryLoadAndBind(kLibHdfsDso, &handle_); + if (hdfs_home != nullptr) { + string path = io::JoinPath(hdfs_home, "lib", "native", kLibHdfsDso); + status_ = TryLoadAndBind(path.c_str(), &handle_); + if (status_.ok()) { + return; + } } + + // Try to load the library dynamically in case it has been installed + // to a in non-standard location. + status_ = TryLoadAndBind(kLibHdfsDso, &handle_); } Status status_; diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 8e316472fe..708f32ba80 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -74,6 +74,11 @@ int NumSchedulableCPUs() { return kDefaultCores; } +int NumHyperthreadsPerCore() { + static const int ht_per_core = tensorflow::port::CPUIDNumSMT(); + return (ht_per_core > 0) ? 
ht_per_core : 1; +} + void* AlignedMalloc(size_t size, int minimum_alignment) { #if defined(__ANDROID__) return memalign(minimum_alignment, size); diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 522a9d84fd..cb1fd09dbb 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -19,12 +19,12 @@ limitations under the License. // TensorFlow uses semantic versioning, see http://semver.org/. #define TF_MAJOR_VERSION 1 -#define TF_MINOR_VERSION 8 +#define TF_MINOR_VERSION 9 #define TF_PATCH_VERSION 0 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "" +#define TF_VERSION_SUFFIX "-rc0" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index dffc965b14..90b6533690 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -42,6 +42,7 @@ limitations under the License. #ifndef INTEL_MKL_ML #include "mkldnn.hpp" +#include "tensorflow/core/lib/core/stringpiece.h" using mkldnn::engine; using mkldnn::memory; @@ -712,15 +713,48 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, return output_tensor; } #else +using mkldnn::stream; +template class MklDnnData; + template inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, const MklDnnShape& mkl_shape) { Tensor output_tensor; - TensorShape output_shape; - - TF_CHECK_OK( - Status(error::Code::UNIMPLEMENTED, "Unimplemented conversion function")); - + try { + if (!mkl_shape.IsMklTensor()) + return mkl_tensor; // return input since it is already TF tensor + + TensorShape output_shape = mkl_shape.GetTfShape();; + + // Allocate output tensor. + context->allocate_temp(DataTypeToEnum::v(), + output_shape, &output_tensor); + + auto cpu_engine = engine(engine::cpu, 0); + MklDnnData input(&cpu_engine); + + // Get Mkl layout of input tensor. + auto input_mkl_md = mkl_shape.GetMklLayout(); + auto output_tf_md = mkl_shape.GetTfLayout(); + auto output_tf_pd = memory::primitive_desc(output_tf_md, cpu_engine); + input.SetUsrMem(input_mkl_md, &mkl_tensor); + + // reorder + if (input.IsReorderNeeded(output_tf_pd)) { + std::vector net; + CHECK_EQ(input.CheckReorderToOpMem(output_tf_pd, &output_tensor, &net), + true); + stream(stream::kind::eager).submit(net).wait(); + } else { + // If not, just forward input tensor to output tensor. 
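Back in cpu_info.cc, CPUIDNumSMT derives the hyperthread count from CPUID leaf 11 by pulling two bit fields out of the returned registers. A small worked sketch of that field extraction with made-up register values (only the bit positions come from the code above; the sample values are hypothetical):

    #include <cstdint>
    #include <iostream>

    int main() {
      // Pretend CPUID(EAX=11, ECX=0) returned these values on some machine:
      std::uint32_t eax = 0x00000001;  // EAX[4:0]  = SMT mask width
      std::uint32_t ecx = 0x00000100;  // ECX[15:8] = level type, 1 == SMT

      std::uint32_t smt_mask_width = eax & 0x1f;       // -> 1
      std::uint32_t level_type = (ecx & 0xff00) >> 8;  // -> 1, i.e. an SMT level
      if (level_type == 1) {
        std::cout << (1u << smt_mask_width) << " hyperthreads per core\n";  // 2^1 = 2
      }
      return 0;
    }
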
+ CHECK(output_tensor.CopyFrom(mkl_tensor, output_shape)); + } + } catch (mkldnn::error& e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + string(e.message) + ", in file " + + string(__FILE__) + ":" + std::to_string(__LINE__); + LOG(FATAL) << "Operation received an exception: " << error_msg; + } return output_tensor; } #endif @@ -1843,7 +1877,7 @@ class FactoryKeyCreator { template void AddAsKey(const T data) { auto buffer = reinterpret_cast(&data); - Append(absl::string_view(buffer, sizeof(T))); + Append(StringPiece(buffer, sizeof(T))); } std::string GetKey() { @@ -1854,8 +1888,8 @@ class FactoryKeyCreator { string key_; const char delimiter = 'x'; const int kMaxKeyLength = 256; - void Append(absl::string_view s) { - key_.append(string(s)); + void Append(StringPiece s) { + key_.append(s.ToString()); key_.append(1, delimiter); } }; diff --git a/tensorflow/docs_src/community/groups.md b/tensorflow/docs_src/community/groups.md index d92f5775fa..0b07d413da 100644 --- a/tensorflow/docs_src/community/groups.md +++ b/tensorflow/docs_src/community/groups.md @@ -1,17 +1,38 @@ # User Groups -TensorFlow has communities around the world. +TensorFlow has communities around the world. [Submit your community!](https://docs.google.com/forms/d/e/1FAIpQLSc_RQIUYtVgLLihzATaO_WUXkEyBDE_OoRoOXYDPmBEvHuEBA/viewform) ## Asia -* [TensorFlow Korea (TF-KR) User Group](https://www.facebook.com/groups/TensorFlowKR/) _(Korean language)_ -* [TensorFlow User Group Tokyo](https://tfug-tokyo.connpass.com/) _(Japanese Language)_ -* [Soleil Data Dojo](https://soleildatadojo.connpass.com/) _(Japanese language)_ +* [TensorFlow China community](https://www.tensorflowers.cn) +* [TensorFlow Korea (TF-KR) User Group](https://www.facebook.com/groups/TensorFlowKR/) +* [TensorFlow User Group Tokyo](https://tfug-tokyo.connpass.com/) +* [Soleil Data Dojo](https://soleildatadojo.connpass.com/) * [TensorFlow User Group Utsunomiya](https://tfug-utsunomiya.connpass.com/) +* [TensorFlow Philippines Community](https://www.facebook.com/groups/TensorFlowPH/) +* [TensorFlow and Deep Learning Singapore](https://www.meetup.com/TensorFlow-and-Deep-Learning-Singapore/) +* [TensorFlow India](https://www.facebook.com/tensorflowindia) ## Europe * [TensorFlow Barcelona](https://www.meetup.com/Barcelona-Machine-Learning-Meetup/) * [TensorFlow Madrid](https://www.meetup.com/TensorFlow-Madrid/) +* [Tensorflow Belgium](https://www.meetup.com/TensorFlow-Belgium) +* [TensorFlow x Rome Meetup](https://www.meetup.com/it-IT/TensorFlow-x-Rome-Meetup) +* [TensorFlow London](https://www.meetup.com/TensorFlow-London/) +* [TensorFlow Edinburgh](https://www.meetup.com/tensorflow-edinburgh/) + +## America + +* [TensorFlow Buenos Aires](https://www.meetup.com/TensorFlow-Buenos-Aires/) + + +## Oceania +* [Melbourne TensorFlow Meetup](https://www.meetup.com/Melbourne-TensorFlow-Meetup) + + +## Africa + +* [TensorFlow Tunis Meetup](https://www.meetup.com/fr-FR/TensorFlow-Tunis-Meetup/) diff --git a/tensorflow/docs_src/get_started/eager.md b/tensorflow/docs_src/get_started/eager.md index f08ac74425..bbb25e20c6 100644 --- a/tensorflow/docs_src/get_started/eager.md +++ b/tensorflow/docs_src/get_started/eager.md @@ -1,3 +1,3 @@ # Get Started with Eager Execution -[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/r1.8.0/samples/core/get_started/eager.ipynb) +[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/r1.9.0/samples/core/get_started/eager.ipynb) diff --git 
a/tensorflow/docs_src/get_started/index.md b/tensorflow/docs_src/get_started/index.md index 55579d52fb..232d2f1547 100644 --- a/tensorflow/docs_src/get_started/index.md +++ b/tensorflow/docs_src/get_started/index.md @@ -10,9 +10,9 @@ course prior to diving into TensorFlow documentation: TensorFlow is a tool for machine learning. While it contains a wide range of functionality, TensorFlow is mainly designed for deep neural network models. -The easiest way to get started with TensorFlow is using Eager Execution. +The easiest way to get started with TensorFlow is by using Eager Execution. - * @{$get_started/eager}, is for anyone new to machine learning or TensorFlow. + * @{$get_started/eager}, is for anyone new to machine learning or TensorFlow. TensorFlow provides many APIs. The remainder of this section focuses on the Estimator API which provide scalable, high-performance models. See the diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 1abd840ab3..2901848745 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.9.0-rc0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 52a2a3f8a6..55bc0f64e7 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.9.0-rc0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 1256fb99c4..637231da12 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.8.0 + 1.9.0-rc0 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.8.0 + 1.9.0-rc0 @@ -124,12 +124,12 @@ instead: org.tensorflow libtensorflow - 1.8.0 + 1.9.0-rc0 org.tensorflow libtensorflow_jni_gpu - 1.8.0 + 1.9.0-rc0 ``` @@ -148,7 +148,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc0.jar), which is the TensorFlow Java Archive (JAR). 2. 
Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -167,7 +167,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.9.0-rc0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -175,13 +175,13 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.9.0-rc0.zip). 3. Extract this .zip file. - +__Note__: The native library (`tensorflow_jni.dll`) requires `msvcp140.dll` at runtime, which is included in the [Visual C++ 2015 Redistributable](https://www.microsoft.com/en-us/download/details.aspx?id=48145) package. ### Validate the installation @@ -227,7 +227,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.8.0.jar HelloTF.java
+
javac -cp libtensorflow-1.9.0-rc0.jar HelloTF.java
### Running @@ -241,11 +241,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.8.0.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.9.0-rc0.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.8.0.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.9.0-rc0.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 0ed8160027..c8d706cf3c 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -339,9 +339,7 @@ Docker will download the TensorFlow binary image the first time you launch it. #### GPU support -Prior to installing TensorFlow with GPU support, ensure that your system meets all -[NVIDIA software requirements](#NVIDIARequirements). To launch a Docker container -with NVidia GPU support, enter a command of the following format: +To launch a Docker container with NVidia GPU support, enter a command of the following format (this [does not require any local CUDA installation](https://github.com/nvidia/nvidia-docker/wiki/CUDA#requirements)):
 $ nvidia-docker run -it -p hostPort:containerPort TensorFlowGPUImage
@@ -438,7 +436,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
 
      
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
## Validate your installation @@ -517,7 +515,7 @@ on your system: from source. To use the TensorFlow binaries, version 3.5 or higher is required. See the [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a list of supported GPU cards. -* [GPU drivers](http://nvidia.com/driver) that support your version of the CUDA +* [GPU drivers](http://nvidia.com/drivers) that support your version of the CUDA Toolkit. * The `libcupti-dev` library is the NVIDIA CUDA Profile Tools Interface. This library provides advanced profiling support. To install this library, @@ -684,14 +682,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -703,14 +701,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -722,14 +720,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp35-cp35m-linux_x86_64.whl
 
@@ -741,14 +739,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp36-cp36m-linux_x86_64.whl
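The Linux guide above lists CPU and GPU wheels per Python version and, earlier, the NVIDIA driver, CUDA, and cuDNN requirements for GPU support. As a quick post-install sanity check, something like the following sketch (not part of this patch; it assumes one of the tensorflow_gpu 1.9.0rc0 wheels above is installed) confirms the build was compiled with CUDA and that a GPU device is visible:

```
# Sketch only: verify that a GPU wheel from the list above can see a device.
# Assumes a tensorflow_gpu-1.9.0rc0 package is installed.
import tensorflow as tf

print(tf.VERSION)                    # e.g. '1.9.0-rc0'
print(tf.test.is_built_with_cuda())  # True for the *_gpu wheels
print(tf.test.gpu_device_name())     # e.g. '/device:GPU:0'; empty string if no GPU is found
```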
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 29a867a9e3..9d01271c5a 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-any.whl @@ -522,7 +522,7 @@ The value you specify depends on your Python version.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-any.whl
 
@@ -530,5 +530,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl
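Both install guides end with a "Validate your installation" step; a minimal sketch of such a check (not part of this patch), usable with any of the CPU wheels listed above:

```
# Sketch only: minimal post-install check for the CPU packages above.
import tensorflow as tf

hello = tf.constant('Hello, TensorFlow!')
with tf.Session() as sess:
    print(sess.run(hello))  # b'Hello, TensorFlow!'
```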
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 5ba522b436..dc6c1e36fc 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -81,7 +81,7 @@ or [macOS](#PrepareMac) - + ## Prepare environment for Linux Before building TensorFlow on Linux, install the following build @@ -328,10 +328,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.8.0 on Linux: +for TensorFlow 1.9.0rc0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.9.0rc0-py2-none-any.whl
 
## Validate your installation @@ -373,9 +373,9 @@ The build and installation problems you encounter typically depend on the operating system. See the "Common installation problems" section of one of the following guides: - * @{$install_linux#CommonInstallationProblems$Installing TensorFlow on Linux} - * @{$install_mac#CommonInstallationProblems$Installing TensorFlow on Mac OS} - * @{$install_windows#CommonInstallationProblems$Installing TensorFlow on Windows} + * @{$install_linux#common_installation_problems$Installing TensorFlow on Linux} + * @{$install_mac#common_installation_problems$Installing TensorFlow on Mac OS} + * @{$install_windows#common_installation_problems$Installing TensorFlow on Windows} Beyond the errors documented in those two guides, the following table notes additional errors specific to building TensorFlow. Note that we @@ -433,6 +433,8 @@ Stack Overflow and specify the `tensorflow` tag. **Linux** + + @@ -456,6 +458,7 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version: | CPU/GPU: | Python Version: | Compiler: | Build Tools: | cuDNN: | CUDA:
tensorflow-1.9.0 | CPU | 2.7, 3.3-3.6 | GCC 4.8 | Bazel 0.11.0 | N/A | N/A
tensorflow_gpu-1.9.0 | GPU | 2.7, 3.3-3.6 | GCC 4.8 | Bazel 0.11.0 | 7 | 9
tensorflow-1.8.0 | CPU | 2.7, 3.3-3.6 | GCC 4.8 | Bazel 0.10.0 | N/A | N/A
tensorflow_gpu-1.8.0 | GPU | 2.7, 3.3-3.6 | GCC 4.8 | Bazel 0.9.0 | 7 | 9
tensorflow-1.7.0 | CPU | 2.7, 3.3-3.6 | GCC 4.8 | Bazel 0.10.0 | N/A | N/A
+ @@ -472,6 +475,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version: | CPU/GPU: | Python Version: | Compiler: | Build Tools: | cuDNN: | CUDA:
tensorflow-1.9.0 | CPU | 2.7, 3.3-3.6 | Clang from xcode | Bazel 0.11.0 | N/A | N/A
tensorflow-1.8.0 | CPU | 2.7, 3.3-3.6 | Clang from xcode | Bazel 0.10.1 | N/A | N/A
tensorflow-1.7.0 | CPU | 2.7, 3.3-3.6 | Clang from xcode | Bazel 0.10.1 | N/A | N/A
tensorflow-1.6.0 | CPU | 2.7, 3.3-3.6 | Clang from xcode | Bazel 0.8.1 | N/A | N/A
+ + diff --git a/tensorflow/docs_src/mobile/linking_libs.md b/tensorflow/docs_src/mobile/linking_libs.md index cf0db59021..efef5dd0da 100644 --- a/tensorflow/docs_src/mobile/linking_libs.md +++ b/tensorflow/docs_src/mobile/linking_libs.md @@ -27,7 +27,7 @@ called `libandroid_tensorflow_inference_java.jar`. There are three ways to include this functionality in your program: 1. Include the jcenter AAR which contains it, as in this - [example app](https://github.com/googlecodelabs/tensorflow-for-poets-2/blob/master/android/build.gradle#L59-L65) + [example app](https://github.com/googlecodelabs/tensorflow-for-poets-2/blob/master/android/tfmobile/build.gradle#L59-L65) 2. Download the nightly precompiled version from [ci.tensorflow.org](http://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/). diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md index 8b22c04d87..2b84dbb973 100644 --- a/tensorflow/docs_src/mobile/prepare_models.md +++ b/tensorflow/docs_src/mobile/prepare_models.md @@ -105,8 +105,8 @@ inline constants so everything’s in one file. To handle the conversion, you need the `freeze_graph.py` script, that’s held in [`tensorflow/python/tools/freeze_graph.py`](https://www.tensorflow.org/code/tensorflow/python/tools/freeze_graph.py). You’ll run it like this: - bazel build tensorflow/tools:freeze_graph - bazel-bin/tensorflow/tools/freeze_graph \ + bazel build tensorflow/python/tools:freeze_graph + bazel-bin/tensorflow/python/tools/freeze_graph \ --input_graph=/tmp/model/my_graph.pb \ --input_checkpoint=/tmp/model/model.ckpt-1000 \ --output_graph=/tmp/frozen_graph.pb \ diff --git a/tensorflow/docs_src/performance/quantization.md b/tensorflow/docs_src/performance/quantization.md index 2fea02d861..c97f74139c 100644 --- a/tensorflow/docs_src/performance/quantization.md +++ b/tensorflow/docs_src/performance/quantization.md @@ -227,8 +227,8 @@ of 30.0f, and an 8-bit array, the quantized values represent the following:
Version: | CPU/GPU: | Python Version: | Compiler: | Build Tools: | cuDNN: | CUDA:
tensorflow-1.9.0 | CPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | N/A | N/A
tensorflow_gpu-1.9.0 | GPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | 7 | 9
tensorflow-1.8.0 | CPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | N/A | N/A
tensorflow_gpu-1.8.0 | GPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | 7 | 9
tensorflow-1.7.0 | CPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | N/A | N/A
- +
Quantized | Float
0 | -10.0
255 | 30.0
128 | 10.0
255 | 30.0
Table 2: Example quantized value range diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md index c4aae1d9d6..b13b47184d 100644 --- a/tensorflow/docs_src/programmers_guide/estimators.md +++ b/tensorflow/docs_src/programmers_guide/estimators.md @@ -21,18 +21,17 @@ Note: TensorFlow also includes a deprecated `Estimator` class at Estimators provide the following benefits: -* You can run Estimators-based models on a local host or on a +* You can run Estimator-based models on a local host or on a distributed multi-server environment without changing your model. - Furthermore, you can run Estimators-based models on CPUs, GPUs, + Furthermore, you can run Estimator-based models on CPUs, GPUs, or TPUs without recoding your model. * Estimators simplify sharing implementations between model developers. -* You can develop a state of the art model with high-level intuitive code, +* You can develop a state of the art model with high-level intuitive code. In short, it is generally much easier to create models with Estimators than with the low-level TensorFlow APIs. -* Estimators are themselves built on tf.layers, which +* Estimators are themselves built on @{tf.layers}, which simplifies customization. -* Estimators build the graph for you. In other words, you don't have to - build the graph. +* Estimators build the graph for you. * Estimators provide a safe distributed training loop that controls how and when to: * build the graph @@ -57,7 +56,7 @@ the "plumbing" for you. That is, pre-made Estimators create and manage pre-made Estimators let you experiment with different model architectures by making only minimal code changes. @{tf.estimator.DNNClassifier$`DNNClassifier`}, for example, is a pre-made Estimator class that trains classification models -through dense, feed-forward neural networks. +based on dense, feed-forward neural networks. ### Structure of a pre-made Estimators program @@ -79,7 +78,7 @@ of the following four steps: an input function: def input_fn(dataset): - ... # manipulate dataset, extracting feature names and the label + ... # manipulate dataset, extracting the feature dict and the label return feature_dict, label (See @{$programmers_guide/datasets} for full details.) @@ -96,13 +95,13 @@ of the following four steps: population = tf.feature_column.numeric_column('population') crime_rate = tf.feature_column.numeric_column('crime_rate') median_education = tf.feature_column.numeric_column('median_education', - normalizer_fn='lambda x: x - global_education_mean') + normalizer_fn=lambda x: x - global_education_mean) 3. **Instantiate the relevant pre-made Estimator.** For example, here's a sample instantiation of a pre-made Estimator named `LinearClassifier`: # Instantiate an estimator, passing the feature columns. - estimator = tf.estimator.Estimator.LinearClassifier( + estimator = tf.estimator.LinearClassifier( feature_columns=[population, crime_rate, median_education], ) diff --git a/tensorflow/docs_src/programmers_guide/feature_columns.md b/tensorflow/docs_src/programmers_guide/feature_columns.md index 845194fe0e..90f5c53a17 100644 --- a/tensorflow/docs_src/programmers_guide/feature_columns.md +++ b/tensorflow/docs_src/programmers_guide/feature_columns.md @@ -528,10 +528,10 @@ suggested by the following snippet: categorical_column = ... # Create any categorical column # Represent the categorical column as an embedding column. -# This means creating a one-hot vector with one element for each category. 
+# This means creating an embedding vector lookup table with one element for each category. embedding_column = tf.feature_column.embedding_column( categorical_column=categorical_column, - dimension=dimension_of_embedding_vector) + dimension=embedding_dimensions) ``` @{$programmers_guide/embedding$Embeddings} is a significant topic within machine diff --git a/tensorflow/examples/learn/iris.py b/tensorflow/examples/learn/iris.py index 03e60972aa..86f5204ec3 100644 --- a/tensorflow/examples/learn/iris.py +++ b/tensorflow/examples/learn/iris.py @@ -21,7 +21,8 @@ from __future__ import division from __future__ import print_function import os -import urllib + +from six.moves.urllib.request import urlretrieve import tensorflow as tf @@ -38,9 +39,7 @@ FEATURE_KEYS = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width'] def maybe_download_iris_data(file_name, download_url): """Downloads the file and returns the number of data.""" if not os.path.exists(file_name): - raw = urllib.urlopen(download_url).read() - with open(file_name, 'w') as f: - f.write(raw) + urlretrieve(download_url, file_name) # The first line is a comma-separated string. The first one is the number of # total data in the file. diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 5602775b62..a5224fbda0 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -10955,7 +10955,7 @@ func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistorted // SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value. // // value: The cropped area of the image must contain a fraction of the -// supplied image within in this range. +// supplied image within this range. // If not specified, defaults to func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr { return func(m optionalAttr) { @@ -18098,9 +18098,10 @@ func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_val } // Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)` -// // if < 0, `scale * features` otherwise. // +// Assumes weights to have zero mean and variance 1.0 / fan_in. +// // See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) func Selu(scope *Scope, features tf.Output) (activations tf.Output) { if scope.Err() != nil { @@ -21625,7 +21626,7 @@ func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr { // generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. // // The `bad_color` argument is the color to use in the generated images for -// non-finite input values. It is a `unit8` 1-D tensor of length `channels`. +// non-finite input values. It is a `uint8` 1-D tensor of length `channels`. // Each element must be in the range `[0, 255]` (It represents the value of a // pixel in the output image). Non-finite values in the input tensor are // replaced by this tensor in the output image. The default value is the color @@ -24018,7 +24019,7 @@ func SampleDistortedBoundingBoxV2AspectRatioRange(value []float32) SampleDistort // SampleDistortedBoundingBoxV2AreaRange sets the optional area_range attribute to value. // // value: The cropped area of the image must contain a fraction of the -// supplied image within in this range. +// supplied image within this range. 
// If not specified, defaults to func SampleDistortedBoundingBoxV2AreaRange(value []float32) SampleDistortedBoundingBoxV2Attr { return func(m optionalAttr) { @@ -24714,8 +24715,7 @@ type DecodeProtoV2Attr func(optionalAttr) // If not specified, defaults to "local://" func DecodeProtoV2DescriptorSource(value string) DecodeProtoV2Attr { return func(m optionalAttr) { - m["descriptor_source"] = value - } + m["descriptor_source"] = value } } // DecodeProtoV2MessageFormat sets the optional message_format attribute to value. diff --git a/tensorflow/java/src/gen/cc/op_generator.cc b/tensorflow/java/src/gen/cc/op_generator.cc index debd95fc62..9b171f66ec 100644 --- a/tensorflow/java/src/gen/cc/op_generator.cc +++ b/tensorflow/java/src/gen/cc/op_generator.cc @@ -376,9 +376,6 @@ void GenerateOp(const OpSpec& op, const EndpointSpec& endpoint, } } // op annotations - op_class.add_annotation( - Annotation::Create("Generated", "javax.annotation") - .attributes("value = \"TensorFlow Java Op Generator\"")); if (endpoint.deprecated()) { op_class.add_annotation(Annotation::Create("Deprecated")); string explanation; @@ -415,8 +412,12 @@ void GenerateOp(const OpSpec& op, const EndpointSpec& endpoint, SourceFileWriter writer(op_file.get()); std::list dependencies; CollectOpDependencies(op, mode, &dependencies); - writer.Write(kLicense).EndLine().BeginType(op_class, PUBLIC | FINAL, - &dependencies, &op_javadoc); + writer.Write(kLicense) + .EndLine() + .Write("// This class has been generated, DO NOT EDIT!") + .EndLine() + .EndLine() + .BeginType(op_class, PUBLIC | FINAL, &dependencies, &op_javadoc); if (!op.optional_attributes().empty()) { RenderOptionsClass(op, op_class, &writer); } diff --git a/tensorflow/java/src/gen/cc/op_specs.cc b/tensorflow/java/src/gen/cc/op_specs.cc index 181fd4c5e3..941ab2699c 100644 --- a/tensorflow/java/src/gen/cc/op_specs.cc +++ b/tensorflow/java/src/gen/cc/op_specs.cc @@ -96,6 +96,7 @@ Type TypeResolver::TypeOf(const OpDef_ArgDef& arg_def, bool* iterable_out) { *iterable_out = true; visited_attrs_.insert(std::make_pair(arg_def.number_attr(), Type::Int())); } + Type type = Type::Wildcard(); if (arg_def.type() != DataType::DT_INVALID) { // resolve type from DataType diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index b2e6c60021..bd97b181ff 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -196,11 +196,11 @@ def implicit_val_and_grad(f): # TODO(cais): Remove calls to tf.constant() once the gradients functions # accept lists and np.ndarrays. 
- def grad_fn(*args): + def grad_fn(*args, **kwds): """Computes the gradient of the wrapped function.""" this_tape = tape.push_new_tape() try: - end_node = f(*args) + end_node = f(*args, **kwds) if end_node is None: raise ValueError("Cannot differentiate a function that returns None; " "did you forget to return a value from {}?".format( diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 9cd17e0407..20522098b0 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -978,7 +978,10 @@ py_test( size = "large", srcs = ["keras_test.py"], srcs_version = "PY2AND3", - tags = ["notsan"], + tags = [ + "no_windows", + "notsan", + ], deps = [ ":keras", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/python/estimator/exporter.py b/tensorflow/python/estimator/exporter.py index 7cdf840c97..b18212cfcd 100644 --- a/tensorflow/python/estimator/exporter.py +++ b/tensorflow/python/estimator/exporter.py @@ -156,7 +156,7 @@ def _loss_smaller(best_eval_result, current_eval_result): return best_eval_result[default_key] > current_eval_result[default_key] -def _verify_compre_fn_args(compare_fn): +def _verify_compare_fn_args(compare_fn): """Verifies compare_fn arguments.""" args = set(util.fn_args(compare_fn)) if 'best_eval_result' not in args: @@ -265,7 +265,7 @@ class BestExporter(Exporter): self._compare_fn = compare_fn if self._compare_fn is None: raise ValueError('`compare_fn` must not be None.') - _verify_compre_fn_args(self._compare_fn) + _verify_compare_fn_args(self._compare_fn) self._saved_model_exporter = _SavedModelExporter( name, serving_input_receiver_fn, assets_extra, as_text) diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py index 035c7c148c..a6cefdece2 100644 --- a/tensorflow/python/estimator/inputs/numpy_io.py +++ b/tensorflow/python/estimator/inputs/numpy_io.py @@ -136,11 +136,13 @@ def numpy_input_fn(x, values in `x` have same shape). ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict. ValueError: if x or y is an empty dict. - TypeError: `x` is not a dict or array, or if `shuffle` is not bool. + TypeError: `x` is not a dict or array. + ValueError: if 'shuffle' is not provided or a bool. """ if not isinstance(shuffle, bool): - raise TypeError('shuffle must be explicitly set as boolean; ' - 'got {}'.format(shuffle)) + raise ValueError('shuffle must be provided and explicitly set as boolean ' + '(it is recommended to set it as True for training); ' + 'got {}'.format(shuffle)) def input_fn(): """Numpy input function.""" diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py index 92d057e25d..81b201cc5c 100644 --- a/tensorflow/python/estimator/inputs/numpy_io_test.py +++ b/tensorflow/python/estimator/inputs/numpy_io_test.py @@ -286,8 +286,9 @@ class NumpyIoTest(test.TestCase): x = np.arange(32, 36) y = np.arange(4) with self.test_session(): - with self.assertRaisesRegexp(TypeError, - 'shuffle must be explicitly set as boolean'): + with self.assertRaisesRegexp(ValueError, + 'shuffle must be provided and explicitly ' + 'set as boolean'): # Default shuffle is None. 
numpy_io.numpy_input_fn(x, y) diff --git a/tensorflow/python/estimator/inputs/pandas_io.py b/tensorflow/python/estimator/inputs/pandas_io.py index 938e244fb3..57f8e5fd6a 100644 --- a/tensorflow/python/estimator/inputs/pandas_io.py +++ b/tensorflow/python/estimator/inputs/pandas_io.py @@ -68,15 +68,16 @@ def pandas_input_fn(x, Raises: ValueError: if `x` already contains a column with the same name as `y`, or if the indexes of `x` and `y` don't match. - TypeError: `shuffle` is not bool. + ValueError: if 'shuffle' is not provided or a bool. """ if not HAS_PANDAS: raise TypeError( 'pandas_input_fn should not be called without pandas installed') if not isinstance(shuffle, bool): - raise TypeError('shuffle must be explicitly set as boolean; ' - 'got {}'.format(shuffle)) + raise ValueError('shuffle must be provided and explicitly set as boolean ' + '(it is recommended to set it as True for training); ' + 'got {}'.format(shuffle)) x = x.copy() if y is not None: diff --git a/tensorflow/python/estimator/inputs/pandas_io_test.py b/tensorflow/python/estimator/inputs/pandas_io_test.py index e5912a3b28..dcecf6dd61 100644 --- a/tensorflow/python/estimator/inputs/pandas_io_test.py +++ b/tensorflow/python/estimator/inputs/pandas_io_test.py @@ -70,8 +70,9 @@ class PandasIoTest(test.TestCase): return x, _ = self.makeTestDataFrame() y_noindex = pd.Series(np.arange(-32, -28)) - with self.assertRaisesRegexp(TypeError, - 'shuffle must be explicitly set as boolean'): + with self.assertRaisesRegexp(ValueError, + 'shuffle must be provided and explicitly ' + 'set as boolean'): # Default shuffle is None pandas_io.pandas_input_fn(x, y_noindex) diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions.py b/tensorflow/python/estimator/inputs/queues/feeding_functions.py index 8e2ec83020..51a61adb21 100644 --- a/tensorflow/python/estimator/inputs/queues/feeding_functions.py +++ b/tensorflow/python/estimator/inputs/queues/feeding_functions.py @@ -250,7 +250,7 @@ class _PandasFeedFn(object): num_epochs=None): if len(placeholders) != len(dataframe.columns) + 1: raise ValueError("Expected {} placeholders; got {}.".format( - len(dataframe.columns), len(placeholders))) + len(dataframe.columns) + 1, len(placeholders))) self._index_placeholder = placeholders[0] self._col_placeholders = placeholders[1:] self._dataframe = dataframe diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py index c80af08fba..2f439f765e 100644 --- a/tensorflow/python/estimator/keras.py +++ b/tensorflow/python/estimator/keras.py @@ -70,7 +70,7 @@ def _convert_tensor(x): return x -def _any_variable_initalized(): +def _any_variable_initialized(): """Check if any variable has been initialized in the Keras model. Returns: @@ -511,7 +511,7 @@ def model_to_estimator(keras_model=None, keras_model_fn, model_dir=model_dir, config=config) # Check if we need to call get_weights: - if _any_variable_initalized(): + if _any_variable_initialized(): keras_weights = keras_model.get_weights() # Warn if config passed to estimator tries to update GPUOptions. 
If a # session has already been created, the GPUOptions passed to the first diff --git a/tensorflow/python/estimator/keras_test.py b/tensorflow/python/estimator/keras_test.py index 6688a84130..5e094ae92b 100644 --- a/tensorflow/python/estimator/keras_test.py +++ b/tensorflow/python/estimator/keras_test.py @@ -31,10 +31,10 @@ from tensorflow.python.estimator import run_config as run_config_lib from tensorflow.python.estimator.inputs import numpy_io from tensorflow.python.framework import ops from tensorflow.python.framework import test_util -from tensorflow.python.keras import backend as K from tensorflow.python.keras import testing_utils from tensorflow.python.keras.applications import mobilenet from tensorflow.python.keras.optimizers import SGD +from tensorflow.python.ops.parsing_ops import gen_parsing_ops from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.summary.writer import writer_cache @@ -146,13 +146,13 @@ def randomize_io_type(array, name): def multi_inputs_multi_outputs_model(): a = keras.layers.Input(shape=(16,), name='input_a') b = keras.layers.Input(shape=(16,), name='input_b') - m = keras.layers.Input(shape=(8,), dtype='bool', name='input_m') + m = keras.layers.Input(shape=(8,), dtype='string', name='input_m') dense = keras.layers.Dense(8, name='dense_1') a_2 = dense(a) - # Apply a mask - s_2 = keras.layers.Lambda(lambda k: - K.switch(k[0], k[1], K.zeros_like(k[1])))([m, a_2]) + # Read m + m_2 = keras.layers.Lambda(gen_parsing_ops.string_to_number)(m) + s_2 = keras.layers.Lambda(lambda k: k[0] * k[1])([m_2, a_2]) b_2 = dense(b) merged = keras.layers.concatenate([s_2, b_2], name='merge') c = keras.layers.Dense(3, activation='softmax', name='dense_2')(merged) @@ -372,13 +372,13 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): def train_input_fn(): input_dict = {'input_a': a_train, 'input_b': b_train, - 'input_m': input_m_train > 0} + 'input_m': input_m_train.astype(np.str)} output_dict = {'dense_2': c_train, 'dense_3': d_train} return input_dict, output_dict def eval_input_fn(): input_dict = {'input_a': a_test, 'input_b': b_test, - 'input_m': input_m_test > 0} + 'input_m': input_m_test.astype(np.str)} output_dict = {'dense_2': c_test, 'dense_3': d_test} return input_dict, output_dict diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index 2d6925d1a8..af5d709f7e 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -1389,7 +1389,7 @@ class LayoutOptimizerTest(test.TestCase): expected_num_transposes = 3 self.assertEqual(expected_num_transposes, num_transposes) self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes) - self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes) + self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes) self.assertAllClose(output_val_ref, output_val, atol=1e-3) def testLoopWithVecAnd4D(self): @@ -1413,7 +1413,7 @@ class LayoutOptimizerTest(test.TestCase): expected_num_transposes = 2 self.assertEqual(expected_num_transposes, num_transposes) self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes) - self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes) + self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes) self.assertAllClose(output_val_ref, output_val, atol=1e-3) def testBinaryOpSecondPort(self): diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py index 
e487f583be..f608dea430 100644 --- a/tensorflow/python/keras/activations.py +++ b/tensorflow/python/keras/activations.py @@ -93,6 +93,8 @@ def selu(x): - To be used together with the initialization "lecun_normal". - To be used together with the dropout variant "AlphaDropout". + References: + - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) """ alpha = 1.6732632423543772848170429916717 scale = 1.0507009873554804934193349852946 diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 70b6a8431a..9f91368e5b 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -724,15 +724,6 @@ class TensorBoard(Callback): for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf_summary.histogram(mapped_weight_name, weight) - if self.write_grads: - grads = model.optimizer.get_gradients(model.total_loss, weight) - - def is_indexed_slices(grad): - return type(grad).__name__ == 'IndexedSlices' - - grads = [grad.values if is_indexed_slices(grad) else grad - for grad in grads] - tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) @@ -759,6 +750,18 @@ class TensorBoard(Callback): assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(mapped_weight_name, w_img) + if self.write_grads: + for weight in layer.trainable_weights: + mapped_weight_name = weight.name.replace(':', '_') + grads = model.optimizer.get_gradients(model.total_loss, weight) + + def is_indexed_slices(grad): + return type(grad).__name__ == 'IndexedSlices' + + grads = [grad.values if is_indexed_slices(grad) else grad + for grad in grads] + tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads) + if hasattr(layer, 'output'): tf_summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf_summary.merge_all() diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index b355f4a269..5062a26580 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -653,6 +653,8 @@ class KerasCallbacksTest(test.TestCase): model.add( keras.layers.Dense( NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu')) + # non_trainable_weights: moving_variance, moving_mean + model.add(keras.layers.BatchNormalization()) model.add(keras.layers.Dense(NUM_CLASSES, activation='softmax')) model.compile( loss='categorical_crossentropy', diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py index a4cd017d60..1c9135982e 100644 --- a/tensorflow/python/keras/engine/network.py +++ b/tensorflow/python/keras/engine/network.py @@ -123,7 +123,7 @@ class Network(base_layer.Layer): # Entries are unique. Includes input and output layers. 
self._layers = [] - # Used in symbolic mode only, only in conjonction with graph-networks + # Used in symbolic mode only, only in conjunction with graph-networks self._outbound_nodes = [] self._inbound_nodes = [] diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py index 6a94986b9c..7e82db028b 100644 --- a/tensorflow/python/keras/engine/saving_test.py +++ b/tensorflow/python/keras/engine/saving_test.py @@ -482,7 +482,7 @@ class TestWholeModelSaving(test.TestCase): with h5py.File(fname, 'r') as h5file: num_names_arrays = len([attr for attr in h5file['model_weights'].attrs if attr.startswith('layer_names')]) - # The chunking of layer names array should have happend. + # The chunking of layer names array should have happened. self.assertGreater(num_names_arrays, 0) out2 = model.predict(x) self.assertAllClose(out, out2, atol=1e-05) @@ -527,7 +527,7 @@ class TestWholeModelSaving(test.TestCase): num_weight_arrays = len( [attr for attr in h5file['model_weights']['nested_model'].attrs if attr.startswith('weight_names')]) - # The chunking of layer names array should have happend. + # The chunking of layer names array should have happened. self.assertGreater(num_weight_arrays, 0) out2 = model.predict(x) self.assertAllClose(out, out2, atol=1e-05) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 89c1f1a40f..fce6cbdb7a 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -24,6 +24,7 @@ import numpy as np from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util @@ -409,11 +410,13 @@ class Model(Network): else: if sample_weight_mode == 'temporal': sample_weights.append(array_ops.placeholder_with_default( - [[1.]], shape=[None, None], name=name + '_sample_weights')) + constant_op.constant([[1.]], dtype=K.floatx()), + shape=[None, None], name=name + '_sample_weights')) sample_weight_modes.append('temporal') else: sample_weights.append(array_ops.placeholder_with_default( - [1.], shape=[None], name=name + '_sample_weights')) + constant_op.constant([1.], dtype=K.floatx()), + shape=[None], name=name + '_sample_weights')) sample_weight_modes.append(None) self.sample_weight_modes = sample_weight_modes self._feed_sample_weight_modes = [] diff --git a/tensorflow/python/keras/engine/training_eager.py b/tensorflow/python/keras/engine/training_eager.py index 2ecbff3a1c..e8838cd3bc 100644 --- a/tensorflow/python/keras/engine/training_eager.py +++ b/tensorflow/python/keras/engine/training_eager.py @@ -732,7 +732,7 @@ def slice_arrays(arrays, indices, contiguous=True): """Slices batches out of provided arrays (workaround for eager tensors). Unfortunately eager tensors don't have the same slicing behavior as - Numpy arrays (they folow the same slicing behavior as symbolic TF tensors), + Numpy arrays (they follow the same slicing behavior as symbolic TF tensors), hence we cannot use `generic_utils.slice_arrays` directly and we have to implement this workaround based on `concat`. This has a performance cost. 
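The `slice_arrays` docstring above notes that eager tensors follow symbolic-tensor slicing rules rather than NumPy fancy indexing, which is why batches are assembled via `concat`. A small illustration of the same idea (a sketch, not the TensorFlow implementation; assumes TF 1.9 with eager execution enabled):

```
# Sketch only: gather arbitrary rows from an eager tensor without NumPy-style
# fancy indexing, mirroring the concat-based workaround described above.
import numpy as np
import tensorflow as tf

tf.enable_eager_execution()

data = tf.constant(np.arange(20).reshape(10, 2), dtype=tf.float32)
indices = [0, 3, 7]

# Concatenate one-row slices (essentially what the workaround does) ...
batch = tf.concat([data[i:i + 1] for i in indices], axis=0)
# ... which produces the same result as a single gather.
batch_via_gather = tf.gather(data, indices)

print(batch.numpy())
print(batch_via_gather.numpy())
```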
diff --git a/tensorflow/python/keras/initializers_test.py b/tensorflow/python/keras/initializers_test.py index a54d6da839..c519e194bd 100644 --- a/tensorflow/python/keras/initializers_test.py +++ b/tensorflow/python/keras/initializers_test.py @@ -71,7 +71,7 @@ class KerasInitializersTest(test.TestCase): stddev=1, seed=126), tensor_shape, - target_mean=0., target_std=None, target_max=2) + target_mean=0., target_max=2, target_min=-2) def test_constant(self): tensor_shape = (5, 6, 4) @@ -83,49 +83,49 @@ class KerasInitializersTest(test.TestCase): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(3. / fan_in) + std = np.sqrt(1. / fan_in) self._runner(keras.initializers.lecun_uniform(seed=123), tensor_shape, - target_mean=0., target_max=scale, target_min=-scale) + target_mean=0., target_std=std) def test_glorot_uniform(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, fan_out = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(6. / (fan_in + fan_out)) + std = np.sqrt(2. / (fan_in + fan_out)) self._runner(keras.initializers.glorot_uniform(seed=123), tensor_shape, - target_mean=0., target_max=scale, target_min=-scale) + target_mean=0., target_std=std) def test_he_uniform(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(6. / fan_in) + std = np.sqrt(2. / fan_in) self._runner(keras.initializers.he_uniform(seed=123), tensor_shape, - target_mean=0., target_max=scale, target_min=-scale) + target_mean=0., target_std=std) def test_lecun_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(1. / fan_in) + std = np.sqrt(1. / fan_in) self._runner(keras.initializers.lecun_normal(seed=123), tensor_shape, - target_mean=0., target_std=None, target_max=2 * scale) + target_mean=0., target_std=std) def test_glorot_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, fan_out = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(2. / (fan_in + fan_out)) + std = np.sqrt(2. / (fan_in + fan_out)) self._runner(keras.initializers.glorot_normal(seed=123), tensor_shape, - target_mean=0., target_std=None, target_max=2 * scale) + target_mean=0., target_std=std) def test_he_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(2. / fan_in) + std = np.sqrt(2. 
/ fan_in) self._runner(keras.initializers.he_normal(seed=123), tensor_shape, - target_mean=0., target_std=None, target_max=2 * scale) + target_mean=0., target_std=std) def test_orthogonal(self): tensor_shape = (20, 20) diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py index 5061825d38..f60064ed63 100644 --- a/tensorflow/python/keras/layers/core.py +++ b/tensorflow/python/keras/layers/core.py @@ -19,7 +19,9 @@ from __future__ import division from __future__ import print_function import copy +import sys import types as python_types +import warnings import numpy as np @@ -714,6 +716,7 @@ class Lambda(Layer): return self.mask def get_config(self): + module = self.function.__module__ if isinstance(self.function, python_types.LambdaType): function = generic_utils.func_dump(self.function) function_type = 'lambda' @@ -721,21 +724,26 @@ class Lambda(Layer): function = self.function.__name__ function_type = 'function' + output_shape_module = None if isinstance(self._output_shape, python_types.LambdaType): output_shape = generic_utils.func_dump(self._output_shape) output_shape_type = 'lambda' + output_shape_module = self._output_shape.__module__ elif callable(self._output_shape): output_shape = self._output_shape.__name__ output_shape_type = 'function' + output_shape_module = self._output_shape.__module__ else: output_shape = self._output_shape output_shape_type = 'raw' config = { 'function': function, + 'module': module, 'function_type': function_type, 'output_shape': output_shape, 'output_shape_type': output_shape_type, + 'output_shape_module': output_shape_module, 'arguments': self.arguments } base_config = super(Lambda, self).get_config() @@ -745,8 +753,16 @@ class Lambda(Layer): def from_config(cls, config, custom_objects=None): config = config.copy() globs = globals() + module = config.pop('module', None) + if module in sys.modules: + globs.update(sys.modules[module].__dict__) + elif module is not None: + # Note: we don't know the name of the function if it's a lambda. + warnings.warn('{} is not loaded, but a Lambda layer uses it. ' + 'It may cause errors.'.format(module) + , UserWarning) if custom_objects: - globs = dict(list(globs.items()) + list(custom_objects.items())) + globs.update(custom_objects) function_type = config.pop('function_type') if function_type == 'function': # Simple lookup in custom objects @@ -760,6 +776,14 @@ class Lambda(Layer): else: raise TypeError('Unknown function type:', function_type) + output_shape_module = config.pop('output_shape_module', None) + if output_shape_module in sys.modules: + globs.update(sys.modules[output_shape_module].__dict__) + elif output_shape_module is not None: + # Note: we don't know the name of the function if it's a lambda. + warnings.warn('{} is not loaded, but a Lambda layer uses it. 
' + 'It may cause errors.'.format(output_shape_module) + , UserWarning) output_shape_type = config.pop('output_shape_type') if output_shape_type == 'function': # Simple lookup in custom objects diff --git a/tensorflow/python/keras/models_test.py b/tensorflow/python/keras/models_test.py index c616d8f24f..e6e45902a8 100644 --- a/tensorflow/python/keras/models_test.py +++ b/tensorflow/python/keras/models_test.py @@ -144,5 +144,19 @@ class CheckpointingTests(test.TestCase): model.load_weights(save_prefix) self.assertEqual(12., self.evaluate(beta1_power)) +class TestModelBackend(test.TestCase): + + def test_model_backend_float64_use_cases(self): + # Test case for GitHub issue 19318 + floatx = keras.backend.floatx() + keras.backend.set_floatx('float64') + + x = keras.Input((5,)) + y = keras.layers.Dense(1)(x) + model = keras.models.Model(x, y) + model.compile('rmsprop', 'mse') + + keras.backend.set_floatx(floatx) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/kernel_tests/as_string_op_test.py b/tensorflow/python/kernel_tests/as_string_op_test.py index 9d54add264..94ed8ebd31 100644 --- a/tensorflow/python/kernel_tests/as_string_op_test.py +++ b/tensorflow/python/kernel_tests/as_string_op_test.py @@ -130,6 +130,16 @@ class AsStringOpTest(test.TestCase): result = output.eval(feed_dict={input_: int_inputs_}) self.assertAllEqual(s(result), ["%d" % x for x in int_inputs_]) + def testHalfInt(self): + s = lambda strs: [x.decode("ascii") for x in strs] + + with self.test_session(): + input_ = array_ops.placeholder(dtypes.int16) + int_inputs_ = [np.iinfo(np.int16).min, np.iinfo(np.int16).max] + output = string_ops.as_string(input_) + result = output.eval(feed_dict={input_: int_inputs_}) + self.assertAllEqual(s(result), ["%d" % x for x in int_inputs_]) + def testBool(self): bool_inputs_ = [False, True] s = lambda strs: [x.decode("ascii") for x in strs] diff --git a/tensorflow/python/kernel_tests/betainc_op_test.py b/tensorflow/python/kernel_tests/betainc_op_test.py index 08b03f8518..16fdedac41 100644 --- a/tensorflow/python/kernel_tests/betainc_op_test.py +++ b/tensorflow/python/kernel_tests/betainc_op_test.py @@ -172,7 +172,7 @@ class BetaincTest(test.TestCase): tf_gout_t = math_ops.betainc(tf_ga_s, tf_gb_s, tf_gx_s) err = gradient_checker.compute_gradient_error( [tf_gx_s], [gx_s.shape], tf_gout_t, gx_s.shape) - print("betainc gradient err = %g " % err) + tf_logging.info("betainc gradient err = %g " % err) self.assertLess(err, err_tolerance) # Test broadcast gradient @@ -181,7 +181,7 @@ class BetaincTest(test.TestCase): tf_gout_t = math_ops.betainc(tf_ga_s, tf_gb_s, tf_gx_s) err = gradient_checker.compute_gradient_error( [tf_gx_s], [()], tf_gout_t, ga_s.shape) - print("betainc gradient err = %g " % err) + tf_logging.info("betainc gradient err = %g " % err) self.assertLess(err, err_tolerance) diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py index e08123b041..fb52d10475 100644 --- a/tensorflow/python/kernel_tests/clip_ops_test.py +++ b/tensorflow/python/kernel_tests/clip_ops_test.py @@ -18,9 +18,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import gradient_checker from 
tensorflow.python.platform import test @@ -414,6 +417,16 @@ class ClipTest(test.TestCase): self.assertAllClose(np_ans, tf_ans) + def testClipByValueEmptyTensor(self): + # Test case for GitHub issue 19337 + zero = array_ops.placeholder(dtype=dtypes.float32, shape=None) + x = clip_ops.clip_by_value(zero, zero, zero) + y = clip_ops.clip_by_value(zero, 1.0, 1.0) + z = clip_ops.clip_by_value(zero, zero, 1.0) + w = clip_ops.clip_by_value(zero, 1.0, zero) + with self.test_session(use_gpu=True) as sess: + sess.run([x, y, z, w], feed_dict={zero: np.zeros((7, 0))}) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py index 8699fd5b25..80ba7dafc9 100644 --- a/tensorflow/python/kernel_tests/conv_ops_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_test.py @@ -312,8 +312,8 @@ class Conv2DTest(test.TestCase): expected_values = self.evaluate(expected_results) computed_values = self.evaluate(computed_results) for e_value, c_value in zip(expected_values, computed_values): - print("expected = ", e_value) - print("actual = ", c_value) + tf_logging.info("expected = ", e_value) + tf_logging.info("actual = ", c_value) self.assertAllClose( e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-4) @@ -337,8 +337,8 @@ class Conv2DTest(test.TestCase): for i in range(len(tensors)): conv = tensors[i] value = values[i] - print("expected = ", expected) - print("actual = ", value) + tf_logging.info("expected = ", expected) + tf_logging.info("actual = ", value) tol = 1e-5 if value.dtype == np.float16: tol = 1e-3 @@ -547,8 +547,8 @@ class Conv2DTest(test.TestCase): # "values" consists of two tensors for two backprops value = self.evaluate(conv) self.assertShapeEqual(value, conv) - print("expected = ", expected) - print("actual = ", value) + tf_logging.info("expected = ", expected) + tf_logging.info("actual = ", value) self.assertArrayNear(expected, value.flatten(), err) def _CompareBackpropInput(self, input_sizes, filter_sizes, output_sizes, @@ -723,8 +723,8 @@ class Conv2DTest(test.TestCase): data_format=data_format) value = self.evaluate(conv) self.assertShapeEqual(value, conv) - print("expected = ", expected) - print("actual = ", value) + tf_logging.info("expected = ", expected) + tf_logging.info("actual = ", value) self.assertArrayNear(expected, value.flatten(), 1e-5) def _CompareBackFilter(self, input_sizes, filter_sizes, output_sizes, @@ -912,8 +912,8 @@ class Conv2DTest(test.TestCase): value_2 = sess.run(conv_2) self.assertShapeEqual(value, conv) self.assertShapeEqual(value_2, conv_2) - print("expected = ", value_2) - print("actual = ", value) + tf_logging.info("expected = ", value_2) + tf_logging.info("actual = ", value) self.assertArrayNear(value_2.flatten(), value.flatten(), err) # Testing for backprops @@ -965,8 +965,8 @@ class Conv2DTest(test.TestCase): value_2 = sess.run(conv_2) self.assertShapeEqual(value, conv) self.assertShapeEqual(value_2, conv_2) - print("expected = ", value_2) - print("actual = ", value) + tf_logging.info("expected = ", value_2) + tf_logging.info("actual = ", value) self.assertArrayNear(value_2.flatten(), value.flatten(), err) def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self): @@ -1178,7 +1178,7 @@ class Conv2DTest(test.TestCase): # since fp16 numerical gradients are too imprecise. 
err = np.fabs(jacob_t - reference_jacob_t).max() - print("conv_2d gradient error = ", err) + tf_logging.info("conv_2d gradient error = ", err) self.assertLess(err, 0.002) def testInputGradientValidPaddingStrideOne(self): @@ -1546,7 +1546,7 @@ class DepthwiseConv2DTest(test.TestCase): conv = nn_impl.depthwise_conv2d( t1, t2, strides=[1, stride, stride, 1], padding=padding) value = sess.run(conv) - print("value = ", value) + tf_logging.info("value = ", value) self.assertArrayNear(expected, np.ravel(value), 1e-5) self.assertShapeEqual(value, conv) @@ -1668,7 +1668,7 @@ class SeparableConv2DTest(test.TestCase): conv = array_ops.transpose(conv, [0, 2, 3, 1]) value = sess.run(conv) - print("value = ", value) + tf_logging.info("value = ", value) self.assertArrayNear(expected, np.ravel(value), 1e-5) self.assertShapeEqual(value, conv) @@ -1826,7 +1826,7 @@ class Conv2DBenchmark(test.Benchmark): wall_time = time.time() - start self.report_benchmark( name="conv_stack_iter_%d" % iter_index, wall_time=wall_time) - print("conv_stack_iter_%d: %.4f" % (iter_index, wall_time)) + tf_logging.info("conv_stack_iter_%d: %.4f" % (iter_index, wall_time)) def GetInceptionFwdTest(input_size, filter_size, stride, padding, diff --git a/tensorflow/python/kernel_tests/gather_nd_op_test.py b/tensorflow/python/kernel_tests/gather_nd_op_test.py index 91ebe8de99..58e2a8ac2a 100644 --- a/tensorflow/python/kernel_tests/gather_nd_op_test.py +++ b/tensorflow/python/kernel_tests/gather_nd_op_test.py @@ -197,7 +197,21 @@ class GatherNdTest(test.TestCase): self.assertEqual(None, shape.ndims) self.assertEqual(None, shape[0].value) - def testBadIndices(self): + def testBadIndicesCPU(self): + with self.test_session(use_gpu=False): + params = [0, 1, 2] + indices = [[[0], [7]]] # Make this one higher rank + gather_nd = array_ops.gather_nd(params, indices) + with self.assertRaisesOpError( + r"flat indices\[1, :\] = \[7\] does not index into param " + r"\(shape: \[3\]\)"): + gather_nd.eval() + + def _disabledTestBadIndicesGPU(self): + # TODO disabled due to different behavior on GPU and CPU + # On GPU the bad indices do not raise error but fetch 0 values + if not test.is_gpu_available(): + return with self.test_session(use_gpu=True): params = [0, 1, 2] indices = [[[0], [7]]] # Make this one higher rank @@ -207,7 +221,21 @@ class GatherNdTest(test.TestCase): r"\(shape: \[3\]\)"): gather_nd.eval() - def testBadIndicesWithSlices(self): + def testBadIndicesWithSlicesCPU(self): + with self.test_session(use_gpu=False): + params = [[0, 1, 2]] + indices = [[[0], [0], [1]]] # Make this one higher rank + gather_nd = array_ops.gather_nd(params, indices) + with self.assertRaisesOpError( + r"flat indices\[2, :\] = \[1\] does not index into param " + r"\(shape: \[1,3\]\)"): + gather_nd.eval() + + def _disabledTestBadIndicesWithSlicesGPU(self): + # TODO disabled due to different behavior on GPU and CPU + # On GPU the bad indices do not raise error but fetch 0 values + if not test.is_gpu_available(): + return with self.test_session(use_gpu=True): params = [[0, 1, 2]] indices = [[[0], [0], [1]]] # Make this one higher rank diff --git a/tensorflow/python/kernel_tests/gather_op_test.py b/tensorflow/python/kernel_tests/gather_op_test.py index a2fcd751df..033fa95935 100644 --- a/tensorflow/python/kernel_tests/gather_op_test.py +++ b/tensorflow/python/kernel_tests/gather_op_test.py @@ -27,7 +27,8 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.platform import test -_TEST_TYPES = 
(dtypes.float32, dtypes.complex64, dtypes.complex128) +_TEST_TYPES = (dtypes.int64, dtypes.float32, + dtypes.complex64, dtypes.complex128) class GatherTest(test.TestCase): @@ -122,6 +123,9 @@ class GatherTest(test.TestCase): gather, [tf_params, tf_indices, tf_axis], gather_grad) self.assertEqual(indices_grad, None) self.assertEqual(axis_grad, None) + if dtype.is_integer: + self.assertEqual(params_grad, None) + continue # For axis 0, we are able to create an efficient IndexedSlices for # the gradient. if axis == 0: @@ -177,7 +181,19 @@ class GatherTest(test.TestCase): gather_t = array_ops.gather(params, indices, axis=axis) self.assertEqual(None, gather_t.shape) - def testBadIndices(self): + def testBadIndicesCPU(self): + with self.test_session(use_gpu=False): + params = [[0, 1, 2], [3, 4, 5]] + with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 2\)"): + array_ops.gather(params, [[7]], axis=0).eval() + with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 3\)"): + array_ops.gather(params, [[7]], axis=1).eval() + + def _disabledTestBadIndicesGPU(self): + # TODO disabled due to different behavior on GPU and CPU + # On GPU the bad indices do not raise error but fetch 0 values + if not test.is_gpu_available(): + return with self.test_session(use_gpu=True): params = [[0, 1, 2], [3, 4, 5]] with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 2\)"): diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index a9b55854f1..795aa67248 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -362,6 +362,33 @@ class UniformUnitScalingInitializationTest(test.TestCase): dtype=dtypes.string) +class VarianceScalingInitializationTest(test.TestCase): + + def testNormalDistribution(self): + shape = [100, 100] + expect_mean = 0. + expect_var = 1. / shape[0] + init = init_ops.variance_scaling_initializer(distribution='normal') + + with self.test_session(use_gpu=True): + x = init(shape).eval() + + self.assertNear(np.mean(x), expect_mean, err=1e-2) + self.assertNear(np.var(x), expect_var, err=1e-2) + + def testUniformDistribution(self): + shape = [100, 100] + expect_mean = 0. + expect_var = 1. / shape[0] + init = init_ops.variance_scaling_initializer(distribution='uniform') + + with self.test_session(use_gpu=True): + x = init(shape).eval() + + self.assertNear(np.mean(x), expect_mean, err=1e-2) + self.assertNear(np.var(x), expect_var, err=1e-2) + + # TODO(vrv): move to sequence_ops_test? 
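# (Editorial aside, not part of the patch: a minimal standalone sketch of the
# statistic checked by the VarianceScalingInitializationTest added above. With
# the default scale=1.0 and mode='fan_in', samples drawn for a [100, 100]
# weight matrix should have mean close to 0 and variance close to 1 / fan_in;
# TF 1.x session-style execution is assumed, and the tolerances mirror the
# test's err=1e-2.)
import numpy as np
import tensorflow as tf

shape = [100, 100]  # fan_in == shape[0] for a 2-D weight matrix
init = tf.variance_scaling_initializer(distribution='uniform')
with tf.Session() as sess:
  x = sess.run(init(shape))
assert abs(np.mean(x)) < 1e-2                   # mean ~ 0
assert abs(np.var(x) - 1.0 / shape[0]) < 1e-2   # var ~ 1 / fan_in == 0.01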
class RangeTest(test.TestCase): diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index a0c372db7d..e95c729715 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -947,7 +947,7 @@ class PoolingTest(test.TestCase): output_sizes, x_init_value=x_init_value, delta=1e-2) - print("%s gradient error = " % func_name, err) + tf_logging.info("%s gradient error = " % func_name, err) self.assertLess(err, err_tolerance) def _ConstructAndTestSecondGradient(self, @@ -1024,7 +1024,7 @@ class PoolingTest(test.TestCase): input_sizes, x_init_value=x_init_value, delta=1e-2) - print("%s second-order gradient error = " % func_name, err) + tf_logging.info("%s second-order gradient error = " % func_name, err) self.assertLess(err, err_tolerance) def _testMaxPoolGradValidPadding1_1(self, data_format, use_gpu): diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py index 677253946e..253e43920b 100644 --- a/tensorflow/python/kernel_tests/py_func_test.py +++ b/tensorflow/python/kernel_tests/py_func_test.py @@ -19,6 +19,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import gc import re import numpy as np @@ -434,13 +435,29 @@ class PyFuncTest(test.TestCase): # ----- Tests shared by py_func and eager_py_func ----- def testCleanup(self): - for _ in xrange(1000): - g = ops.Graph() - with g.as_default(): - c = constant_op.constant([1.], dtypes.float32) - _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32]) - _ = script_ops.eager_py_func(lambda x: x + 1, [c], [dtypes.float32]) - self.assertLess(script_ops._py_funcs.size(), 100) + # Delete everything created by previous tests to avoid side effects. + ops.reset_default_graph() + gc.collect() + initial_size = script_ops._py_funcs.size() + # Encapsulate the graph generation, so locals can be deleted. + def make_graphs(): + for _ in xrange(1000): + g = ops.Graph() + with g.as_default(): + c = constant_op.constant([1.], dtypes.float32) + _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32]) + _ = script_ops.eager_py_func(lambda x: x + 1, [c], [dtypes.float32]) + # These ops have a reference to 'c' which has a reference to the graph. + # Checks if the functions are being deleted though the graph is referenced from them. + # (see #18292) + _ = script_ops.py_func(lambda x: x + c.shape[0], [c], [dtypes.float32]) + _ = script_ops.eager_py_func(lambda x: x + c.shape[0], [c], [dtypes.float32]) + + # Call garbage collector to enforce deletion. 
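      # (Editorial note, not part of the original test: with this change the
      # FuncRegistry keeps only weak references, while the strong reference is
      # the one _internal_py_func now stores on the graph in
      # _py_funcs_used_in_graph. The gc.collect() below is what ensures the
      # entries are evicted once the graphs built in make_graphs() become
      # unreachable, letting the registry shrink back to initial_size.)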
+ make_graphs() + ops.reset_default_graph() + gc.collect() + self.assertEqual(initial_size, script_ops._py_funcs.size()) # ----- Tests for eager_py_func ----- @test_util.run_in_graph_and_eager_modes() diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index 79fe927b8a..faa4b49a8d 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -144,7 +144,9 @@ class StatefulScatterNdTest(test.TestCase): self.assertAllClose(new, ref_var.eval()) def _VariableRankTests(self, np_scatter, tf_scatter): - for vtype in (np.float32, np.float64, np.complex64, np.complex128): + for vtype in (np.int32, + np.float32, np.float64, + np.complex64, np.complex128): for itype in (np.int32, np.int64): self._VariableRankTest(np_scatter, tf_scatter, vtype, itype) @@ -221,7 +223,7 @@ class StatefulScatterNdTest(test.TestCase): # self._VariableRankTests(_NumpyDiv, state_ops.scatter_nd_div) def _ScatterRepeatIndicesTest(self, np_scatter, tf_scatter): - for vtype in (np.float32, np.float64): + for vtype in (np.int32, np.float32, np.float64): for itype in (np.int32, np.int64): self._VariableRankTest( np_scatter, tf_scatter, vtype, itype, repeat_indices=True) diff --git a/tensorflow/python/kernel_tests/scatter_ops_test.py b/tensorflow/python/kernel_tests/scatter_ops_test.py index c70a4ffce7..1a0fa744ae 100644 --- a/tensorflow/python/kernel_tests/scatter_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_ops_test.py @@ -159,7 +159,13 @@ class ScatterTest(test.TestCase): # Clips small values to avoid division by zero. def clip_small_values(x): - return 1e-4 * np.sign(x) if np.abs(x) < 1e-4 else x + threshold = 1e-4 + sign = np.sign(x) + + if isinstance(x, np.int32): + threshold = 1 + sign = np.random.choice([-1, 1]) + return threshold * sign if np.abs(x) < threshold else x updates = np.vectorize(clip_small_values)(updates) old = _AsType(np.random.randn(*((first_dim,) + extra_shape)), vtype) @@ -181,7 +187,11 @@ class ScatterTest(test.TestCase): tf_scatter, repeat_indices=False, updates_are_scalar=False): - for vtype in (np.float32, np.float64): + vtypes = [np.float32, np.float64] + if tf_scatter != state_ops.scatter_div: + vtypes.append(np.int32) + + for vtype in vtypes: for itype in (np.int32, np.int64): self._VariableRankTest(tf_scatter, vtype, itype, repeat_indices, updates_are_scalar) diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 794be096b7..a82855dfeb 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -264,7 +264,9 @@ class UnsortedSegmentTest(SegmentReductionHelper): # A subset of ops has been enabled for complex numbers self.complex_ops_list = [(np.add, None, - math_ops.unsorted_segment_sum, lambda t: 0)] + math_ops.unsorted_segment_sum, lambda t: 0), + (np.ndarray.__mul__, None, + math_ops.unsorted_segment_prod, lambda t: 1)] self.differentiable_dtypes = [dtypes_lib.float16, dtypes_lib.float32, dtypes_lib.float64] self.all_dtypes = (self.differentiable_dtypes + diff --git a/tensorflow/python/kernel_tests/string_split_op_test.py b/tensorflow/python/kernel_tests/string_split_op_test.py index a5bd1b6ee0..e20daccb28 100644 --- a/tensorflow/python/kernel_tests/string_split_op_test.py +++ b/tensorflow/python/kernel_tests/string_split_op_test.py @@ -146,5 +146,101 @@ class 
StringSplitOpTest(test.TestCase): self.assertAllEqual(shape, [3, 1]) +class StringSplitV2OpTest(test.TestCase): + + def testSplitV2(self): + strings = ["pigs on the wing", "animals"] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings) + indices, values, shape = sess.run(tokens) + self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [1, 0]]) + self.assertAllEqual(values, [b"pigs", b"on", b"the", b"wing", b"animals"]) + self.assertAllEqual(shape, [2, 4]) + + def testSplitV2MultiCharSeparator(self): + # Match Python behavior: + # >>> '1<>2<>3'.split('<>') + # ['1', '2', '3'] + # >>> "<><>4<>5<><>6<>".split("<>") + # ['', '', '4', '5', '', '6', ''] + strings = ["1<>2<>3", "<><>4<>5<><>6<>"] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings, sep="<>") + indices, values, shape = sess.run(tokens) + self.assertAllEqual( + indices, [[0, 0], [0, 1], [0, 2], + [1, 0], [1, 1], [1, 2], [1, 3], [1, 4], [1, 5], [1, 6]]) + self.assertAllEqual(values, [b"1", b"2", b"3", + b"", b"", b"4", b"5", b"", b"6", b""]) + self.assertAllEqual(shape, [2, 7]) + + def testSplitV2SimpleSeparator(self): + # Match Python behavior: + # >>> '1,2,3'.split(',') + # ['1', '2', '3'] + # >>> '1,2,,3,'.split(',') + # ['1', '2', '', '3', ''] + strings = ["1,2,3", "4,5,,6,"] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings, sep=',') + indices, values, shape = sess.run(tokens) + self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], + [1, 0], [1, 1], [1, 2], [1, 3], [1, 4]]) + self.assertAllEqual(values, [b"1", b"2", b"3", + b"4", b"5", b"", b"6", b""]) + self.assertAllEqual(shape, [2, 5]) + + def testSplitV2EmptySeparator(self): + # Match Python behavior: + # >>> '1 2 3'.split() + # ['1', '2', '3'] + #>>> ' 1 2 3 '.split() + #['1', '2', '3'] + strings = ["1 2 3", " 4 5 6 "] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings) + indices, values, shape = sess.run(tokens) + self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], + [1, 0], [1, 1], [1, 2]]) + self.assertAllEqual(values, [b"1", b"2", b"3", b"4", b"5", b"6"]) + self.assertAllEqual(shape, [2, 3]) + + def testSplitV2SimpleSeparatorMaxSplit(self): + # Match Python behavior: + # >>> '1,2,3'.split(',', maxsplit=1) + # ['1', '2,3'] + # >>> '4,5,,6,'.split(',', maxsplit=1) + # ['4', '5,,6,'] + strings = ["1,2,3", "4,5,,6,"] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings, sep=',', maxsplit=1) + indices, values, shape = sess.run(tokens) + self.assertAllEqual(indices, [[0, 0], [0, 1], + [1, 0], [1, 1]]) + self.assertAllEqual(values, [b"1", b"2,3", b"4", b"5,,6,"]) + self.assertAllEqual(shape, [2, 2]) + + def testSplitV2EmptySeparatorMaxSplit(self): + # Match Python behavior: + # '1 2 3'.split(maxsplit=1) + # ['1', '2 3'] + # >>> " 4 5 6 ".split(maxsplit=1) + # ['4', '5 6 '] + strings = ["1 2 3", " 4 5 6 "] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings, maxsplit=1) + indices, values, shape = sess.run(tokens) + self.assertAllEqual(indices, [[0, 0], [0, 1], + [1, 0], [1, 1]]) + self.assertAllEqual(values, [b"1", b"2 3", b"4", b"5 6 "]) + self.assertAllEqual(shape, [2, 2]) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 8129334703..fae63b1132 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -2619,6 +2619,10 @@ reverse.__doc__ = 
gen_array_ops.reverse_v2.__doc__ # pylint: disable=redefined-builtin @tf_export("reverse_sequence") +@deprecation.deprecated_args( + None, "seq_dim is deprecated, use seq_axis instead", "seq_dim") +@deprecation.deprecated_args( + None, "batch_dim is deprecated, use batch_axis instead", "batch_dim") def reverse_sequence(input, seq_lengths, seq_axis=None, diff --git a/tensorflow/python/ops/gradient_checker.py b/tensorflow/python/ops/gradient_checker.py index 12afcd0b51..94c8d79335 100644 --- a/tensorflow/python/ops/gradient_checker.py +++ b/tensorflow/python/ops/gradient_checker.py @@ -283,10 +283,10 @@ def compute_gradient(x, numbers. For example, if `x` is complex with shape `[m]` and `y` is complex with shape `[n]`, each Jacobian `J` will have shape `[m * 2, n * 2]` with - J[:m, :n] = d(Re y)/d(Re x) - J[:m, n:] = d(Im y)/d(Re x) - J[m:, :n] = d(Re y)/d(Im x) - J[m:, n:] = d(Im y)/d(Im x) + J[::2, ::2] = d(Re y)/d(Re x) + J[::2, 1::2] = d(Im y)/d(Re x) + J[1::2, ::2] = d(Re y)/d(Im x) + J[1::2, 1::2] = d(Im y)/d(Im x) Args: x: a tensor or list of tensors diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index bdcf420980..f27d9224c1 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -28,6 +28,7 @@ from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops import gen_image_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops @@ -258,14 +259,14 @@ def random_flip_up_down(image, seed=None): dimension, which is `height`. Otherwise output the image as-is. Args: - image: A 3-D tensor of shape `[height, width, channels].` + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. seed: A Python integer. Used to create a random seed. See @{tf.set_random_seed} for behavior. Returns: - A 3-D tensor of the same type and shape as `image`. - + A tensor of the same type and shape as `image`. Raises: ValueError: if the shape of `image` not supported. """ @@ -280,13 +281,14 @@ def random_flip_left_right(image, seed=None): second dimension, which is `width`. Otherwise output the image as-is. Args: - image: A 3-D tensor of shape `[height, width, channels].` + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. seed: A Python integer. Used to create a random seed. See @{tf.set_random_seed} for behavior. Returns: - A 3-D tensor of the same type and shape as `image`. + A tensor of the same type and shape as `image`. Raises: ValueError: if the shape of `image` not supported. @@ -297,7 +299,8 @@ def random_flip_left_right(image, seed=None): def _random_flip(image, flip_index, seed, scope_name): """Randomly (50% chance) flip an image along axis `flip_index`. Args: - image: A 3-D tensor of shape `[height, width, channels].` + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. flip_index: The dimension along which to flip the image. Vertical: 0, Horizontal: 1 seed: A Python integer. Used to create a random seed. See @@ -306,22 +309,37 @@ def _random_flip(image, flip_index, seed, scope_name): scope_name: Name of the scope in which the ops are added. 
Returns: - A 3-D tensor of the same type and shape as `image`. + A tensor of the same type and shape as `image`. Raises: ValueError: if the shape of `image` not supported. """ with ops.name_scope(None, scope_name, [image]) as scope: image = ops.convert_to_tensor(image, name='image') - image = _Assert3DImage(image) - uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) - mirror_cond = math_ops.less(uniform_random, .5) - result = control_flow_ops.cond( - mirror_cond, - lambda: array_ops.reverse(image, [flip_index]), - lambda: image, - name=scope) - return fix_image_flip_shape(image, result) + image = _AssertAtLeast3DImage(image) + shape = image.get_shape() + if shape.ndims == 3 or shape.ndims is None: + uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) + mirror_cond = math_ops.less(uniform_random, .5) + result = control_flow_ops.cond( + mirror_cond, + lambda: array_ops.reverse(image, [flip_index]), + lambda: image, + name=scope + ) + return fix_image_flip_shape(image, result) + elif shape.ndims == 4: + uniform_random = random_ops.random_uniform( + [array_ops.shape(image)[0]], 0, 1.0, seed=seed + ) + mirror_cond = math_ops.less(uniform_random, .5) + return array_ops.where( + mirror_cond, + image, + functional_ops.map_fn(lambda x: array_ops.reverse(x, [flip_index]), image, dtype=image.dtype) + ) + else: + raise ValueError('\'image\' must have either 3 or 4 dimensions.') @tf_export('image.flip_left_right') @@ -1634,13 +1652,13 @@ def is_jpeg(contents, name=None): @tf_export('image.decode_image') -def decode_image(contents, channels=None, name=None): +def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None): """Convenience function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`. Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the - appropriate operation to convert the input bytes `string` into a `Tensor` of - type `uint8`. + appropriate operation to convert the input bytes `string` into a `Tensor` + of type `dtype`. Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D @@ -1652,10 +1670,11 @@ def decode_image(contents, channels=None, name=None): contents: 0-D `string`. The encoded image bytes. channels: An optional `int`. Defaults to `0`. Number of color channels for the decoded image. + dtype: The desired DType of the returned `Tensor`. name: A name for the operation (optional) Returns: - `Tensor` with type `uint8` with shape `[height, width, num_channels]` for + `Tensor` with type `dtype` and shape `[height, width, num_channels]` for BMP, JPEG, and PNG images and shape `[num_frames, height, width, 3]` for GIF images. 
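As a usage note for the new `dtype` argument documented above, the sketch below (hypothetical file path, TF 1.x graph mode assumed) shows the equivalence that the DecodeImageTest cases added elsewhere in this patch check: requesting `float32` from `decode_image` is intended to match decoding at the native precision and then converting with `convert_image_dtype`.

  import tensorflow as tf

  contents = tf.read_file('/tmp/example.png')  # hypothetical input file
  image = tf.image.decode_image(contents, dtype=tf.float32)
  # Intended to match decoding and converting explicitly; convert_image_dtype
  # also rescales integer data into [0, 1] for float outputs.
  reference = tf.image.convert_image_dtype(
      tf.image.decode_png(contents, dtype=tf.uint16), tf.float32)
  with tf.Session() as sess:
    img, ref = sess.run([image, reference])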
@@ -1679,7 +1698,7 @@ def decode_image(contents, channels=None, name=None): channels_msg = 'Channels must be in (None, 0, 3) when decoding BMP images' assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) with ops.control_dependencies([assert_decode, assert_channels]): - return gen_image_ops.decode_bmp(contents) + return convert_image_dtype(gen_image_ops.decode_bmp(contents), dtype) def _gif(): # Create assert to make sure that channels is not set to 1 @@ -1692,7 +1711,7 @@ def decode_image(contents, channels=None, name=None): channels_msg = 'Channels must be in (None, 0, 3) when decoding GIF images' assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) with ops.control_dependencies([assert_channels]): - return gen_image_ops.decode_gif(contents) + return convert_image_dtype(gen_image_ops.decode_gif(contents), dtype) def check_gif(): # Create assert op to check that bytes are GIF decodable @@ -1701,7 +1720,11 @@ def decode_image(contents, channels=None, name=None): def _png(): """Decodes a PNG image.""" - return gen_image_ops.decode_png(contents, channels) + return convert_image_dtype( + gen_image_ops.decode_png(contents, channels, + dtype=dtypes.uint8 + if dtype == dtypes.uint8 + else dtypes.uint16), dtype) def check_png(): """Checks if an image is PNG.""" @@ -1717,7 +1740,8 @@ def decode_image(contents, channels=None, name=None): 'images') assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) with ops.control_dependencies([assert_channels]): - return gen_image_ops.decode_jpeg(contents, channels) + return convert_image_dtype( + gen_image_ops.decode_jpeg(contents, channels), dtype) # Decode normal JPEG images (start with \xff\xd8\xff\xe0) # as well as JPEG images with EXIF data (start with \xff\xd8\xff\xe1). @@ -1878,7 +1902,7 @@ def sample_distorted_bounding_box(image_size, width / height within this range. area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The cropped area of the image must contain a fraction of the - supplied image within in this range. + supplied image within this range. max_attempts: An optional `int`. Defaults to `100`. Number of attempts at generating a cropped region of the image of the specified constraints. 
After `max_attempts` failures, return the diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 45499dcce0..2a6ab26e96 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -533,6 +533,37 @@ class FlipImageBenchmark(test.Benchmark): iters=benchmark_rounds, wall_time=step_time) + def _benchmarkBatchedRandomFlipLeftRight(self, device, cpu_count): + image_shape = [16, 299, 299, 3] + warmup_rounds = 100 + benchmark_rounds = 1000 + config = config_pb2.ConfigProto() + if cpu_count is not None: + config.inter_op_parallelism_threads = 1 + config.intra_op_parallelism_threads = cpu_count + with session.Session("", graph=ops.Graph(), config=config) as sess: + with ops.device(device): + inputs = variables.Variable( + random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255, + trainable=False, + dtype=dtypes.float32) + run_op = image_ops.random_flip_left_right(inputs) + sess.run(variables.global_variables_initializer()) + for i in xrange(warmup_rounds + benchmark_rounds): + if i == warmup_rounds: + start = time.time() + sess.run(run_op) + end = time.time() + step_time = (end - start) / benchmark_rounds + tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all") + print("benchmarkBatchedRandomFlipLeftRight_16_299_299_3_%s step_time: " + "%.2f us" % + (tag, step_time * 1e6)) + self.report_benchmark( + name="benchmarkBatchedRandomFlipLeftRight_16_299_299_3_%s" % (tag), + iters=benchmark_rounds, + wall_time=step_time) + def benchmarkFlipLeftRightCpu1(self): self._benchmarkFlipLeftRight("/cpu:0", 1) @@ -551,6 +582,15 @@ class FlipImageBenchmark(test.Benchmark): def benchmarkRandomFlipLeftRightGpu(self): self._benchmarkRandomFlipLeftRight(test.gpu_device_name(), None) + def benchmarkBatchedRandomFlipLeftRightCpu1(self): + self._benchmarkBatchedRandomFlipLeftRight("/cpu:0", 1) + + def benchmarkBatchedRandomFlipLeftRightCpuAll(self): + self._benchmarkBatchedRandomFlipLeftRight("/cpu:0", None) + + def benchmarkBatchedRandomFlipLeftRightGpu(self): + self._benchmarkBatchedRandomFlipLeftRight(test.gpu_device_name(), None) + class AdjustHueBenchmark(test.Benchmark): @@ -987,7 +1027,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): with self.test_session(use_gpu=True): x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_left_right(x_tf) + y = image_ops.random_flip_left_right(x_tf, seed=seed) self.assertTrue(y.op.name.startswith("random_flip_left_right")) count_flipped = 0 @@ -1008,6 +1048,50 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): self.assertGreaterEqual(count_flipped, 20) self.assertGreaterEqual(count_unflipped, 20) + def testRandomFlipLeftRightWithBatch(self): + batch_size = 16 + seed = 42 + + # create single item of test data + x_np_raw = np.array( + [[1, 2, 3], [1, 2, 3]], dtype=np.uint8 + ).reshape([1, 2, 3, 1]) + y_np_raw = np.array( + [[3, 2, 1], [3, 2, 1]], dtype=np.uint8 + ).reshape([1, 2, 3, 1]) + + # create batched test data + x_np = np.vstack([x_np_raw for _ in range(batch_size)]) + y_np = np.vstack([y_np_raw for _ in range(batch_size)]) + + with self.test_session(use_gpu=True): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.random_flip_left_right(x_tf, seed=seed) + self.assertTrue(y.op.name.startswith("random_flip_left_right")) + + count_flipped = 0 + count_unflipped = 0 + for _ in range(100): + y_tf = y.eval() + + # check every element of the batch + for i in range(batch_size): + if y_tf[i][0][0] 
== 1: + self.assertAllEqual(y_tf[i], x_np[i]) + count_unflipped += 1 + else: + self.assertAllEqual(y_tf[i], y_np[i]) + count_flipped += 1 + + # 100 trials, each containing batch_size elements + # Mean: 50 * batch_size + # Std Dev: ~5 * sqrt(batch_size) + # Six Sigma: 50 * batch_size - (5 * 6 * sqrt(batch_size)) + # = 50 * batch_size - 30 * sqrt(batch_size) = 800 - 30 * 4 = 680 + six_sigma = 50 * batch_size - 30 * np.sqrt(batch_size) + self.assertGreaterEqual(count_flipped, six_sigma) + self.assertGreaterEqual(count_unflipped, six_sigma) + def testInvolutionUpDown(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) @@ -1057,9 +1141,11 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) y_np = np.array([[4, 5, 6], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1]) + seed = 42 + with self.test_session(use_gpu=True): x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_up_down(x_tf, seed=42) + y = image_ops.random_flip_up_down(x_tf, seed=seed) self.assertTrue(y.op.name.startswith("random_flip_up_down")) count_flipped = 0 count_unflipped = 0 @@ -1079,6 +1165,50 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): self.assertGreaterEqual(count_flipped, 20) self.assertGreaterEqual(count_unflipped, 20) + def testRandomFlipUpDownWithBatch(self): + batch_size = 16 + seed = 42 + + # create single item of test data + x_np_raw = np.array( + [[1, 2, 3], [4, 5, 6]], dtype=np.uint8 + ).reshape([1, 2, 3, 1]) + y_np_raw = np.array( + [[4, 5, 6], [1, 2, 3]], dtype=np.uint8 + ).reshape([1, 2, 3, 1]) + + # create batched test data + x_np = np.vstack([x_np_raw for _ in range(batch_size)]) + y_np = np.vstack([y_np_raw for _ in range(batch_size)]) + + with self.test_session(use_gpu=True): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.random_flip_up_down(x_tf, seed=seed) + self.assertTrue(y.op.name.startswith("random_flip_up_down")) + + count_flipped = 0 + count_unflipped = 0 + for _ in range(100): + y_tf = y.eval() + + # check every element of the batch + for i in range(batch_size): + if y_tf[i][0][0] == 1: + self.assertAllEqual(y_tf[i], x_np[i]) + count_unflipped += 1 + else: + self.assertAllEqual(y_tf[i], y_np[i]) + count_flipped += 1 + + # 100 trials, each containing batch_size elements + # Mean: 50 * batch_size + # Std Dev: ~5 * sqrt(batch_size) + # Six Sigma: 50 * batch_size - (5 * 6 * sqrt(batch_size)) + # = 50 * batch_size - 30 * sqrt(batch_size) = 800 - 30 * 4 = 680 + six_sigma = 50 * batch_size - 30 * np.sqrt(batch_size) + self.assertGreaterEqual(count_flipped, six_sigma) + self.assertGreaterEqual(count_unflipped, six_sigma) + def testInvolutionTranspose(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) @@ -1156,6 +1286,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): #Ops that support 4D input for op in [ image_ops.flip_left_right, image_ops.flip_up_down, + image_ops.random_flip_left_right, image_ops.random_flip_up_down, image_ops.transpose_image, image_ops.rot90 ]: transformed_unknown_dims_4 = op(p_unknown_dims_4) @@ -1166,14 +1297,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): "must be at least three-dimensional"): op(p_wrong_rank) - for op in [ - image_ops.random_flip_left_right, - image_ops.random_flip_up_down, - ]: - with self.assertRaisesRegexp(ValueError, "must be three-dimensional"): - op(p_wrong_rank) - - def testRot90GroupOrder(self): image = 
np.arange(24, dtype=np.uint8).reshape([2, 4, 3]) with self.test_session(use_gpu=True): @@ -1208,41 +1331,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): y_np = np.rot90(image, k=k, axes=(1, 2)) self.assertAllEqual(y_np, y_tf.eval({k_placeholder: k})) -class RandomFlipTest(test_util.TensorFlowTestCase): - - def testRandomLeftRight(self): - x_np = np.array([0, 1], dtype=np.uint8).reshape([1, 2, 1]) - num_iterations = 500 - - hist = [0, 0] - with self.test_session(use_gpu=True): - x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_left_right(x_tf) - for _ in xrange(num_iterations): - y_np = y.eval().flatten()[0] - hist[y_np] += 1 - - # Ensure that each entry is observed within 4 standard deviations. - four_stddev = 4.0 * np.sqrt(num_iterations / 2.0) - self.assertAllClose(hist, [num_iterations / 2.0] * 2, atol=four_stddev) - - def testRandomUpDown(self): - x_np = np.array([0, 1], dtype=np.uint8).reshape([2, 1, 1]) - num_iterations = 500 - - hist = [0, 0] - with self.test_session(use_gpu=True): - x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_up_down(x_tf) - for _ in xrange(num_iterations): - y_np = y.eval().flatten()[0] - hist[y_np] += 1 - - # Ensure that each entry is observed within 4 standard deviations. - four_stddev = 4.0 * np.sqrt(num_iterations / 2.0) - self.assertAllClose(hist, [num_iterations / 2.0] * 2, atol=four_stddev) - - class AdjustContrastTest(test_util.TensorFlowTestCase): def _testContrast(self, x_np, y_np, contrast_factor): @@ -3880,5 +3968,88 @@ class SobelEdgesTest(test_util.TensorFlowTestCase): self.assertAllClose(expected_batch, actual_sobel) +class DecodeImageTest(test_util.TensorFlowTestCase): + + def testJpegUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/jpeg/testdata" + jpeg0 = io_ops.read_file(os.path.join(base, "jpeg_merge_test1.jpg")) + image0 = image_ops.decode_image(jpeg0, dtype=dtypes.uint16) + image1 = image_ops.convert_image_dtype(image_ops.decode_jpeg(jpeg0), + dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testPngUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/png/testdata" + png0 = io_ops.read_file(os.path.join(base, "lena_rgba.png")) + image0 = image_ops.decode_image(png0, dtype=dtypes.uint16) + image1 = image_ops.convert_image_dtype( + image_ops.decode_png(png0, dtype=dtypes.uint16), dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testGifUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/gif/testdata" + gif0 = io_ops.read_file(os.path.join(base, "scan.gif")) + image0 = image_ops.decode_image(gif0, dtype=dtypes.uint16) + image1 = image_ops.convert_image_dtype(image_ops.decode_gif(gif0), + dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testBmpUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/bmp/testdata" + bmp0 = io_ops.read_file(os.path.join(base, "lena.bmp")) + image0 = image_ops.decode_image(bmp0, dtype=dtypes.uint16) + image1 = image_ops.convert_image_dtype(image_ops.decode_bmp(bmp0), + dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testJpegFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/jpeg/testdata" + jpeg0 = 
io_ops.read_file(os.path.join(base, "jpeg_merge_test1.jpg")) + image0 = image_ops.decode_image(jpeg0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype(image_ops.decode_jpeg(jpeg0), + dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testPngFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/png/testdata" + png0 = io_ops.read_file(os.path.join(base, "lena_rgba.png")) + image0 = image_ops.decode_image(png0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype( + image_ops.decode_png(png0, dtype=dtypes.uint16), dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testGifFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/gif/testdata" + gif0 = io_ops.read_file(os.path.join(base, "scan.gif")) + image0 = image_ops.decode_image(gif0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype(image_ops.decode_gif(gif0), + dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testBmpFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/bmp/testdata" + bmp0 = io_ops.read_file(os.path.join(base, "lena.bmp")) + image0 = image_ops.decode_image(bmp0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype(image_ops.decode_bmp(bmp0), + dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index 2df230d470..724fcc39cd 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -467,7 +467,8 @@ class VarianceScaling(Initializer): else: scale /= max(1., (fan_in + fan_out) / 2.) if self.distribution == "normal": - stddev = math.sqrt(scale) + # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) + stddev = math.sqrt(scale) / .87962566103423978 return random_ops.truncated_normal( shape, 0.0, stddev, dtype, seed=self.seed) else: diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py index 222b8ebc9d..8276047cb6 100644 --- a/tensorflow/python/ops/logging_ops.py +++ b/tensorflow/python/ops/logging_ops.py @@ -35,8 +35,9 @@ from tensorflow.python.util.tf_export import tf_export # Assert and Print are special symbols in python, so we must -# use an upper-case version of them. -@tf_export("Print") +# have an upper-case version of them. For users with Python 3 or Python 2.7 +# with `from __future__ import print_function`, we also allow lowercase. +@tf_export("Print", "print") def Print(input_, data, message=None, first_n=None, summarize=None, name=None): """Prints a list of tensors. diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index e40481f3a7..466d0dadc8 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -125,8 +125,8 @@ def abs(x, name=None): # pylint: disable=redefined-builtin ``` Args: - x: A `Tensor` or `SparseTensor` of type `float32`, `float64`, `int32`, - `int64`, `complex64` or `complex128`. + x: A `Tensor` or `SparseTensor` of type `float16`, `float32`, `float64`, + `int32`, `int64`, `complex64` or `complex128`. name: A name for the operation (optional). 
Returns: @@ -430,10 +430,10 @@ def pow(x, y, name=None): # pylint: disable=redefined-builtin ``` Args: - x: A `Tensor` of type `float32`, `float64`, `int32`, `int64`, `complex64`, - or `complex128`. - y: A `Tensor` of type `float32`, `float64`, `int32`, `int64`, `complex64`, - or `complex128`. + x: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, `int64`, + `complex64`, or `complex128`. + y: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, `int64`, + `complex64`, or `complex128`. name: A name for the operation (optional). Returns: @@ -600,7 +600,7 @@ def round(x, name=None): # pylint: disable=redefined-builtin ``` Args: - x: A `Tensor` of type `float32` or `float64`. + x: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, or `int64`. name: A name for the operation (optional). Returns: @@ -1257,7 +1257,7 @@ def reduce_sum(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. For example: @@ -1397,7 +1397,7 @@ def reduce_mean(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. For example: @@ -1469,7 +1469,7 @@ def reduce_prod(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. Args: @@ -1519,7 +1519,7 @@ def reduce_min(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. Args: @@ -1568,7 +1568,7 @@ def reduce_max(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. Args: @@ -1617,7 +1617,7 @@ def reduce_all(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. For example: @@ -1675,7 +1675,7 @@ def reduce_any(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. For example: diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 783d485892..f47f38e29e 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -621,7 +621,7 @@ def normalize_moments(counts, mean_ss, variance_ss, shift, name=None): """Calculate the mean and variance of based on the sufficient statistics. Args: - counts: A `Tensor` containing a the total count of the data (one value). + counts: A `Tensor` containing the total count of the data (one value). 
mean_ss: A `Tensor` containing the mean sufficient statistics: the (possibly shifted) sum of the elements to average over. variance_ss: A `Tensor` containing the variance sufficient statistics: the @@ -689,6 +689,9 @@ def moments( # Compute true mean while keeping the dims for proper broadcasting. mean = math_ops.reduce_mean(y, axes, keepdims=True, name="mean") # sample variance, not unbiased variance + # Note: stop_gradient does not change the gradient that gets + # backpropagated to the mean from the variance calculation, + # because that gradient is zero variance = math_ops.reduce_mean( math_ops.squared_difference(y, array_ops.stop_gradient(mean)), axes, diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index a0b55eb077..0c2f5b06c4 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1596,12 +1596,12 @@ def leaky_relu(features, alpha=0.2, name=None): Returns: The activation value. """ - with ops.name_scope(name, "LeakyRelu", [features, alpha]): + with ops.name_scope(name, "LeakyRelu", [features, alpha]) as name: features = ops.convert_to_tensor(features, name="features") if features.dtype.is_integer: features = math_ops.to_float(features) alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha") - return math_ops.maximum(alpha * features, features) + return math_ops.maximum(alpha * features, features, name=name) def _flatten_outer_dims(logits): diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py index 46a5f4fae6..035b4735af 100644 --- a/tensorflow/python/ops/nn_test.py +++ b/tensorflow/python/ops/nn_test.py @@ -962,6 +962,16 @@ class LeakyReluTest(test_lib.TestCase): self.assertAllClose( outputs, [-0.4, -0.2, 0.0, 1.0, 2.0], rtol=tol, atol=tol) + def testName(self): + np_values = np.array([-2, -1, 0, 1, 2], dtype=np.float64) + outputs_with_name_set = nn_ops.leaky_relu( + constant_op.constant(np_values), + name='test_relu_op') + self.assertEqual(outputs_with_name_set.name, 'test_relu_op:0') + outputs_without_name_set = nn_ops.leaky_relu( + constant_op.constant(np_values)) + self.assertEqual(outputs_without_name_set.name, 'LeakyRelu:0') + class SwishTest(test_lib.TestCase): diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py index f8676ccb5f..219562de5d 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -23,6 +23,7 @@ import threading # Used by py_util.cc to get tracebacks. import traceback # pylint: disable=unused-import +import weakref import numpy as np import six @@ -129,11 +130,14 @@ class FuncRegistry(object): def __init__(self): self._lock = threading.Lock() self._unique_id = 0 # GUARDED_BY(self._lock) - self._funcs = {} + # Only store weakrefs to the functions. The strong reference is stored in + # the graph. + self._funcs = weakref.WeakValueDictionary() def insert(self, func): """Registers `func` and returns a unique token for this entry.""" token = self._next_unique_token() + # Store a weakref to the function self._funcs[token] = func return token @@ -186,7 +190,7 @@ class FuncRegistry(object): Raises: ValueError: if no function is registered for `token`.
""" - func = self._funcs[token] + func = self._funcs.get(token, None) if func is None: raise ValueError("callback %s is not found" % token) if isinstance(func, EagerFunc): @@ -228,19 +232,6 @@ _py_funcs = FuncRegistry() pywrap_tensorflow.InitializePyTrampoline(_py_funcs) -class CleanupFunc(object): - """A helper class to remove a registered function from _py_funcs.""" - - def __init__(self, token): - self._token = token - - def __del__(self): - if _py_funcs is not None: - # If _py_funcs is None, the program is most likely in shutdown, and the - # _py_funcs object has been destroyed already. - _py_funcs.remove(self._token) - - def _internal_py_func(func, inp, Tout, @@ -270,17 +261,15 @@ def _internal_py_func(func, # bound to that of the outer graph instead. graph = graph._outer_graph - cleanup = CleanupFunc(token) - # TODO(zhifengc): Consider adding a Graph method to collect # `cleanup` objects in one of its member. - if not hasattr(graph, "_cleanup_py_funcs_used_in_graph"): - graph._cleanup_py_funcs_used_in_graph = [] + if not hasattr(graph, "_py_funcs_used_in_graph"): + graph._py_funcs_used_in_graph = [] - # When `graph` is destroyed, elements in _cleanup_py_funcs_used_in_graph - # will be destroyed and their __del__ will remove the 'token' from - # the funcs registry. - graph._cleanup_py_funcs_used_in_graph.append(cleanup) + # Store a reference to the function in the graph to ensure it stays alive + # as long as the graph lives. When the graph is destroyed, the function + # is left to the garbage collector for destruction as well. + graph._py_funcs_used_in_graph.append(func) # pylint: enable=protected-access if eager: diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 0130233746..c3b16a7bd5 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -84,6 +84,8 @@ def _convert_to_sparse_tensors(sp_inputs): # pylint: disable=protected-access @tf_export("sparse_concat") +@deprecation.deprecated_args( + None, "concat_dim is deprecated, use axis instead", "concat_dim") def sparse_concat(axis, sp_inputs, name=None, @@ -597,6 +599,8 @@ class KeywordRequired(object): @tf_export("sparse_split") +@deprecation.deprecated_args( + None, "split_dim is deprecated, use axis instead", "split_dim") def sparse_split(keyword_required=KeywordRequired(), sp_input=None, num_split=None, diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py index ae79c01949..0280c89c10 100644 --- a/tensorflow/python/ops/string_ops.py +++ b/tensorflow/python/ops/string_ops.py @@ -91,6 +91,59 @@ def string_split(source, delimiter=" ", skip_empty=True): # pylint: disable=inv shape.set_shape([2]) return sparse_tensor.SparseTensor(indices, values, shape) +@tf_export("strings.split") +def string_split_v2(source, sep=None, maxsplit=-1): + """Split elements of `source` based on `sep` into a `SparseTensor`. + + Let N be the size of source (typically N will be the batch size). Split each + element of `source` based on `sep` and return a `SparseTensor` + containing the split tokens. Empty tokens are ignored. + + For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c', + then the output will be + + st.indices = [0, 0; + 0, 1; + 1, 0; + 1, 1; + 1, 2] + st.shape = [2, 3] + st.values = ['hello', 'world', 'a', 'b', 'c'] + + If `sep` is given, consecutive delimiters are not grouped together and are + deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and + sep of `"<>"` returns `["1", "2", "", "3"]`. 
If `sep` is None or an empty + string, consecutive whitespace is regarded as a single separator, and the + result will contain no empty strings at the start or end if the string has + leading or trailing whitespace. + + Note that the behavior described above matches Python's str.split. + + Args: + source: `1-D` string `Tensor`, the strings to split. + sep: `0-D` string `Tensor`, the delimiter string. + maxsplit: An `int`. If `maxsplit > 0`, limits the number of splits in the result. + + Raises: + ValueError: If sep is not a string. + + Returns: + A `SparseTensor` of rank `2`, the strings split according to the delimiter. + The first column of the indices corresponds to the row in `source` and the + second column corresponds to the index of the split component in this row. + """ + if sep is None: + sep = '' + sep = ops.convert_to_tensor(sep, dtype=dtypes.string) + source = ops.convert_to_tensor(source, dtype=dtypes.string) + + indices, values, shape = gen_string_ops.string_split_v2( + source, sep=sep, maxsplit=maxsplit) + indices.set_shape([None, 2]) + values.set_shape([None]) + shape.set_shape([2]) + return sparse_tensor.SparseTensor(indices, values, shape) + def _reduce_join_reduction_dims(x, axis, reduction_indices): """Returns range(rank(x) - 1, 0, -1) if reduction_indices is None.""" diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index f49e2d314d..47414c28af 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1786,6 +1786,23 @@ class variable_scope(object): assert v.name == "foo/bar/v:0" ``` + Simple example of how to re-enter a premade variable scope safely: + + ```python + with tf.variable_scope("foo") as vs: + pass + + # Re-enter the variable scope. + with tf.variable_scope(vs, + auxiliary_name_scope=False) as vs1: + # Restore the original name_scope. + with tf.name_scope(vs1.original_name_scope): + v = tf.get_variable("v", [1]) + assert v.name == "foo/v:0" + c = tf.constant([1], name="c") + assert c.name == "foo/c:0" + ``` + Basic example of sharing a variable AUTO_REUSE: ```python @@ -1924,7 +1941,9 @@ class variable_scope(object): (which must have the same shape). Constraints are not safe to use when doing asynchronous distributed training. auxiliary_name_scope: If `True`, we create an auxiliary name scope with - the scope. If `False`, we don't touch name scope. + the scope. If `False`, we don't create it. Note that the argument is + not inherited, and it only takes effect once, when the scope is created. You + should only use it for re-entering a premade variable scope. Returns: A scope that can be captured and reused.
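For reference, a brief usage sketch of the tf.strings.split wrapper added to string_ops.py above (TF 1.x session execution assumed; the expected values follow the Python str.split semantics its docstring describes):

  import tensorflow as tf

  st = tf.strings.split(["hello world", "a b c"])       # whitespace splitting
  st_sep = tf.strings.split(["1<>2<><>3"], sep="<>")    # empty tokens are kept
  with tf.Session() as sess:
    values, dense_shape = sess.run([st.values, st.dense_shape])
    sep_values = sess.run(st_sep.values)
  # values == [b'hello', b'world', b'a', b'b', b'c'], dense_shape == [2, 3]
  # sep_values == [b'1', b'2', b'', b'3']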
diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py old mode 100755 new mode 100644 diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 522965990b..b59f8e1f98 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1719,7 +1719,7 @@ def tf_py_build_info_genrule(): name="py_build_info_gen", outs=["platform/build_info.py"], cmd= - "$(location //tensorflow/tools/build_info:gen_build_info.py) --raw_generate \"$@\" --build_config " + if_cuda("cuda", "cpu"), + "$(location //tensorflow/tools/build_info:gen_build_info.py) --raw_generate \"$@\" --build_config " + if_cuda("cuda", "cpu"), local=1, tools=[clean_dep("//tensorflow/tools/build_info:gen_build_info.py")],) diff --git a/tensorflow/tools/api/generator/create_python_api.py b/tensorflow/tools/api/generator/create_python_api.py index bca9fa49eb..671b7e387e 100644 --- a/tensorflow/tools/api/generator/create_python_api.py +++ b/tensorflow/tools/api/generator/create_python_api.py @@ -41,7 +41,11 @@ _GENERATED_FILE_HEADER = """# This file is MACHINE GENERATED! Do not edit. # Generated by: tensorflow/tools/api/generator/create_python_api.py script. \"\"\"%s \"\"\" + +from __future__ import print_function + """ +_GENERATED_FILE_FOOTER = "\n\ndel print_function\n" class SymbolExposedTwiceError(Exception): @@ -149,6 +153,7 @@ class _ModuleInitCodeBuilder(object): _names_with_underscore = [%s] __all__ = [_s for _s in dir() if not _s.startswith('_')] __all__.extend([_s for _s in _names_with_underscore]) +__all__.remove('print_function') ''' % underscore_names_str return module_text_map @@ -333,7 +338,8 @@ def create_api_files( if module or not root_init_template: contents = ( _GENERATED_FILE_HEADER % - get_module_docstring(module, package, api_name) + text) + get_module_docstring(module, package, api_name) + + text + _GENERATED_FILE_FOOTER) else: # Read base init file with open(root_init_template, 'r') as root_init_template_file: diff --git a/tensorflow/tools/api/golden/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/tensorflow.image.pbtxt index 5bb3b3c444..10171b3d60 100644 --- a/tensorflow/tools/api/golden/tensorflow.image.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.image.pbtxt @@ -58,7 +58,7 @@ tf_module { } member_method { name: "decode_image" - argspec: "args=[\'contents\', \'channels\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'contents\', \'channels\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"\", \'None\'], " } member_method { name: "decode_jpeg" diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index dc2bd40096..3051c4437e 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -1532,6 +1532,10 @@ tf_module { name: "pow" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "print" + argspec: "args=[\'input_\', \'data\', \'message\', \'first_n\', \'summarize\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " + } member_method { name: "py_func" argspec: "args=[\'func\', \'inp\', \'Tout\', \'stateful\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " diff --git a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt index a3fbe95bba..b641c39feb 100644 
--- a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt @@ -4,4 +4,8 @@ tf_module { name: "regex_full_match" argspec: "args=[\'input\', \'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "split" + argspec: "args=[\'source\', \'sep\', \'maxsplit\'], varargs=None, keywords=None, defaults=[\'None\', \'-1\'], " + } } diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh index 5fa75e1d61..883bb93647 100755 --- a/tensorflow/tools/ci_build/builds/pip.sh +++ b/tensorflow/tools/ci_build/builds/pip.sh @@ -322,6 +322,10 @@ create_activate_virtualenv_and_install_tensorflow() { pip install -v ${PIP_FLAGS} ${WHL_PATH} || \ die "pip install (forcing to reinstall tensorflow) FAILED" echo "Successfully installed pip package ${TF_WHEEL_PATH}" + + # Force downgrade setuptools. + pip install --upgrade setuptools==39.1.0 + } ################################################################################ diff --git a/tensorflow/tools/ci_build/builds/with_the_same_user b/tensorflow/tools/ci_build/builds/with_the_same_user index d4bf546d40..b216e3549f 100755 --- a/tensorflow/tools/ci_build/builds/with_the_same_user +++ b/tensorflow/tools/ci_build/builds/with_the_same_user @@ -40,7 +40,7 @@ if [ -n "${CI_BUILD_USER_FORCE_BADNAME}" ]; then ADDUSER_OPTS="--force-badname" fi -getent group "${CI_BUILD_GID}" || addgroup --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" +getent group "${CI_BUILD_GID}" || addgroup ${ADDUSER_OPTS} --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \ --gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" \ --gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" \ diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh index 072dd6ab99..1f0fd0387a 100755 --- a/tensorflow/tools/ci_build/ci_build.sh +++ b/tensorflow/tools/ci_build/ci_build.sh @@ -134,6 +134,12 @@ if [[ $? != "0" ]]; then die "ERROR: docker build failed. Dockerfile is at ${DOCKERFILE_PATH}" fi +# If caller wants the with_the_same_user script to allow bad usernames, +# pass the var to the docker environment +if [ -n "${CI_BUILD_USER_FORCE_BADNAME}" ]; then + CI_BUILD_USER_FORCE_BADNAME_ENV="-e CI_BUILD_USER_FORCE_BADNAME=yes" +fi + # Run the command inside the container. echo "Running '${COMMAND[*]}' inside ${DOCKER_IMG_NAME}..." 
mkdir -p ${WORKSPACE}/bazel-ci_build-cache @@ -148,6 +154,7 @@ ${DOCKER_BINARY} run --rm --pid=host \ -e "CI_BUILD_GROUP=$(id -g -n)" \ -e "CI_BUILD_GID=$(id -g)" \ -e "CI_TENSORFLOW_SUBMODULE_PATH=${CI_TENSORFLOW_SUBMODULE_PATH}" \ + ${CI_BUILD_USER_FORCE_BADNAME_ENV} \ -v ${WORKSPACE}:/workspace \ -w /workspace \ ${GPU_EXTRA_PARAMS} \ diff --git a/tensorflow/tools/ci_build/copy_binary.py b/tensorflow/tools/ci_build/copy_binary.py index 420d390d2b..148526492d 100755 --- a/tensorflow/tools/ci_build/copy_binary.py +++ b/tensorflow/tools/ci_build/copy_binary.py @@ -32,7 +32,8 @@ import shutil import tempfile import zipfile -TF_NIGHTLY_REGEX = r"(.+)tf_nightly(|_gpu)-(\d\.\d\.\d.dev[\d]{0,8})-(.+)\.whl" +TF_NIGHTLY_REGEX = (r"(.+)tf_nightly(|_gpu)-(\d\.[\d]{1,2}" + "\.\d.dev[\d]{0,8})-(.+)\.whl") BINARY_STRING_TEMPLATE = "%s-%s-%s.whl" diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh index 60290df833..88f1d04193 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh @@ -115,3 +115,7 @@ pip2 install keras_applications==1.0.2 pip3 install keras_applications==1.0.2 pip2 install keras_preprocessing==1.0.1 pip3 install keras_preprocessing==1.0.1 + +# Install last working version of setuptools. +pip2 install --upgrade setuptools==39.1.0 +pip3 install --upgrade setuptools==39.1.0 diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh index edb9d4b929..acd69ef346 100755 --- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh @@ -39,7 +39,6 @@ if [[ -z $pip35_version ]]; then fi set -e -pip3.5 install --upgrade setuptools pip3.5 install --upgrade pip pip3.5 install --upgrade virtualenv @@ -86,4 +85,7 @@ pip3.5 install --upgrade termcolor pip3.5 install keras_applications==1.0.2 pip3.5 install keras_preprocessing==1.0.1 +# Install last working version of setuptools. +pip3.5 install --upgrade setuptools==39.1.0 + # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh) diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh index 5635977731..323b30f48e 100755 --- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh @@ -49,7 +49,6 @@ cd Python-3.6.1 make altinstall ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 -pip3 install --upgrade setuptools pip3 install --upgrade pip pip3 install --upgrade virtualenv @@ -101,4 +100,8 @@ pip3 install --upgrade termcolor # Keras pip3.5 install keras_applications==1.0.2 pip3.5 install keras_preprocessing==1.0.1 + +# Install last working version of setuptools. +pip3 install --upgrade setuptools==39.1.0 + # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh) diff --git a/tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh b/tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh new file mode 100755 index 0000000000..10a09a415a --- /dev/null +++ b/tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Usage: basic_mkl_test.sh + +# Helper function to traverse directories up until given file is found. +function upsearch () { + test / == "$PWD" && return || \ + test -e "$1" && echo "$PWD" && return || \ + cd .. && upsearch "$1" +} + +# Set up WORKSPACE. +WORKSPACE="${WORKSPACE:-$(upsearch WORKSPACE)}" + +BUILD_TAG=mkl-ci-test CI_BUILD_USER_FORCE_BADNAME=yes ${WORKSPACE}/tensorflow/tools/ci_build/ci_build.sh cpu tensorflow/tools/ci_build/linux/cpu/run_mkl.sh diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh index 1bd1852ffc..b8bce57c87 100755 --- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh +++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh @@ -79,6 +79,7 @@ if [[ $1 == "PI_ONE" ]]; then --linkopt=-L${OPENBLAS_INSTALL_PATH}/lib/ --linkopt=-l:libopenblas.a" echo "Building for the Pi One/Zero, with no NEON support" + WHEEL_ARCH=linux_armv6l else PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4 --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR @@ -86,6 +87,7 @@ else --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8' + WHEEL_ARCH=linux_armv7l echo "Building for the Pi Two/Three, with NEON acceleration" fi @@ -100,6 +102,8 @@ bazel build -c opt ${PI_COPTS} \ --copt=-fomit-frame-pointer --cpu=armeabi \ --crosstool_top=@local_config_arm_compiler//:toolchain \ --verbose_failures \ + //tensorflow:libtensorflow.so \ + //tensorflow:libtensorflow_framework.so \ //tensorflow/tools/benchmark:benchmark_model \ //tensorflow/tools/pip_package:build_pip_package @@ -112,10 +116,12 @@ BDIST_OPTS="--universal" \ bazel-bin/tensorflow/tools/pip_package/build_pip_package "${OUTDIR}" OLD_FN=$(ls "${OUTDIR}" | grep -m 1 \.whl) -SUB='s/tensorflow-([^-]+)-([^-]+)-.*/tensorflow-\1-\2-none-any.whl/; print' +SUB='s/tensorflow-([^-]+)-([^-]+)-.*/tensorflow-\1-\2-none-'${WHEEL_ARCH}'.whl/; print' NEW_FN=$(echo "${OLD_FN}" | perl -ne "${SUB}") mv "${OUTDIR}/${OLD_FN}" "${OUTDIR}/${NEW_FN}" cp bazel-bin/tensorflow/tools/benchmark/benchmark_model "${OUTDIR}" +cp bazel-bin/tensorflow/libtensorflow.so "${OUTDIR}" +cp bazel-bin/tensorflow/libtensorflow_framework.so "${OUTDIR}" echo "Output can be found here:" find "${OUTDIR}" diff --git a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl index 47539b2423..f8f63e276c 100644 --- a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl +++ b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl @@ -31,7 +31,11 @@ def _def_file_filter_configure_impl(repository_ctx): vc_path = find_vc_path(repository_ctx) if vc_path == "visual-studio-not-found": auto_configure_fail("Visual C++ build tools not found on your machine") - undname_bin_path = find_msvc_tool(repository_ctx, vc_path, 
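The Raspberry Pi build above now tags the wheel with a real platform (`linux_armv6l` for Pi One/Zero, `linux_armv7l` for Pi Two/Three) instead of `any`, via the perl substitution on the bazel output name. An equivalent sketch of that rename in Python, using an illustrative input file name:

```python
import re

wheel_arch = "linux_armv7l"   # Pi Two/Three branch; the Pi One/Zero branch uses linux_armv6l
old_name = "tensorflow-1.9.0rc0-cp27-cp27mu-linux_x86_64.whl"  # hypothetical bazel output name
new_name = re.sub(r"tensorflow-([^-]+)-([^-]+)-.*",
                  r"tensorflow-\1-\2-none-" + wheel_arch + ".whl", old_name)
print(new_name)  # tensorflow-1.9.0rc0-cp27-none-linux_armv7l.whl
```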
"undname.exe").replace("\\", "\\\\") + + undname = find_msvc_tool(repository_ctx, vc_path, "undname.exe") + if undname == None: + auto_configure_fail("Couldn't find undname.exe under %s, please check your VC installation and set BAZEL_VC environment variable correctly." % vc_path) + undname_bin_path = undname.replace("\\", "\\\\") repository_ctx.template( "def_file_filter.py", diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh index 06c2b997cb..b0114721bd 100755 --- a/tensorflow/tools/dist_test/local_test.sh +++ b/tensorflow/tools/dist_test/local_test.sh @@ -64,9 +64,6 @@ die() { # Configurations DOCKER_IMG_NAME="tensorflow/tf-dist-test-local-cluster" -# Use TensorFlow v1.5.0 for Python 2.7 and CPU only as we set num_gpus to 0 in the below -DEFAULT_WHL_FILE_LOCATION="https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl" - # Parse input arguments LEAVE_CONTAINER_RUNNING=0 MODEL_NAME="" @@ -77,8 +74,7 @@ SYNC_REPLICAS_FLAG="" WHL_FILE_LOCATION=${1} if [[ -z "${WHL_FILE_LOCATION}" ]]; then - WHL_FILE_LOCATION=${DEFAULT_WHL_FILE_LOCATION} - echo "use default whl file location" + echo "WARNING: No wheel url passed. Will use latest tf-nightly cpu p2 wheel." fi while true; do @@ -131,7 +127,11 @@ echo "Building in temporary directory: ${BUILD_DIR}" cp -r ${DIR}/* "${BUILD_DIR}"/ || \ die "Failed to copy files to ${BUILD_DIR}" -if [[ $WHL_FILE_LOCATION =~ 'http://' || $WHL_FILE_LOCATION =~ 'https://' ]]; then +# Download whl file into the build context directory. +if [[ -z "${WHL_FILE_LOCATION}" ]]; then + pip2 download --no-deps tf-nightly + cp tf-nightly-*.whl "${BUILD_DIR}"/tensorflow-none-any.whl +elif [[ $WHL_FILE_LOCATION =~ 'http://' || $WHL_FILE_LOCATION =~ 'https://' ]]; then # Download whl file into the build context directory. wget -P "${BUILD_DIR}" "${WHL_FILE_LOCATION}" || \ die "Failed to download tensorflow whl file from URL: ${WHL_FILE_LOCATION}" diff --git a/tensorflow/tools/dist_test/remote_test.sh b/tensorflow/tools/dist_test/remote_test.sh index 935535312d..e188c88c8f 100755 --- a/tensorflow/tools/dist_test/remote_test.sh +++ b/tensorflow/tools/dist_test/remote_test.sh @@ -108,7 +108,7 @@ fi # Parse command-line arguments. WHL_URL=${1} if [[ -z "${WHL_URL}" ]]; then - die "whl URL is not specified" + echo "WARNING: No wheel url passed. Will use latest tf-nightly cpu p2 wheel." fi # Create docker build context directory. @@ -121,8 +121,13 @@ cp -r ${DIR}/* ${BUILD_DIR}/ || \ die "Failed to copy files to ${BUILD_DIR}" # Download whl file into the build context directory. -wget -P "${BUILD_DIR}" ${WHL_URL} || \ - die "Failed to download tensorflow whl file from URL: ${WHL_URL}" +if [[ -z "${WHL_URL}" ]]; then + pip2 download --no-deps tf-nightly + cp tf-nightly-*.whl "${BUILD_DIR}"/tensorflow-none-any.whl +else + wget -P "${BUILD_DIR}" ${WHL_URL} || \ + die "Failed to download tensorflow whl file from URL: ${WHL_URL}" +fi # Build docker image for test. docker build ${NO_CACHE_FLAG} \ diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 406d134699..57a491255e 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -76,7 +76,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.9 --depth=1 https://github.com/tensorflow/tensorflow.git . 
# TODO(craigcitro): Don't install the pip package, since it makes it # more difficult to experiment with local changes. Instead, just add diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl index a6cd44ced1..6796ad70e5 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -3,7 +3,7 @@ FROM tensorflow/tensorflow:latest-devel LABEL maintainer="Clayne Robison" # These arguments are parameterized. Use --build-args to override. -ARG TF_BRANCH=r1.8 +ARG TF_BRANCH=r1.9 ARG WHL_DIR=/whl RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 2fe47f3356..204b5b4dba 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -13,8 +13,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ cuda-cusparse-dev-9-0 \ curl \ git \ - libcudnn7=7.0.5.15-1+cuda9.0 \ - libcudnn7-dev=7.0.5.15-1+cuda9.0 \ + libcudnn7=7.1.4.18-1+cuda9.0 \ + libcudnn7-dev=7.1.4.18-1+cuda9.0 \ libcurl3-dev \ libfreetype6-dev \ libhdf5-serial-dev \ @@ -85,7 +85,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.9 --depth=1 https://github.com/tensorflow/tensorflow.git . # Configure the build for our CUDA configuration. ENV CI_BUILD_PYTHON python diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu index bff4a20392..9197651ff4 100644 --- a/tensorflow/tools/docker/Dockerfile.gpu +++ b/tensorflow/tools/docker/Dockerfile.gpu @@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ cuda-cusolver-9-0 \ cuda-cusparse-9-0 \ curl \ - libcudnn7=7.0.5.15-1+cuda9.0 \ + libcudnn7=7.1.4.18-1+cuda9.0 \ libfreetype6-dev \ libhdf5-serial-dev \ libpng12-dev \ diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 5910f0625e..620fef9363 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -61,6 +61,7 @@ COMMON_PIP_DEPS = [ "//tensorflow/contrib/autograph/core:core", "//tensorflow/contrib/autograph/impl:impl", "//tensorflow/contrib/autograph/lang:lang", + "//tensorflow/contrib/autograph/operators:operators", "//tensorflow/contrib/autograph/pyct:pyct", "//tensorflow/contrib/autograph/pyct/static_analysis:static_analysis", "//tensorflow/contrib/boosted_trees:boosted_trees_pip", diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index 0c4065bc77..f7e42ce536 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -41,51 +41,15 @@ function is_windows() { fi } -function main() { +function prepare_src() { if [ $# -lt 1 ] ; then echo "No destination dir provided" exit 1 fi - DEST=$(real_path $1) - TMPDIR=$(mktemp -d -t tmp.XXXXXXXXXX) - - PKG_NAME_FLAG="" - GPU_BUILD=0 - NIGHTLY_BUILD=0 - PROJECT_NAME="" - while true; do - if [[ "$1" == "--nightly_flag" ]]; then - NIGHTLY_BUILD=1 - elif [[ "$1" == "--gpu" ]]; then - GPU_BUILD=1 - elif [[ "$1" == "--gpudirect" ]]; then - PKG_NAME_FLAG="--project_name tensorflow_gpudirect" - elif [[ "$1" == "--project_name" ]]; then - shift - if [[ -z "$1" ]]; then - break - fi - PROJECT_NAME="$1" - fi - shift 
- - if [[ -z "$1" ]]; then - break - fi - done - - if [[ -n ${PROJECT_NAME} ]]; then - PKG_NAME_FLAG="--project_name ${PROJECT_NAME}" - elif [[ ${NIGHTLY_BUILD} == "1" && ${GPU_BUILD} == "1" ]]; then - PKG_NAME_FLAG="--project_name tf_nightly_gpu" - elif [[ ${NIGHTLY_BUILD} == "1" ]]; then - PKG_NAME_FLAG="--project_name tf_nightly" - elif [[ ${GPU_BUILD} == "1" ]]; then - PKG_NAME_FLAG="--project_name tensorflow_gpu" - fi - - echo $(date) : "=== Using tmpdir: ${TMPDIR}" + TMPDIR="$1" + mkdir -p "$TMPDIR" + echo $(date) : "=== Preparing sources in dir: ${TMPDIR}" if [ ! -d bazel-bin/tensorflow ]; then echo "Could not find bazel-bin. Did you run from the root of the build tree?" @@ -155,17 +119,28 @@ function main() { # over so user defined ops can be compiled. mkdir -p ${TMPDIR}/google mkdir -p ${TMPDIR}/third_party - pushd ${RUNFILES%org_tensorflow} + pushd ${RUNFILES%org_tensorflow} > /dev/null for header in $(find protobuf_archive -name \*.h); do mkdir -p "${TMPDIR}/google/$(dirname ${header})" cp "$header" "${TMPDIR}/google/$(dirname ${header})/" done - popd + popd > /dev/null cp -R $RUNFILES/third_party/eigen3 ${TMPDIR}/third_party cp tensorflow/tools/pip_package/MANIFEST.in ${TMPDIR} cp tensorflow/tools/pip_package/README ${TMPDIR} cp tensorflow/tools/pip_package/setup.py ${TMPDIR} +} + +function build_wheel() { + if [ $# -lt 2 ] ; then + echo "No src and dest dir provided" + exit 1 + fi + + TMPDIR="$1" + DEST="$2" + PKG_NAME_FLAG="$3" # Before we leave the top-level directory, make sure we know how to # call python. @@ -173,15 +148,110 @@ function main() { source tools/python_bin_path.sh fi - pushd ${TMPDIR} + pushd ${TMPDIR} > /dev/null rm -f MANIFEST echo $(date) : "=== Building wheel" "${PYTHON_BIN_PATH:-python}" setup.py bdist_wheel ${PKG_NAME_FLAG} >/dev/null mkdir -p ${DEST} cp dist/* ${DEST} - popd - rm -rf ${TMPDIR} + popd > /dev/null echo $(date) : "=== Output wheel file is in: ${DEST}" } +function usage() { + echo "Usage:" + echo "$0 [--src srcdir] [--dst dstdir] [options]" + echo "$0 dstdir [options]" + echo "" + echo " --src prepare sources in srcdir" + echo " will use temporary dir if not specified" + echo "" + echo " --dst build wheel in dstdir" + echo " if dstdir is not set do not build, only prepare sources" + echo "" + echo " Options:" + echo " --project_name set project name to name" + echo " --gpu build tensorflow_gpu" + echo " --gpudirect build tensorflow_gpudirect" + echo " --nightly_flag build tensorflow nightly" + echo "" + exit 1 +} + +function main() { + PKG_NAME_FLAG="" + PROJECT_NAME="" + GPU_BUILD=0 + NIGHTLY_BUILD=0 + SRCDIR="" + DSTDIR="" + CLEANSRC=1 + while true; do + if [[ "$1" == "--help" ]]; then + usage + exit 1 + elif [[ "$1" == "--nightly_flag" ]]; then + NIGHTLY_BUILD=1 + elif [[ "$1" == "--gpu" ]]; then + GPU_BUILD=1 + elif [[ "$1" == "--gpudirect" ]]; then + PKG_NAME_FLAG="--project_name tensorflow_gpudirect" + elif [[ "$1" == "--project_name" ]]; then + shift + if [[ -z "$1" ]]; then + break + fi + PROJECT_NAME="$1" + elif [[ "$1" == "--src" ]]; then + shift + SRCDIR="$(real_path $1)" + CLEANSRC=0 + elif [[ "$1" == "--dst" ]]; then + shift + DSTDIR="$(real_path $1)" + else + DSTDIR="$(real_path $1)" + fi + shift + + if [[ -z "$1" ]]; then + break + fi + done + + if [[ -z "$DSTDIR" ]] && [[ -z "$SRCDIR" ]]; then + echo "No destination dir provided" + usage + exit 1 + fi + + if [[ -z "$SRCDIR" ]]; then + # make temp srcdir if none set + SRCDIR="$(mktemp -d -t tmp.XXXXXXXXXX)" + fi + + prepare_src "$SRCDIR" + + if [[ -z "$DSTDIR" ]]; then + # 
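With the refactor above, `build_pip_package.sh` separates source staging (`prepare_src`) from wheel building (`build_wheel`), and `usage()` documents the new `--src`/`--dst` flags: `--src` alone only prepares sources (and keeps the tree afterwards, since `CLEANSRC` is cleared), while adding `--dst` also builds the wheel. An illustrative invocation from Python; the script path and directories are hypothetical:

```python
import subprocess

script = "bazel-bin/tensorflow/tools/pip_package/build_pip_package"

# Stage the pip package sources only; the tree in /tmp/tf_pkg_src is kept for reuse.
subprocess.check_call([script, "--src", "/tmp/tf_pkg_src"])

# Stage sources and build a nightly wheel into /tmp/tf_pkg in one invocation.
subprocess.check_call([script, "--src", "/tmp/tf_pkg_src",
                       "--dst", "/tmp/tf_pkg", "--nightly_flag"])
```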
only want to prepare sources + exit + fi + + if [[ -n ${PROJECT_NAME} ]]; then + PKG_NAME_FLAG="--project_name ${PROJECT_NAME}" + elif [[ ${NIGHTLY_BUILD} == "1" && ${GPU_BUILD} == "1" ]]; then + PKG_NAME_FLAG="--project_name tf_nightly_gpu" + elif [[ ${NIGHTLY_BUILD} == "1" ]]; then + PKG_NAME_FLAG="--project_name tf_nightly" + elif [[ ${GPU_BUILD} == "1" ]]; then + PKG_NAME_FLAG="--project_name tensorflow_gpu" + fi + + build_wheel "$SRCDIR" "$DSTDIR" "$PKG_NAME_FLAG" + + if [[ $CLEANSRC -ne 0 ]]; then + rm -rf "${TMPDIR}" + fi +} + main "$@" diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index d25a9e77b1..97f625e7e9 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n') # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.8.0' +_VERSION = '1.9.0-rc0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', @@ -54,6 +54,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', + 'setuptools <= 39.1.0', 'tensorboard >= 1.8.0, < 1.9.0', 'termcolor >= 1.1.0', ] diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc index 29add6d5ea..15d7c70281 100644 --- a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc +++ b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc @@ -814,6 +814,9 @@ void Generator::Generate(const FileDescriptor& fd) { // Add header to cc file. SetOutput(&cc_); Print("// GENERATED FILE - DO NOT MODIFY"); + Print(); + Print("#include "); // for `std::stable_sort()` + Print(); headers = {GetProtoTextHeaderName(fd, true /* impl */)}; AddHeadersToCurrentSection(headers); Print(); diff --git a/tensorflow/tools/quantization/quantize_graph_test.py b/tensorflow/tools/quantization/quantize_graph_test.py index df71840b64..92bb5127da 100644 --- a/tensorflow/tools/quantization/quantize_graph_test.py +++ b/tensorflow/tools/quantization/quantize_graph_test.py @@ -119,8 +119,8 @@ def are_tensors_near(a, b, tolerance): flat_a = a.flatten() flat_b = b.flatten() if len(flat_a) != len(flat_b): - print("Tensors are different sizes: " + str(len(flat_a)) + " vs " + str( - len(flat_b))) + tf_logging.info("Tensors are different sizes: " + str(len(flat_a)) + " vs " + + str(len(flat_b))) return False value_count = len(flat_a) how_many_different = 0 @@ -140,10 +140,10 @@ def are_tensors_near(a, b, tolerance): if how_many_different == 0: return True else: - print("Tensors have {0} different values ({1}%), with mean difference" - " {2} and mean absolute difference {3}".format( - how_many_different, proportion_different * 100, mean_difference, - mean_abs_difference)) + tf_logging.info("Tensors have {0} different values ({1}%), with mean" + " difference {2} and mean absolute difference {3}".format( + how_many_different, proportion_different * 100, + mean_difference, mean_abs_difference)) return False diff --git a/tensorflow/tools/test/upload_test_benchmarks.py b/tensorflow/tools/test/upload_test_benchmarks.py index 9c45359ee1..c030575109 100644 --- a/tensorflow/tools/test/upload_test_benchmarks.py +++ b/tensorflow/tools/test/upload_test_benchmarks.py @@ -89,7 +89,6 @@ import shutil from six import text_type from google.cloud import datastore -from six import text_type def is_real_file(dirpath, fname): diff --git a/tensorflow/workspace.bzl 
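The project-name selection at the end of the new `main()` above reduces to a small decision table; a sketch of the same logic (the `--gpudirect` case sets the flag directly during argument parsing and is omitted here):

```python
def pkg_name_flag(project_name="", nightly=False, gpu=False):
    """Mirror of the PKG_NAME_FLAG if/elif chain in the refactored main()."""
    if project_name:
        return "--project_name " + project_name
    if nightly and gpu:
        return "--project_name tf_nightly_gpu"
    if nightly:
        return "--project_name tf_nightly"
    if gpu:
        return "--project_name tensorflow_gpu"
    return ""  # plain "tensorflow" package

print(pkg_name_flag(nightly=True, gpu=True))  # --project_name tf_nightly_gpu
```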
b/tensorflow/workspace.bzl index dbec66216a..4f3df570a5 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -50,31 +50,31 @@ def tf_workspace(path_prefix="", tf_repo_name=""): mkl_repository( name = "mkl_linux", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_lnx_2018.0.2.20180127.tgz", - "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_lnx_2018.0.2.20180127.tgz", + "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.14/mklml_lnx_2018.0.3.20180406.tgz", + "https://github.com/intel/mkl-dnn/releases/download/v0.14/mklml_lnx_2018.0.3.20180406.tgz" ], - sha256 = "74844bd77294742bf2396ff040369d1aa4cdd9e826fcd38cf8398ae83564d146", - strip_prefix = "mklml_lnx_2018.0.2.20180127", + sha256 = "d2305244fdc9b87db7426ed4496e87a4b3977ad3374d73b8000e8b7a5b7aa725", + strip_prefix = "mklml_lnx_2018.0.3.20180406", build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) mkl_repository( name = "mkl_windows", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_win_2018.0.2.20180127.zip", - "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_win_2018.0.2.20180127.zip" + "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.14/mklml_win_2018.0.3.20180406.zip", + "https://github.com/intel/mkl-dnn/releases/download/v0.14/mklml_win_2018.0.3.20180406.zip" ], - sha256 = "d8fbf0faa0684bffa3548005d05fe5cfe56ff9dbc0e15e7612d7ac01055a6ded", - strip_prefix = "mklml_win_2018.0.2.20180127", + sha256 = "a584a5bf1c8d2ad70b90d12b52652030e9a338217719064fdb84b7ad0d693694", + strip_prefix = "mklml_win_2018.0.3.20180406", build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) mkl_repository( name = "mkl_darwin", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_mac_2018.0.2.20180127.tgz", - "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_mac_2018.0.2.20180127.tgz" + "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.14/mklml_mac_2018.0.3.20180406.tgz", + "https://github.com/intel/mkl-dnn/releases/download/v0.14/mklml_mac_2018.0.3.20180406.tgz" ], - sha256 = "aa740d71e14562bfea56e6829e6dc186e7487cbcf6748a88dec73826b7ec1943", - strip_prefix = "mklml_mac_2018.0.2.20180127", + sha256 = "094e3dfd61c816136dc8d12a45cc611ce26c5f4828176a3644cd0b0efa15a25b", + strip_prefix = "mklml_mac_2018.0.3.20180406", build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) @@ -85,11 +85,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "mkl_dnn", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/v0.13.tar.gz", - "https://github.com/intel/mkl-dnn/archive/v0.13.tar.gz", + "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/v0.14.tar.gz", + "https://github.com/intel/mkl-dnn/archive/v0.14.tar.gz", ], - sha256 = "d2cfd93a70cfe86ebe054477c530c9b5c1218b70f75856eb6d1956c68ee89e8f", - strip_prefix = "mkl-dnn-0.13", + sha256 = "efebc53882856afec86457a2da644693f5d59c68772d41d640d6b60a8efc4eb0", + strip_prefix = "mkl-dnn-0.14", build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"), ) @@ -187,11 +187,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "highwayhash", urls = [ - "https://mirror.bazel.build/github.com/google/highwayhash/archive/dfcb97ca4fe9277bf9dc1802dd979b071896453b.tar.gz", - "https://github.com/google/highwayhash/archive/dfcb97ca4fe9277bf9dc1802dd979b071896453b.tar.gz", + 
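The `sha256` and `strip_prefix` fields updated above pin the exact bytes of each mirrored archive. An illustrative way to reproduce one of those digests (requires network access; the URL is one of the mirrors listed above):

```python
import hashlib
import urllib.request

url = ("https://github.com/intel/mkl-dnn/releases/download/"
       "v0.14/mklml_lnx_2018.0.3.20180406.tgz")
data = urllib.request.urlopen(url).read()
# Should match the mkl_linux sha256 recorded above if the release artifact is unchanged.
print(hashlib.sha256(data).hexdigest())
```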
"http://mirror.bazel.build/github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz", + "https://github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz", ], - sha256 = "0f30a15b1566d93f146c8d149878a06e91d9bb7ec2cfd76906df62a82be4aac9", - strip_prefix = "highwayhash-dfcb97ca4fe9277bf9dc1802dd979b071896453b", + sha256 = "9c3e0e87d581feeb0c18d814d98f170ff23e62967a2bd6855847f0b2fe598a37", + strip_prefix = "highwayhash-fd3d9af80465e4383162e4a7c5e2f406e82dd968", build_file = clean_dep("//third_party:highwayhash.BUILD"), ) diff --git a/third_party/eigen.BUILD b/third_party/eigen.BUILD index 07bb6645eb..e54c1a4501 100644 --- a/third_party/eigen.BUILD +++ b/third_party/eigen.BUILD @@ -64,6 +64,7 @@ cc_library( # This define (mostly) guarantees we don't link any problematic # code. We use it, but we do not rely on it, as evidenced above. "EIGEN_MPL2_ONLY", + "EIGEN_MAX_ALIGN_BYTES=64", ], includes = ["."], visibility = ["//visibility:public"], diff --git a/third_party/highwayhash.BUILD b/third_party/highwayhash.BUILD index 1b8e40765e..08cb84ea2c 100644 --- a/third_party/highwayhash.BUILD +++ b/third_party/highwayhash.BUILD @@ -10,6 +10,7 @@ cc_library( srcs = ["highwayhash/sip_hash.cc"], hdrs = [ "highwayhash/sip_hash.h", + "highwayhash/endianess.h", "highwayhash/state_helpers.h", ], visibility = ["//visibility:public"], diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/jpeg.BUILD index 4418ac32fc..663a218733 100644 --- a/third_party/jpeg/jpeg.BUILD +++ b/third_party/jpeg/jpeg.BUILD @@ -291,8 +291,10 @@ cc_library( "jchuff.h", "jconfig.h", "jdct.h", + "jerror.h", "jinclude.h", "jmorecfg.h", + "jpegint.h", "jpeglib.h", "jsimd.h", "jsimddct.h", diff --git a/third_party/png.BUILD b/third_party/png.BUILD index 76ab32d69c..17c5449cc0 100644 --- a/third_party/png.BUILD +++ b/third_party/png.BUILD @@ -28,7 +28,14 @@ cc_library( "pngwrite.c", "pngwtran.c", "pngwutil.c", - ], + ] + select({ + "@org_tensorflow//tensorflow:linux_ppc64le": [ + "powerpc/powerpc_init.c", + "powerpc/filter_vsx_intrinsics.c", + ], + "//conditions:default": [ + ], + }), hdrs = [ "png.h", "pngconf.h", diff --git a/third_party/py/python_configure.bzl b/third_party/py/python_configure.bzl index 954f21f5f8..3c7e5c8469 100644 --- a/third_party/py/python_configure.bzl +++ b/third_party/py/python_configure.bzl @@ -6,6 +6,7 @@ * `PYTHON_LIB_PATH`: Location of python libraries. 
""" +_BAZEL_SH = "BAZEL_SH" _PYTHON_BIN_PATH = "PYTHON_BIN_PATH" _PYTHON_LIB_PATH = "PYTHON_LIB_PATH" _TF_PYTHON_CONFIG_REPO = "TF_PYTHON_CONFIG_REPO" @@ -152,6 +153,22 @@ def _get_python_bin(repository_ctx): _PYTHON_BIN_PATH, repository_ctx.os.environ.get("PATH", ""))) +def _get_bash_bin(repository_ctx): + """Gets the bash bin path.""" + bash_bin = repository_ctx.os.environ.get(_BAZEL_SH) + if bash_bin != None: + return bash_bin + else: + bash_bin_path = repository_ctx.which("bash") + if bash_bin_path != None: + return str(bash_bin_path) + else: + _fail("Cannot find bash in PATH, please make sure " + + "bash is installed and add its directory in PATH, or --define " + + "%s='/path/to/bash'.\nPATH=%s" % ( + _BAZEL_SH, repository_ctx.os.environ.get("PATH", ""))) + + def _get_python_lib(repository_ctx, python_bin): """Gets the python lib path.""" python_lib = repository_ctx.os.environ.get(_PYTHON_LIB_PATH) @@ -184,14 +201,14 @@ def _get_python_lib(repository_ctx, python_bin): " print(paths[0])\n" + "END") cmd = '%s - %s' % (python_bin, print_lib) - result = repository_ctx.execute(["bash", "-c", cmd]) + result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd]) return result.stdout.strip('\n') def _check_python_lib(repository_ctx, python_lib): """Checks the python lib path.""" cmd = 'test -d "%s" -a -x "%s"' % (python_lib, python_lib) - result = repository_ctx.execute(["bash", "-c", cmd]) + result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd]) if result.return_code == 1: _fail("Invalid python library path: %s" % python_lib) @@ -199,7 +216,7 @@ def _check_python_lib(repository_ctx, python_lib): def _check_python_bin(repository_ctx, python_bin): """Checks the python bin path.""" cmd = '[[ -x "%s" ]] && [[ ! -d "%s" ]]' % (python_bin, python_bin) - result = repository_ctx.execute(["bash", "-c", cmd]) + result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd]) if result.return_code == 1: _fail("--define %s='%s' is not executable. Is it the python binary?" % ( _PYTHON_BIN_PATH, python_bin)) @@ -294,6 +311,7 @@ def _python_autoconf_impl(repository_ctx): python_configure = repository_rule( implementation = _python_autoconf_impl, environ = [ + _BAZEL_SH, _PYTHON_BIN_PATH, _PYTHON_LIB_PATH, _TF_PYTHON_CONFIG_REPO, diff --git a/third_party/repo.bzl b/third_party/repo.bzl index 36f5aa5bde..cb67d3e961 100644 --- a/third_party/repo.bzl +++ b/third_party/repo.bzl @@ -17,7 +17,6 @@ _SINGLE_URL_WHITELIST = depset([ "arm_compiler", "ortools_archive", - "gemmlowp", ]) def _is_windows(ctx): @@ -88,7 +87,9 @@ def _tf_http_archive(ctx): if ctx.attr.patch_file != None: _apply_patch(ctx, ctx.attr.patch_file) if ctx.attr.build_file != None: - ctx.template("BUILD", ctx.attr.build_file, { + # Use BUILD.bazel to avoid conflict with third party projects with + # BUILD or build (directory) underneath. + ctx.template("BUILD.bazel", ctx.attr.build_file, { "%prefix%": ".." 
if _repos_are_siblings() else "external", }, False) -- cgit v1.2.3 From 148b4381fd0259cae441e459ec8ebe2c5d557722 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Mon, 18 Jun 2018 11:48:36 -0700 Subject: Automated g4 rollback of changelist 201011811 PiperOrigin-RevId: 201033171 --- CONTRIBUTING.md | 2 +- README.md | 1 - RELEASE.md | 67 +--- configure.py | 5 - tensorflow/BUILD | 4 +- tensorflow/c/generate-pc.sh | 11 +- tensorflow/cc/gradients/math_grad.cc | 1 - tensorflow/cc/gradients/nn_grad.cc | 47 --- tensorflow/cc/gradients/nn_grad_test.cc | 84 +---- tensorflow/compiler/aot/codegen_test_h.golden | 4 +- .../compiler/aot/embedded_protocol_buffers.h | 2 +- tensorflow/compiler/aot/runtime.h | 4 +- tensorflow/compiler/aot/runtime_test.cc | 16 +- tensorflow/compiler/xla/service/cpu/BUILD | 18 +- tensorflow/compiler/xla/service/cpu/cpu_runtime.cc | 2 - tensorflow/compiler/xla/service/cpu/cpu_runtime.h | 1 - tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 8 +- .../compiler/xla/service/cpu/runtime_fft_impl.h | 20 +- .../xla/service/cpu/runtime_single_threaded_fft.cc | 32 -- .../xla/service/cpu/runtime_single_threaded_fft.h | 31 -- .../compiler/xla/service/cpu/simple_orc_jit.cc | 2 - tensorflow/compiler/xla/service/pattern_matcher.h | 2 +- .../compiler/xla/service/tuple_simplifier.cc | 7 - tensorflow/compiler/xla/service/tuple_simplifier.h | 9 +- .../compiler/xla/service/tuple_simplifier_test.cc | 77 ---- tensorflow/contrib/autograph/__init__.py | 3 - tensorflow/contrib/cmake/tf_c.cmake | 22 +- tensorflow/contrib/cmake/tf_cc_ops.cmake | 2 +- tensorflow/contrib/cmake/tf_python.cmake | 3 +- tensorflow/contrib/cmake/tools/create_def_file.py | 9 +- .../bijectors/sinh_arcsinh_bijector_test.py | 28 +- tensorflow/contrib/eager/python/datasets.py | 3 +- .../python/examples/notebooks/4_high_level.ipynb | 4 +- .../feature_column/sequence_feature_column.py | 22 +- .../feature_column/sequence_feature_column_test.py | 41 -- tensorflow/contrib/ffmpeg/__init__.py | 1 + tensorflow/contrib/ffmpeg/ffmpeg_ops.py | 1 + tensorflow/contrib/framework/__init__.py | 3 +- .../ops/fused_conv2d_bias_activation_op_test.py | 11 +- .../src_impl/hexagon_controller.c | 2 +- tensorflow/contrib/lite/download_dependencies.sh | 4 +- .../contrib/lite/examples/minimal/minimal.cc | 2 +- .../contrib/lite/g3doc/tf_ops_compatibility.md | 14 +- tensorflow/contrib/lite/java/ovic/README.md | 4 +- .../kernels/internal/reference/reference_ops.h | 4 +- tensorflow/contrib/lite/python/interpreter.py | 2 +- .../interpreter_wrapper/interpreter_wrapper.cc | 9 +- .../interpreter_wrapper/interpreter_wrapper.h | 3 +- tensorflow/contrib/lite/python/lite.py | 11 - tensorflow/contrib/lite/toco/import_tensorflow.cc | 2 +- tensorflow/contrib/lite/toco/toco_port.cc | 6 - tensorflow/contrib/lite/toco/toco_port.h | 18 - tensorflow/contrib/makefile/compile_nsync.sh | 2 +- .../contrib/makefile/download_dependencies.sh | 4 +- .../contrib/metrics/python/ops/metric_ops.py | 2 +- tensorflow/contrib/mpi_collectives/kernels/ring.h | 2 +- .../contrib/opt/python/training/adamax_test.py | 6 +- .../opt/python/training/model_average_optimizer.py | 2 +- tensorflow/contrib/periodic_resample/BUILD | 20 +- .../kernels/periodic_resample_op.cc | 5 - .../kernels/periodic_resample_op.h | 415 ++++++--------------- .../contrib/periodic_resample/ops/array_ops.cc | 53 +-- .../periodic_resample/ops/array_ops_test.cc | 41 -- .../kernel_tests/periodic_resample_op_test.py | 27 +- .../python/ops/periodic_resample_op.py | 8 +- .../predictor/contrib_estimator_predictor.py | 5 +- 
.../contrib/predictor/core_estimator_predictor.py | 5 +- .../contrib/predictor/predictor_factories.py | 24 +- .../contrib/predictor/predictor_factories_test.py | 19 - .../contrib/predictor/saved_model_predictor.py | 6 +- tensorflow/contrib/quantize/README.md | 2 +- .../contrib/slim/python/slim/evaluation_test.py | 25 +- tensorflow/contrib/summary/summary.py | 5 +- .../contrib/tensor_forest/client/eval_metrics.py | 45 ++- .../contrib/tensor_forest/python/tensor_forest.py | 34 +- .../tensor_forest/python/tensor_forest_test.py | 45 --- .../contrib/tensorrt/convert/convert_graph.cc | 66 ++-- .../contrib/tensorrt/convert/convert_nodes.cc | 97 ++--- tensorflow/contrib/tpu/python/tpu/datasets.py | 16 +- tensorflow/contrib/tpu/python/tpu/datasets_test.py | 26 -- tensorflow/core/BUILD | 9 +- .../core/api_def/base_api/api_def_Selu.pbtxt | 4 - .../api_def/base_api/api_def_StringSplitV2.pbtxt | 48 --- .../api_def/python_api/api_def_StringSplitV2.pbtxt | 4 - tensorflow/core/common_runtime/bfc_allocator.cc | 8 +- tensorflow/core/common_runtime/bfc_allocator.h | 3 +- .../direct_session_with_tracking_alloc_test.cc | 16 - .../common_runtime/mkl_threadpool_device_test.cc | 53 --- tensorflow/core/common_runtime/process_util.cc | 11 +- .../core/common_runtime/threadpool_device.cc | 25 +- .../rpc/grpc_master_service_impl.cc | 4 +- .../core/distributed_runtime/rpc/grpc_testlib.cc | 10 +- tensorflow/core/framework/allocator.h | 5 + tensorflow/core/framework/op_gen_lib.cc | 1 - .../remote_fused_graph_execute_info.proto | 2 +- tensorflow/core/framework/tensor_test.cc | 24 +- tensorflow/core/graph/mkl_layout_pass.cc | 148 +------- tensorflow/core/graph/mkl_layout_pass_test.cc | 31 -- tensorflow/core/grappler/costs/graph_properties.cc | 1 + tensorflow/core/grappler/optimizers/BUILD | 2 +- tensorflow/core/grappler/optimizers/remapper.cc | 4 +- tensorflow/core/kernels/as_string_op.cc | 2 - tensorflow/core/kernels/cwise_op_clip.cc | 43 ++- .../core/kernels/dense_update_functor_gpu.cu.cc | 1 - tensorflow/core/kernels/gather_functor.cc | 1 - tensorflow/core/kernels/gather_functor_gpu.cu.cc | 1 - tensorflow/core/kernels/gather_nd_op.cc | 4 - tensorflow/core/kernels/gather_nd_op_gpu.cu.cc | 2 - tensorflow/core/kernels/gather_op.cc | 1 - tensorflow/core/kernels/mkl_concat_op.cc | 213 +++-------- tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc | 2 - tensorflow/core/kernels/mkl_pooling_ops_common.h | 6 +- tensorflow/core/kernels/scatter_nd_op.cc | 4 - tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc | 1 - .../core/kernels/scoped_allocator_ops_test.cc | 9 +- tensorflow/core/kernels/segment_reduction_ops.h | 10 +- tensorflow/core/kernels/sparse_matmul_op.cc | 2 +- tensorflow/core/kernels/string_split_op.cc | 130 ------- tensorflow/core/ops/candidate_sampling_ops.cc | 5 +- tensorflow/core/ops/dataset_ops.cc | 24 +- tensorflow/core/ops/image_ops.cc | 4 +- tensorflow/core/ops/math_ops.cc | 2 +- tensorflow/core/ops/nn_ops.cc | 1 - tensorflow/core/ops/string_ops.cc | 20 +- tensorflow/core/platform/cpu_info.cc | 23 -- tensorflow/core/platform/cpu_info.h | 7 - tensorflow/core/platform/default/build_config.bzl | 2 - .../core/platform/hadoop/hadoop_file_system.cc | 21 +- tensorflow/core/platform/posix/port.cc | 5 - tensorflow/core/public/version.h | 4 +- tensorflow/core/util/mkl_util.h | 50 +-- tensorflow/docs_src/community/groups.md | 29 +- tensorflow/docs_src/get_started/eager.md | 2 +- tensorflow/docs_src/get_started/index.md | 4 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- 
tensorflow/docs_src/install/install_java.md | 24 +- tensorflow/docs_src/install/install_linux.md | 24 +- tensorflow/docs_src/install/install_mac.md | 10 +- tensorflow/docs_src/install/install_sources.md | 17 +- tensorflow/docs_src/mobile/linking_libs.md | 2 +- tensorflow/docs_src/mobile/prepare_models.md | 4 +- tensorflow/docs_src/performance/quantization.md | 2 +- .../docs_src/programmers_guide/estimators.md | 19 +- .../docs_src/programmers_guide/feature_columns.md | 4 +- tensorflow/examples/learn/iris.py | 7 +- tensorflow/java/src/gen/cc/op_generator.cc | 11 +- tensorflow/java/src/gen/cc/op_specs.cc | 1 - tensorflow/python/eager/backprop.py | 4 +- tensorflow/python/estimator/BUILD | 5 +- tensorflow/python/estimator/exporter.py | 4 +- tensorflow/python/estimator/inputs/numpy_io.py | 8 +- .../python/estimator/inputs/numpy_io_test.py | 5 +- tensorflow/python/estimator/inputs/pandas_io.py | 7 +- .../python/estimator/inputs/pandas_io_test.py | 5 +- .../estimator/inputs/queues/feeding_functions.py | 2 +- tensorflow/python/estimator/keras.py | 4 +- tensorflow/python/estimator/keras_test.py | 14 +- .../python/grappler/layout_optimizer_test.py | 4 +- tensorflow/python/keras/activations.py | 2 - tensorflow/python/keras/callbacks.py | 21 +- tensorflow/python/keras/callbacks_test.py | 2 - tensorflow/python/keras/engine/network.py | 2 +- tensorflow/python/keras/engine/saving_test.py | 4 +- tensorflow/python/keras/engine/training.py | 7 +- tensorflow/python/keras/engine/training_eager.py | 2 +- tensorflow/python/keras/initializers_test.py | 26 +- tensorflow/python/keras/layers/core.py | 26 +- tensorflow/python/keras/models_test.py | 14 - .../python/kernel_tests/as_string_op_test.py | 10 - tensorflow/python/kernel_tests/betainc_op_test.py | 4 +- tensorflow/python/kernel_tests/clip_ops_test.py | 13 - tensorflow/python/kernel_tests/conv_ops_test.py | 32 +- .../python/kernel_tests/gather_nd_op_test.py | 32 +- tensorflow/python/kernel_tests/gather_op_test.py | 20 +- tensorflow/python/kernel_tests/init_ops_test.py | 27 -- tensorflow/python/kernel_tests/pooling_ops_test.py | 4 +- tensorflow/python/kernel_tests/py_func_test.py | 31 +- .../python/kernel_tests/scatter_nd_ops_test.py | 6 +- tensorflow/python/kernel_tests/scatter_ops_test.py | 14 +- .../kernel_tests/segment_reduction_ops_test.py | 4 +- .../python/kernel_tests/string_split_op_test.py | 96 ----- tensorflow/python/ops/array_ops.py | 4 - tensorflow/python/ops/gradient_checker.py | 8 +- tensorflow/python/ops/image_ops_impl.py | 74 ++-- tensorflow/python/ops/image_ops_test.py | 261 +++---------- tensorflow/python/ops/init_ops.py | 3 +- tensorflow/python/ops/logging_ops.py | 5 +- tensorflow/python/ops/math_ops.py | 28 +- tensorflow/python/ops/nn_impl.py | 5 +- tensorflow/python/ops/nn_ops.py | 4 +- tensorflow/python/ops/nn_test.py | 10 - tensorflow/python/ops/script_ops.py | 35 +- tensorflow/python/ops/sparse_ops.py | 4 - tensorflow/python/ops/string_ops.py | 53 --- tensorflow/python/ops/variable_scope.py | 21 +- .../python/tools/import_pb_to_tensorboard.py | 0 tensorflow/tensorflow.bzl | 2 +- .../tools/api/generator/create_python_api.py | 8 +- tensorflow/tools/api/golden/tensorflow.image.pbtxt | 2 +- tensorflow/tools/api/golden/tensorflow.pbtxt | 4 - .../tools/api/golden/tensorflow.strings.pbtxt | 4 - tensorflow/tools/ci_build/builds/pip.sh | 4 - .../tools/ci_build/builds/with_the_same_user | 2 +- tensorflow/tools/ci_build/ci_build.sh | 7 - tensorflow/tools/ci_build/copy_binary.py | 3 +- .../tools/ci_build/install/install_pip_packages.sh | 4 - 
.../install/install_python3.5_pip_packages.sh | 4 +- .../install/install_python3.6_pip_packages.sh | 5 +- .../tools/ci_build/linux/mkl/basic-mkl-test.sh | 29 -- tensorflow/tools/ci_build/pi/build_raspberry_pi.sh | 8 +- .../def_file_filter/def_file_filter_configure.bzl | 6 +- tensorflow/tools/dist_test/local_test.sh | 12 +- tensorflow/tools/dist_test/remote_test.sh | 11 +- tensorflow/tools/docker/Dockerfile.devel | 2 +- tensorflow/tools/docker/Dockerfile.devel-cpu-mkl | 2 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 6 +- tensorflow/tools/docker/Dockerfile.gpu | 2 +- tensorflow/tools/pip_package/BUILD | 1 - tensorflow/tools/pip_package/build_pip_package.sh | 160 +++----- tensorflow/tools/pip_package/setup.py | 3 +- .../proto_text/gen_proto_text_functions_lib.cc | 3 - .../tools/quantization/quantize_graph_test.py | 12 +- tensorflow/tools/test/upload_test_benchmarks.py | 1 + tensorflow/workspace.bzl | 40 +- third_party/eigen.BUILD | 1 - third_party/highwayhash.BUILD | 1 - third_party/jpeg/jpeg.BUILD | 2 - third_party/png.BUILD | 9 +- third_party/py/python_configure.bzl | 24 +- third_party/repo.bzl | 5 +- 231 files changed, 903 insertions(+), 3337 deletions(-) delete mode 100644 tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc delete mode 100644 tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h delete mode 100644 tensorflow/contrib/periodic_resample/ops/array_ops_test.cc delete mode 100644 tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt delete mode 100644 tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt delete mode 100644 tensorflow/core/common_runtime/mkl_threadpool_device_test.cc mode change 100644 => 100755 tensorflow/python/tools/import_pb_to_tensorboard.py delete mode 100755 tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh (limited to 'configure.py') diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index db4b1581ae..8669c25c45 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -90,7 +90,7 @@ Bazel BUILD files also need to include a license section, e.g., Changes to TensorFlow C++ code should conform to [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). -Use `clang-tidy` to check your C/C++ changes. To install `clang-tidy` on ubuntu:16.04, do: +Use `clang-tidy` to check your C/C++ changes. To install clang-tidy on ubuntu:16.04, do: ```bash apt-get install -y clang-tidy diff --git a/README.md b/README.md index 63853137cf..6fb4486d0d 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,6 @@ $ python 42 >>> sess.close() ``` -Learn more examples about how to do specific tasks in TensorFlow at the [tutorials page of tensorflow.org](https://www.tensorflow.org/tutorials/). ## Contribution guidelines diff --git a/RELEASE.md b/RELEASE.md index e09e9c6190..84d9d52868 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,62 +1,3 @@ -# Release 1.9.0 - -## Major Features And Improvements -* Update tf.keras to the Keras 2.1.6 API. -* `tfe.Network` is deprecated. Please inherit from `tf.keras.Model`. -* Adding support of core feature columns and losses to gradient boosted trees estimators. -* The distributions.Bijector API supports broadcasting for Bijectors with new API changes. See [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/distributions/bijectors/Bijector) for more details. -* Layered variable names have changed in the following conditions: - * Using `tf.keras.layers` with custom variable scopes. - * Using `tf.layers` in a subclassed `tf.keras.Model` class. 
See [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/layers) for more details - -## Breaking Chances - * If you're opening empty variable scopes; replace `variable_scope`('', ...) by `variable_scope`(`tf.get_variable_scope()`, ...). - -## Bug Fixes and Other Changes -* `tf.data`: - * The `DatasetBase::DebugString()` method is now `const`. - * Added the `tf.contrib.data.sample_from_datasets()` API for randomly sampling from multiple datasets. -* Eager Execution: -* `tf.keras`: - * Move Keras code out of _impl folder and remove API files. - * `tf.keras.Model.save_weights` now saves in TensorFlow format by default. - * Enable dataset iterators to be passed to `tf.keras.Model` training/eval methods. -* Accelerated Linear Algebra (XLA): -* TensorFlow Debugger (tfdbg): fix an issue in which the TensorBoard Debugger Plugin could not handle total source file size exceeding gRPC message size limit (4 MB). -* `tf.contrib`: - * Add `tf.contrib.data.choose_from_datasets()`. - * `tf.contrib.data.make_csv_dataset()` now supports line breaks in quoted strings. Two arguments were removed from `make_csv_dataset`. - * `tf.contrib.framework.zero_initializer` supports ResourceVariable. - * Adding "constrained_optimization" to tensorflow/contrib. -* Other: - * Add GCS Configuration Ops. - * Changing signature of `MakeIterator` to enable propagating error status. - * KL divergence for two Dirichlet distributions. - * More consistent GcsFileSystem behavior for certain reads past EOF. - * Update benchmark for tf.scan to match ranges across eager and graph modes. - * Fixed bug in `tf.reduce_prod gradient` for complex dtypes. - * Add optional `args` argument to `Dataset.from_generator()`. - * Allow the use of '.' in variables (e.g. "hparams.parse('a.b=1.0')"), which would previously raise an error. This will correspond to an attribute name with an embedded '.' symbol (e.g. 'a.b'), which can only be accessed indirectly (e.g. through getattr and setattr). To set this up the user will first need to explicitly add the variable to the hparam object (e.g. "hparams.add_hparam(name='a.b', value=0.0)"). - * Benchmark for tf.scan in graph and eager modes. - * Added complex128 support to FFT, FFT2D, FFT3D, IFFT, IFFT2D, and IFFT3D. - * Making ids unique in `nn.embedding_lookup_sparse`. This helps to reduce RPC calls for looking up the embeddings when there are repeated ids in the batch. - * Support indicator column in boosted trees. - * Prevent `tf.gradients()` from backpropagating through integer tensors. - * LinearOperator[1D,2D,3D]Circulant added to `tensorflow.linalg`. - * Conv3D, Conv3DBackpropInput, Conv3DBackpropFilter now supports arbitrary. - * Added `tf.train.Checkpoint` for reading/writing object-based checkpoints. - * `Dataset.list_files()` now produces determinstic results when `shuffle=False` or a `seed` is passed. - * Added LinearOperatorKronecker, a dense-free implementation of the Kronecker Product. - * Allow LinearOperator to broadcast. - * SavedModelBuilder will now deduplicate asset names that point to files with the same basename and the same contents. Note that this may result in new asset files included in SavedModels in cases where assets with the same name but different contents were previously overwriting each other. 
- - -## Thanks to our Contributors - -This release contains contributions from many people at Google, as well as: - -Abdullah Alrasheed, Achal Shah, Ad-530, ADiegoCAlonso, Aditya Yogi, Ag Ramesh, akindyakov, Andy Kernahan, Anya Petrova, Aurelien Geron, Ben, Ben Barsdell, Bhavani-Subramanian, braincodercn, Brett Koonce, Brian Nemsick, Brian Zier, Bryan Heden, candy.dc, cclauss, Clayne Robison, ctiijima, Dalmo Cirne, David Norman, David T.H. Kao, DosLin, ekelsen, Elson Rodriguez, Erik Smistad, Felix Abecassis, Fergal Cotter, fo40225, foo0x29a, Freedom" Koan-Sin Tan, FréDéRic Branchaud-Charron, gdh1995, Geoffrey Irving, Giuseppe, gracehoney, Guido Zuidhof, Guillaume Klein, Guozhong Zhuang, Haggai, Harald Husum, imsheridan, Ivan Zhang, Jan Zikes, Jayaram Bobba, Jesse Benson, Jesse Gumz, Jiajia Li, Jie, jinghuangintel, Jingwen, jjsjann123, Joe Yearsley, Joel Hestness, Joel Shor, josephyearsley, Junpeng Lao, Karol M. Langner, Kb Sriram, krantideep95, Krish Ravindranath, Letian Feng, Loo Rong Jie, Lukas Geiger, Maciej, Mahmoud Abuzaina, ManHyuk, Mark Ryan, mbhuiyan, Michal Turek, Mostafa Alaa, Myungsung Kwak, Nand Dalal, Nehal J Wani, Neil Tenenholtz, ngc92, Nicholas Nadeau, P.Eng., Avs, Niranjan Hasabnis, P-Hidringer, Paul Van Eck, Peng Yu, Qing Zhao, Qingying Chen, Quanlong, Rajendra Arora, Rholais Lii, rmanyari, Robin Richtsfeld, Russell Klopfer, Sagi, Sam Sendelbach, Sandeep N Gupta, Sandip Giri, Sarah Edkins, Scott Tseng, Sdalbsoo, Sergii Khomenko, Seungwoo Choi (Biggie), Seyed Majid Azimi, Shaoning Zeng, shengfuintel, Siu Kei, Muk, Smit Shilu, soonson, Stefan Schweter, Sukhwan Kim, Sunitha Kambhampati, Taehoon Lee, tamimaddari82, Tang, Wenyi, Ted Chang, u2takey, Utkarsh Upadhyay, Vadim Markovtsev, voegtlel, Wai Hon Law, wangsiyu, Wenhao Hu, wenhao.hu, William D. Irons, Yan Facai (颜发才), Yanbo Liang, Yihong Wang, Yilei (Dolee) Yang, Yong Tang, Yuan (Terry) Tang - # Release 1.8.0 ## Major Features And Improvements @@ -463,6 +404,14 @@ answered questions, and were part of inspiring discussions. # Release 1.4.0 +## Major Features And Improvements +* `tf.keras` is now part of the core TensorFlow API. +* [`tf.data`](http://tensorflow.org/programmers_guide/datasets) is now part of + the core TensorFlow API. + * The API is now subject to backwards compatibility guarantees. + +# Release 1.4.0 + ## Major Features And Improvements * `tf.keras` is now part of the core TensorFlow API. * [`tf.data`](http://tensorflow.org/programmers_guide/datasets) is now part of diff --git a/configure.py b/configure.py index ada342a50a..bde7af8c0e 100644 --- a/configure.py +++ b/configure.py @@ -1397,10 +1397,6 @@ def set_grpc_build_flags(): write_to_bazelrc('build --define grpc_no_ares=true') -def set_build_strip_flag(): - write_to_bazelrc('build --strip=always') - - def set_windows_build_flags(): if is_windows(): # The non-monolithic build is not supported yet @@ -1523,7 +1519,6 @@ def main(): set_grpc_build_flags() set_cc_opt_flags(environ_cp) - set_build_strip_flag() set_windows_build_flags() if get_var( diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 6d134dbb80..a73c4ca3aa 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -475,7 +475,7 @@ tf_cc_shared_object( # excludes all but a subset of function names. # On MacOS, the linker does not support version_script, but has an # an "-exported_symbols_list" command. -z defs disallows undefined -# symbols in object files. +# symbols in object files and -s strips the output. 
tf_cc_shared_object( name = "libtensorflow.so", @@ -489,6 +489,7 @@ tf_cc_shared_object( "//tensorflow:windows_msvc": [], "//conditions:default": [ "-z defs", + "-s", "-Wl,--version-script", # This line must be directly followed by the version_script.lds file "$(location //tensorflow/c:version_script.lds)", ], @@ -514,6 +515,7 @@ tf_cc_shared_object( "//tensorflow:windows_msvc": [], "//conditions:default": [ "-z defs", + "-s", "-Wl,--version-script", # This line must be directly followed by the version_script.lds file "$(location //tensorflow:tf_version_script.lds)", ], diff --git a/tensorflow/c/generate-pc.sh b/tensorflow/c/generate-pc.sh index 7184ad68fb..02a6a58b61 100755 --- a/tensorflow/c/generate-pc.sh +++ b/tensorflow/c/generate-pc.sh @@ -15,12 +15,10 @@ # ============================================================================== TF_PREFIX='/usr/local' -LIBDIR='lib' usage() { echo "Usage: $0 OPTIONS" echo -e "-p, --prefix\tset installation prefix (default: /usr/local)" - echo -e "-l, --libdir\tset lib directory (default: lib)" echo -e "-v, --version\tset TensorFlow version" echo -e "-h, --help\tdisplay this message" } @@ -28,7 +26,7 @@ usage() { [ $# == 0 ] && usage && exit 0 # read the options -ARGS=$(getopt -o p:l:v:h --long prefix:,libdir:,version:,help -n $0 -- "$@") +ARGS=$(getopt -o p:v:h --long prefix:,version:,help -n $0 -- "$@") eval set -- "$ARGS" # extract options and their arguments into variables. @@ -40,11 +38,6 @@ while true ; do "") shift 2 ;; *) TF_PREFIX=$2 ; shift 2 ;; esac ;; - -l|--libdir) - case "$2" in - "") shift 2 ;; - *) LIBDIR=$2 ; shift 2 ;; - esac ;; -v|--version) case "$2" in "") shift 2 ;; @@ -62,7 +55,7 @@ echo "Generating pkgconfig file for TensorFlow $TF_VERSION in $TF_PREFIX" cat << EOF > tensorflow.pc prefix=${TF_PREFIX} exec_prefix=\${prefix} -libdir=\${exec_prefix}/${LIBDIR} +libdir=\${exec_prefix}/lib includedir=\${prefix}/include Name: TensorFlow diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc index 35a01e0341..52c177212a 100644 --- a/tensorflow/cc/gradients/math_grad.cc +++ b/tensorflow/cc/gradients/math_grad.cc @@ -38,7 +38,6 @@ REGISTER_NO_GRADIENT_OP("NotEqual"); REGISTER_NO_GRADIENT_OP("LogicalAnd"); REGISTER_NO_GRADIENT_OP("LogicalOr"); REGISTER_NO_GRADIENT_OP("LogicalNot"); -REGISTER_NO_GRADIENT_OP("Floor"); // Conjugate helper function returns the conjugate of an Output if it // is complex valued. 
diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index c73482d5f4..0cb3132e94 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -255,53 +255,6 @@ Status LRNGradHelper(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("LRN", LRNGradHelper); -Status SoftplusGradHelper(const Scope& scope, const Operation& op, - const std::vector& grad_inputs, - std::vector* grad_outputs) { - auto dx = internal::SoftplusGrad(scope, grad_inputs[0], op.input(0)); - grad_outputs->push_back(dx); - return scope.status(); -} -REGISTER_GRADIENT_OP("Softplus", SoftplusGradHelper); - -Status SoftsignGradHelper(const Scope& scope, const Operation& op, - const std::vector& grad_inputs, - std::vector* grad_outputs) { - auto dx = internal::SoftsignGrad(scope, grad_inputs[0], op.input(0)); - grad_outputs->push_back(dx); - return scope.status(); -} -REGISTER_GRADIENT_OP("Softsign", SoftsignGradHelper); - -Status FractionalAvgPoolGradHelper(const Scope& scope, const Operation& op, - const std::vector& grad_inputs, - std::vector* grad_outputs) { - bool overlapping; - TF_RETURN_IF_ERROR( - GetNodeAttr(op.output(0).node()->attrs(), "overlapping", &overlapping)); - auto dx = internal::FractionalAvgPoolGrad( - scope, Shape(scope, op.input(0), Shape::OutType(DT_INT64)), - grad_inputs[0], op.output(1), op.output(2), - internal::FractionalAvgPoolGrad::Overlapping(overlapping)); - grad_outputs->push_back(dx); - return scope.status(); -} -REGISTER_GRADIENT_OP("FractionalAvgPool", FractionalAvgPoolGradHelper); - -Status FractionalMaxPoolGradHelper(const Scope& scope, const Operation& op, - const std::vector& grad_inputs, - std::vector* grad_outputs) { - bool overlapping; - TF_RETURN_IF_ERROR( - GetNodeAttr(op.output(0).node()->attrs(), "overlapping", &overlapping)); - auto dx = internal::FractionalMaxPoolGrad( - scope, op.input(0), op.output(0), grad_inputs[0], op.output(1), - op.output(2), internal::FractionalMaxPoolGrad::Overlapping(overlapping)); - grad_outputs->push_back(dx); - return scope.status(); -} -REGISTER_GRADIENT_OP("FractionalMaxPool", FractionalMaxPoolGradHelper); - } // anonymous namespace } // namespace ops } // namespace tensorflow diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index b4d457a9d1..c4eba7ecb0 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -28,8 +28,6 @@ namespace { using ops::BiasAdd; using ops::Conv2D; using ops::Elu; -using ops::FractionalAvgPool; -using ops::FractionalMaxPool; using ops::L2Loss; using ops::LogSoftmax; using ops::LRN; @@ -43,8 +41,6 @@ using ops::Relu; using ops::Relu6; using ops::Selu; using ops::Softmax; -using ops::Softplus; -using ops::Softsign; class NNGradTest : public ::testing::Test { protected: @@ -75,30 +71,22 @@ class NNGradTest : public ::testing::Test { EXPECT_LT(max_error, 1e-3); } - // Sets tensor with random values, ensuring that every pair of elements are at - // least a reasonable amount apart. - // This is an issue for max pooling operations, in which perturbations by the - // numeric gradient computation in the gradient checker can change the max - // value if a pool has values that are too close together. + // Sets tensor with random values, ensuring that the max value is largest by + // a reasonable amount. 
+ // This is an issue for MaxPool, MaxPoolV2 and MaxPool3D, in which + // perturbations by the numeric gradient computation in the gradient checker + // can change the max value if values are too close together. template - void SetRandomValuesForMaxPooling(Tensor* tensor) { + void SetRandomValuesWithBumpedMax(Tensor* tensor) { auto tensor_flat = tensor->flat(); - // First set the array to an increasing sequence of values spaced - // a reasonable amount apart - T cur = 0; - for (size_t i = 0; i < tensor->NumElements(); i++) { - tensor_flat(i) = cur; - cur += 5e-2; - } - // Fischer-Yates shuffle the array - for (size_t i = tensor->NumElements() - 1; i >= 1; i--) { - // j <- random integer 0 <= j <= i - size_t j = random::New64() % (i + 1); - // swap values at i, j - T tmp = tensor_flat(i); - tensor_flat(i) = tensor_flat(j); - tensor_flat(j) = tmp; + tensor_flat.setRandom(); + int32 max_index = 0; + for (size_t i = 1; i < tensor->NumElements(); i++) { + if (tensor_flat(i) > tensor_flat(max_index)) { + max_index = i; + } } + tensor_flat(max_index) += 1e-2; } Scope scope_; @@ -201,7 +189,7 @@ TEST_F(NNGradTest, MaxPoolGradHelper) { const std::vector strides{1, 2, 2, 1}; auto y = MaxPool(scope_, x, ksize, strides, "VALID"); Tensor x_init_value = Tensor(DT_FLOAT, x_shape); - SetRandomValuesForMaxPooling(&x_init_value); + SetRandomValuesWithBumpedMax(&x_init_value); RunTest(x, x_init_value, y, y_shape); } @@ -214,7 +202,7 @@ TEST_F(NNGradTest, MaxPoolGradV2Helper) { Tensor strides = test::AsTensor({1, 2, 2, 1}, {4}); auto y = MaxPoolV2(scope_, x, ksize, strides, "VALID"); Tensor x_init_value = Tensor(DT_FLOAT, x_shape); - SetRandomValuesForMaxPooling(&x_init_value); + SetRandomValuesWithBumpedMax(&x_init_value); RunTest(x, x_init_value, y, y_shape); } @@ -227,7 +215,7 @@ TEST_F(NNGradTest, MaxPool3DGradHelper) { const std::vector strides{1, 3, 3, 3, 1}; auto y = MaxPool3D(scope_, x, ksize, strides, "VALID"); Tensor x_init_value = Tensor(DT_FLOAT, x_shape); - SetRandomValuesForMaxPooling(&x_init_value); + SetRandomValuesWithBumpedMax(&x_init_value); RunTest(x, x_init_value, y, y_shape); } @@ -260,45 +248,5 @@ TEST_F(NNGradTest, LRN){ RunTest(x, x_shape, y, x_shape); } -TEST_F(NNGradTest, SoftplusGrad) { - TensorShape shape({3, 7}); - auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); - auto y = Softplus(scope_, x); - RunTest(x, shape, y, shape); -} - -TEST_F(NNGradTest, SoftsignGrad) { - TensorShape shape({3, 7}); - auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); - auto y = Softsign(scope_, x); - RunTest(x, shape, y, shape); -} - -TEST_F(NNGradTest, FractionalAvgPoolGradHelper) { - TensorShape x_shape({1, 3, 7, 1}); - auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); - // Force consistent pooling regions for unit testing. - auto y = FractionalAvgPool( - scope_, x, {1, 1.2, 1.9, 1}, - FractionalAvgPool::Deterministic(true).Overlapping(true).Seed(1).Seed2( - 2)); - TensorShape y_shape({1, 2, 3, 1}); - RunTest(x, x_shape, y.output, y_shape); -} - -TEST_F(NNGradTest, FractionalMaxPoolGradHelper) { - TensorShape x_shape({1, 3, 7, 1}); - auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); - // Force consistent pooling regions for unit testing. 
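The helper swap above (back to `SetRandomValuesWithBumpedMax`) is about keeping the pooled maximum unambiguous while the numeric gradient checker perturbs inputs. A small NumPy illustration of the failure mode the comments describe, using a nearly tied pair of values:

```python
import numpy as np

# If two candidates for the max are closer together than the finite-difference
# step, perturbing one of them can change which element wins, and the numeric
# "gradient" lands between the two valid subgradients (0 and 1).
x = np.array([0.50000, 0.50001])
eps = 1e-3

def f(v):
    return v.max()

numeric_grad_x0 = (f(x + np.array([eps, 0.0])) - f(x - np.array([eps, 0.0]))) / (2 * eps)
print(numeric_grad_x0)  # ~0.5, neither 0 nor 1, so a check against the analytic gradient fails
```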
- auto y = FractionalMaxPool( - scope_, x, {1, 1.2, 1.9, 1}, - FractionalMaxPool::Deterministic(true).Overlapping(true).Seed(1).Seed2( - 2)); - Tensor x_init_value = Tensor(DT_FLOAT, x_shape); - SetRandomValuesForMaxPooling(&x_init_value); - TensorShape y_shape({1, 2, 3, 1}); - RunTest(x, x_init_value, y.output, y_shape); -} - } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden index 6641d45e83..6e050cf564 100644 --- a/tensorflow/compiler/aot/codegen_test_h.golden +++ b/tensorflow/compiler/aot/codegen_test_h.golden @@ -56,9 +56,9 @@ namespace bar { // // Memory stats: // arg bytes total: 104 -// arg bytes aligned: 192 +// arg bytes aligned: 128 // temp bytes total: 126 -// temp bytes aligned: 320 +// temp bytes aligned: 224 class MyClass : public tensorflow::XlaCompiledCpuFunction { public: // Number of input arguments for the compiled computation. diff --git a/tensorflow/compiler/aot/embedded_protocol_buffers.h b/tensorflow/compiler/aot/embedded_protocol_buffers.h index 4e194a6aba..ebfe4806c2 100644 --- a/tensorflow/compiler/aot/embedded_protocol_buffers.h +++ b/tensorflow/compiler/aot/embedded_protocol_buffers.h @@ -71,7 +71,7 @@ struct ProtobufToEmbed { const ::tensorflow::protobuf::MessageLite* message; }; -// Embeds a sequence of protocol buffers into an object file. +// Embeds a a sequence of protocol buffers into an object file. // // `target_triple` is the target triple for the target architecture for the // generated object file. diff --git a/tensorflow/compiler/aot/runtime.h b/tensorflow/compiler/aot/runtime.h index d1a669ceb1..d085864f00 100644 --- a/tensorflow/compiler/aot/runtime.h +++ b/tensorflow/compiler/aot/runtime.h @@ -25,8 +25,8 @@ namespace tensorflow { namespace tfcompile { namespace runtime { -// Align to 64-bytes, to mimic tensorflow::Allocator::kAllocatorAlignment. -static constexpr size_t kAlign = 64; +// Align to 32-bytes, to mimic tensorflow::Allocator::kAllocatorAlignment. +static constexpr size_t kAlign = 32; // aligned_buffer_bytes returns the sum of each size in `sizes`, skipping -1 // values. There are `n` entries in `sizes`. Each buffer is aligned to kAlign diff --git a/tensorflow/compiler/aot/runtime_test.cc b/tensorflow/compiler/aot/runtime_test.cc index 06ec623eb2..6d603a02eb 100644 --- a/tensorflow/compiler/aot/runtime_test.cc +++ b/tensorflow/compiler/aot/runtime_test.cc @@ -24,7 +24,7 @@ namespace runtime { namespace { TEST(Runtime, AlignmentValue) { - // We've chosen 64 byte alignment for the tfcompile runtime to mimic the + // We've chosen 32 byte alignment for the tfcompile runtime to mimic the // regular tensorflow allocator, which was chosen to play nicely with Eigen. // The tfcompile runtime also has a requirement that comes from the xla // generated code, on the relation: buffer_size >= 16 ? 
2 * sizeof(void*) : 8 @@ -39,13 +39,13 @@ TEST(Runtime, AlignedBufferBytes) { EXPECT_EQ(aligned_buffer_bytes(sizesA, 1), 0); static constexpr intptr_t sizesB[1] = {3}; - EXPECT_EQ(aligned_buffer_bytes(sizesB, 1), 64); + EXPECT_EQ(aligned_buffer_bytes(sizesB, 1), 32); static constexpr intptr_t sizesC[1] = {32}; - EXPECT_EQ(aligned_buffer_bytes(sizesC, 1), 64); + EXPECT_EQ(aligned_buffer_bytes(sizesC, 1), 32); static constexpr intptr_t sizesD[7] = {1, -1, 32, -1, 64, 2, 3}; - EXPECT_EQ(aligned_buffer_bytes(sizesD, 7), 320); + EXPECT_EQ(aligned_buffer_bytes(sizesD, 7), 192); } void* add_ptr(void* base, uintptr_t delta) { @@ -101,11 +101,11 @@ TEST(Runtime, MallocFreeContiguousBuffers) { EXPECT_NE(base, nullptr); EXPECT_EQ(bufD[0], add_ptr(base, 0)); EXPECT_EQ(bufD[1], nullptr); - EXPECT_EQ(bufD[2], add_ptr(base, 64)); + EXPECT_EQ(bufD[2], add_ptr(base, 32)); EXPECT_EQ(bufD[3], nullptr); - EXPECT_EQ(bufD[4], add_ptr(base, 128)); - EXPECT_EQ(bufD[5], add_ptr(base, 192)); - EXPECT_EQ(bufD[6], add_ptr(base, 256)); + EXPECT_EQ(bufD[4], add_ptr(base, 64)); + EXPECT_EQ(bufD[5], add_ptr(base, 128)); + EXPECT_EQ(bufD[6], add_ptr(base, 160)); for (int i = 0; i < 7; ++i) { const intptr_t size = sizesD[i]; if (size != -1) { diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 1067b38f93..d82922a359 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -178,7 +178,6 @@ cc_library( ":runtime_matmul", ":runtime_matmul_mkl", ":runtime_single_threaded_conv2d", - ":runtime_single_threaded_fft", ":runtime_single_threaded_matmul", "@llvm//:execution_engine", "@llvm//:core", @@ -517,6 +516,7 @@ cc_library( deps = [ "//tensorflow/compiler/xla:executable_run_options", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/core:framework", "//tensorflow/core:framework_lite", "//third_party/eigen3", ], @@ -578,22 +578,6 @@ cc_library( ], ) -cc_library( - name = "runtime_single_threaded_fft", - srcs = [ - "runtime_fft_impl.h", - "runtime_single_threaded_fft.cc", - ], - hdrs = ["runtime_single_threaded_fft.h"], - copts = runtime_copts(), - visibility = ["//visibility:public"], - deps = [ - "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/core:framework_lite", - "//third_party/eigen3", - ], -) - cc_library( name = "runtime_single_threaded_matmul", srcs = ["runtime_single_threaded_matmul.cc"], diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc index 54c52bc08f..215405f680 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc @@ -51,8 +51,6 @@ extern const char* const kEigenConvF16SymbolName = extern const char* const kEigenConvF32SymbolName = "__xla_cpu_runtime_EigenConvF32"; extern const char* const kEigenFftSymbolName = "__xla_cpu_runtime_EigenFft"; -extern const char* const kEigenSingleThreadedFftSymbolName = - "__xla_cpu_runtime_EigenSingleThreadedFft"; extern const char* const kEigenSingleThreadedMatMulF16SymbolName = "__xla_cpu_runtime_EigenSingleThreadedMatMulF16"; extern const char* const kEigenSingleThreadedMatMulF32SymbolName = diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h index aa0e967123..1dce6efa5c 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h @@ -52,7 +52,6 @@ extern const char* const 
kMKLSingleThreadedMatMulF64SymbolName; extern const char* const kEigenConvF16SymbolName; extern const char* const kEigenConvF32SymbolName; extern const char* const kEigenFftSymbolName; -extern const char* const kEigenSingleThreadedFftSymbolName; extern const char* const kEigenSingleThreadedMatMulF16SymbolName; extern const char* const kEigenSingleThreadedMatMulF32SymbolName; extern const char* const kEigenSingleThreadedMatMulF64SymbolName; diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 758b8c62b4..2c20be155f 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -1172,13 +1172,7 @@ Status IrEmitter::HandleFft(HloInstruction* fft) { {int8_ptr_type, int8_ptr_type, int8_ptr_type, int32_type, int32_type, int64_type, int64_type, int64_type, int64_type}, /*isVarArg=*/false); - - bool multi_threaded_eigen = - hlo_module_config_.debug_options().xla_cpu_multi_thread_eigen(); - const char* fn_name = multi_threaded_eigen - ? runtime::kEigenFftSymbolName - : runtime::kEigenSingleThreadedFftSymbolName; - + const char* fn_name = runtime::kEigenFftSymbolName; llvm::Function* fft_func = llvm::cast( module_->getOrInsertFunction(fn_name, fft_type)); fft_func->setCallingConv(llvm::CallingConv::C); diff --git a/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h b/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h index 0bf693edd0..984cb0616e 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h +++ b/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h @@ -21,6 +21,8 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/framework/numeric_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/platform/types.h" // 'tensorflow' namespace is used so that int64 and other types don't require @@ -69,9 +71,11 @@ void EigenFftR2C(const EigenDevice& device, complex64* out, float* operand, in_dims[0] = input_batch; Eigen::DSizes out_dims; out_dims[0] = input_batch; + TensorShape temp_shape{input_batch}; for (int i = 0; i < FFTRank; i++) { in_dims[i + 1] = fft_shape[i]; out_dims[i + 1] = i == FFTRank - 1 ? fft_shape[i] / 2 + 1 : fft_shape[i]; + temp_shape.AddDim(fft_shape[i]); } const Eigen::TensorMap, Eigen::Aligned> @@ -84,8 +88,8 @@ void EigenFftR2C(const EigenDevice& device, complex64* out, float* operand, const auto axes = Eigen::ArrayXi::LinSpaced(FFTRank, 1, FFTRank); // Compute the full FFT using a temporary tensor. - Eigen::Tensor full_fft(in_dims); - + Tensor temp(DataTypeToEnum::v(), temp_shape); + auto full_fft = temp.flat_inner_dims(); const Eigen::DSizes zero_start_indices; full_fft.device(device) = input.template fft(axes); @@ -108,9 +112,11 @@ void EigenFftC2R(const EigenDevice& device, float* out, complex64* operand, in_dims[0] = input_batch; Eigen::DSizes out_dims; out_dims[0] = input_batch; + TensorShape temp_shape{input_batch}; for (int i = 0; i < FFTRank; i++) { in_dims[i + 1] = i == FFTRank - 1 ? fft_shape[i] / 2 + 1 : fft_shape[i]; out_dims[i + 1] = fft_shape[i]; + temp_shape.AddDim(fft_shape[i]); } const Eigen::TensorMap, Eigen::Aligned> @@ -123,7 +129,8 @@ void EigenFftC2R(const EigenDevice& device, float* out, complex64* operand, // region we will slice from input given fft_shape. 
We slice input to // fft_shape on its inner-most dimensions, except the last (which we // slice to fft_shape[-1] / 2 + 1). - Eigen::Tensor full_fft(out_dims); + Tensor temp(DataTypeToEnum::v(), temp_shape); + auto full_fft = temp.flat_inner_dims(); // Calculate the starting point and range of the source of // negative frequency part. @@ -172,6 +179,7 @@ template void EigenFftWithRank(const EigenDevice& device, void* out, void* operand, int32 fft_type, int64 input_batch, int64 fft_length0, int64 fft_length1, int64 fft_length2) { + CHECK(::xla::FftType_IsValid(fft_type)) << fft_type; switch (fft_type) { case ::xla::FftType::FFT: EigenFftC2C( @@ -196,8 +204,7 @@ void EigenFftWithRank(const EigenDevice& device, void* out, void* operand, input_batch, fft_length0, fft_length1, fft_length2); break; default: - // Unsupported FFT type - abort(); + LOG(FATAL) << "Unsupported FFT type: " << fft_type; } } @@ -223,8 +230,7 @@ void EigenFftImpl(const EigenDevice& device, void* out, void* operand, fft_length1, fft_length2); break; default: - // Unsupported FFT rank - abort(); + LOG(FATAL) << "Unsupported FFT rank " << fft_rank; } } diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc deleted file mode 100644 index 2613ddb127..0000000000 --- a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h" - -#include "tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h" -#include "tensorflow/core/platform/dynamic_annotations.h" -#include "tensorflow/core/platform/types.h" - -using tensorflow::int32; -using tensorflow::int64; - -TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenSingleThreadedFft( - const void* run_options_ptr, void* out, void* operand, int32 fft_type, - int32 fft_rank, int64 input_batch, int64 fft_length0, int64 fft_length1, - int64 fft_length2) { - tensorflow::xla::EigenFftImpl(Eigen::DefaultDevice(), out, operand, fft_type, - fft_rank, input_batch, fft_length0, fft_length1, - fft_length2); -} diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h deleted file mode 100644 index dcd133d012..0000000000 --- a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_SINGLE_THREADED_FFT_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_SINGLE_THREADED_FFT_H_ - -#include "tensorflow/core/platform/types.h" - -extern "C" { - -extern void __xla_cpu_runtime_EigenSingleThreadedFft( - const void* /* xla::ExecutableRunOptions* */ run_options_ptr, void* out, - void* operand, tensorflow::int32 fft_type, tensorflow::int32 fft_rank, - tensorflow::int64 input_batch, tensorflow::int64 fft_length0, - tensorflow::int64 fft_length1, tensorflow::int64 fft_length2); - -} // extern "C" - -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_SINGLE_THREADED_FFT_H_ diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index c4c90515ac..8d8c5e4c44 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -38,7 +38,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h" #include "tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h" -#include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h" #include "tensorflow/compiler/xla/service/cpu/windows_compatibility.h" #include "tensorflow/compiler/xla/types.h" @@ -203,7 +202,6 @@ bool RegisterKnownJITSymbols() { REGISTER_CPU_RUNTIME_SYMBOL(MKLSingleThreadedMatMulF64); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32); - REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedFft); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64); diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h index 2515222cf2..d3bc47e61e 100644 --- a/tensorflow/compiler/xla/service/pattern_matcher.h +++ b/tensorflow/compiler/xla/service/pattern_matcher.h @@ -204,7 +204,7 @@ class LayoutPattern { // Modifies the pattern to match only if the layout equals the given proto. // The layout must outlive the returned pattern. constexpr LayoutPattern> EqualTo( - const ::xla::Layout* layout) const { + const Layout* layout) const { return LayoutPattern>( LayoutPatternEqualImpl(impl_, layout), matched_layout_); } diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.cc b/tensorflow/compiler/xla/service/tuple_simplifier.cc index 77bdcc9de0..e536c8afbf 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier.cc +++ b/tensorflow/compiler/xla/service/tuple_simplifier.cc @@ -30,17 +30,10 @@ limitations under the License. 
namespace xla { -TupleSimplifier::TupleSimplifier(bool exclude_entry_computation) : - exclude_entry_computation_(exclude_entry_computation) {} - StatusOr TupleSimplifier::Run(HloModule* module) { // Initially add all GTE and Tuple instructions to the worklist. std::queue worklist; for (auto* computation : module->computations()) { - if (exclude_entry_computation_ && - computation == module->entry_computation()) { - continue; - } for (auto* instruction : computation->instructions()) { if (instruction->opcode() == HloOpcode::kTuple || instruction->opcode() == HloOpcode::kGetTupleElement) { diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.h b/tensorflow/compiler/xla/service/tuple_simplifier.h index 7509501883..e5e9b10b5b 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier.h +++ b/tensorflow/compiler/xla/service/tuple_simplifier.h @@ -27,20 +27,13 @@ namespace xla { // the module. class TupleSimplifier : public HloPassInterface { public: - TupleSimplifier() : TupleSimplifier(/*exclude_entry_computation=*/false) {} - explicit TupleSimplifier(bool exclude_entry_computation); + TupleSimplifier() {} ~TupleSimplifier() override {} tensorflow::StringPiece name() const override { return "tuple-simplifier"; } // Run tuple simplification on the given computation. Returns whether the // computation was changed. StatusOr Run(HloModule* module) override; - - private: - // When set, this pipeline stage will perform optimization of all computations - // apart from the module's entry computation. This is used by Graphcore's - // backend. - bool exclude_entry_computation_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/tuple_simplifier_test.cc b/tensorflow/compiler/xla/service/tuple_simplifier_test.cc index d3635eae81..ca9ae91281 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/tuple_simplifier_test.cc @@ -42,12 +42,6 @@ class TupleSimplifierTest : public HloTestBase { TF_ASSERT_OK(changed_status.status()); EXPECT_EQ(change_expected, changed_status.ValueOrDie()); } - void Run(HloModule* module, bool change_expected, bool exclude_entry) { - TupleSimplifier simplifier(exclude_entry); - auto changed_status = simplifier.Run(module); - TF_ASSERT_OK(changed_status.status()); - EXPECT_EQ(change_expected, changed_status.ValueOrDie()); - } const Shape scalar_shape_ = ShapeUtil::MakeShape(F32, {}); const Shape tuple_shape_ = ShapeUtil::MakeTupleShape( @@ -217,76 +211,5 @@ TEST_F(TupleSimplifierTest, IncompatibleTuples) { EXPECT_THAT(computation->root_instruction(), tuple); } -TEST_F(TupleSimplifierTest, CanExcludeEntryComputation) { - // Verify that the root computation can be excluded - auto module = CreateNewModule(); - - HloInstruction* p0; - HloInstruction* p1; - HloComputation* c0; - HloComputation* c1; - HloComputation* entry; - - { - HloComputation::Builder builder(TestName() + "_1"); - p0 = builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape_, "param")); - HloInstruction* gte0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, p0, 0)); - HloInstruction* gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, p0, 1)); - HloInstruction* gte2 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, p0, 2)); - - builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1, gte2})); - - c0 = module->AddEmbeddedComputation(builder.Build()); - } - { - HloComputation::Builder builder(TestName() + "_2"); - p1 = 
builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape_, "param")); - HloInstruction* gte0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, p1, 0)); - HloInstruction* gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, p1, 1)); - HloInstruction* gte2 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, p1, 2)); - - builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1, gte2})); - - c1 = module->AddEmbeddedComputation(builder.Build()); - } - { - HloComputation::Builder builder(TestName() + "_Entry"); - HloInstruction* tuple_param = builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape_, "param")); - HloInstruction* call0 = builder.AddInstruction( - HloInstruction::CreateCall(tuple_shape_, {tuple_param}, c0)); - HloInstruction* call1 = builder.AddInstruction( - HloInstruction::CreateCall(tuple_shape_, {tuple_param}, c1)); - HloInstruction* gte0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, call0, 0)); - HloInstruction* gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, call1, 1)); - HloInstruction* tuple0 = - builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); - HloInstruction* gte2 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, tuple0, 0)); - HloInstruction* gte3 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_shape_, tuple0, 1)); - - builder.AddInstruction(HloInstruction::CreateTuple({gte2, gte3})); - - entry = module->AddEntryComputation(builder.Build()); - } - - Run(module.get(), /*change_expected=*/true, /*exclude_entry=*/ true); - - EXPECT_THAT(c0->root_instruction(), p0); - EXPECT_THAT(c1->root_instruction(), p1); - EXPECT_THAT(entry->instruction_count(), 9); -} - } // namespace } // namespace xla diff --git a/tensorflow/contrib/autograph/__init__.py b/tensorflow/contrib/autograph/__init__.py index dbdbad8f4c..637e49c082 100644 --- a/tensorflow/contrib/autograph/__init__.py +++ b/tensorflow/contrib/autograph/__init__.py @@ -23,7 +23,6 @@ from __future__ import print_function # TODO(mdan): Bring only the relevant symbols to the top level. 
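The `CanExcludeEntryComputation` test removed above exercises the core TupleSimplifier rewrite: a tuple whose operands are `GetTupleElement(t, 0), ..., GetTupleElement(t, n-1)` over the same tuple `t` collapses back to `t` (hence `EXPECT_THAT(c0->root_instruction(), p0)`). A toy sketch of that rule on a hand-rolled expression type, not the XLA API:

```python
from collections import namedtuple

# Minimal stand-ins for HLO-style instructions.
Param = namedtuple("Param", ["name", "arity"])
GTE = namedtuple("GTE", ["operand", "index"])
MakeTuple = namedtuple("MakeTuple", ["elements"])

def simplify_tuple(instr):
    """Collapse MakeTuple(GTE(t, 0), ..., GTE(t, n-1)) back to t."""
    if isinstance(instr, MakeTuple):
        elems = instr.elements
        if (elems
            and all(isinstance(e, GTE) for e in elems)
            and all(e.operand is elems[0].operand for e in elems)
            and [e.index for e in elems] == list(range(len(elems)))
            and getattr(elems[0].operand, "arity", len(elems)) == len(elems)):
            return elems[0].operand
    return instr

p = Param("param", arity=3)
root = MakeTuple([GTE(p, 0), GTE(p, 1), GTE(p, 2)])
assert simplify_tuple(root) is p   # mirrors c0->root_instruction() == p0
```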
from tensorflow.contrib.autograph import utils -from tensorflow.contrib.autograph import operators from tensorflow.contrib.autograph.impl.api import convert from tensorflow.contrib.autograph.impl.api import converted_call from tensorflow.contrib.autograph.impl.api import do_not_convert @@ -44,8 +43,6 @@ _allowed_symbols = [ 'do_not_convert', 'to_code', 'to_graph', - # Overloaded operators - 'operators', # Special functions and directives 'set_element_type', 'set_loop_options', diff --git a/tensorflow/contrib/cmake/tf_c.cmake b/tensorflow/contrib/cmake/tf_c.cmake index 2e0a2fcef4..bda5e26f43 100644 --- a/tensorflow/contrib/cmake/tf_c.cmake +++ b/tensorflow/contrib/cmake/tf_c.cmake @@ -37,15 +37,13 @@ add_dependencies( tf_core_lib tf_protos_cc) -if(tensorflow_BUILD_PYTHON_BINDINGS) - add_library(tf_c_python_api OBJECT - "${tensorflow_source_dir}/tensorflow/c/python_api.cc" - "${tensorflow_source_dir}/tensorflow/c/python_api.h" - ) - add_dependencies( - tf_c_python_api - tf_c - tf_core_lib - tf_core_framework - tf_protos_cc) -endif() +add_library(tf_c_python_api OBJECT + "${tensorflow_source_dir}/tensorflow/c/python_api.cc" + "${tensorflow_source_dir}/tensorflow/c/python_api.h" +) +add_dependencies( + tf_c_python_api + tf_c + tf_core_lib + tf_core_framework + tf_protos_cc) diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake index 6c90cf398c..f73da0b8ab 100644 --- a/tensorflow/contrib/cmake/tf_cc_ops.cmake +++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake @@ -155,7 +155,7 @@ if (WIN32) set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/pywrap_tensorflow_internal.lib") endif() else (WIN32) - set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal${CMAKE_SHARED_LIBRARY_SUFFIX}") + set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so") endif (WIN32) add_custom_target(tf_extension_ops) diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 9244604489..a0c3ddd28b 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -715,7 +715,7 @@ if(WIN32) endif() else() add_custom_command(TARGET pywrap_tensorflow_internal POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal${CMAKE_SHARED_LIBRARY_SUFFIX} + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.so) endif() @@ -832,6 +832,7 @@ add_custom_command(TARGET tf_python_build_pip_package POST_BUILD add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/contrib/testing/python/framework/util_test.py ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/testing/python/framework/) + add_custom_command(TARGET tf_python_build_pip_package POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/tools/pip_package/README ${CMAKE_CURRENT_BINARY_DIR}/tf_python/) diff --git a/tensorflow/contrib/cmake/tools/create_def_file.py b/tensorflow/contrib/cmake/tools/create_def_file.py index 4f957f1e0b..cffe069aa3 100644 --- a/tensorflow/contrib/cmake/tools/create_def_file.py +++ b/tensorflow/contrib/cmake/tools/create_def_file.py @@ -44,8 +44,7 @@ UNDNAME = "undname.exe" DUMPBIN = "dumpbin.exe" # Exclude if matched -EXCLUDE_RE = re.compile(r"RTTI|deleting 
destructor|::internal::|Internal|" - r"python_op_gen_internal|grappler") +EXCLUDE_RE = re.compile(r"RTTI|deleting destructor|::internal::") # Include if matched before exclude INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|" @@ -57,10 +56,6 @@ INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|" r"tensorflow::ops::internal::Enter|" r"tensorflow::strings::internal::AppendPieces|" r"tensorflow::strings::internal::CatPieces|" - r"tensorflow::errors::Internal|" - r"tensorflow::Tensor::CopyFromInternal|" - r"tensorflow::kernel_factory::" - r"OpKernelRegistrar::InitInternal|" r"tensorflow::io::internal::JoinPathImpl") # Include if matched after exclude @@ -69,7 +64,7 @@ INCLUDE_RE = re.compile(r"^(TF_\w*)$|" r"tensorflow::|" r"functor::|" r"\?nsync_|" - r"stream_executor::") + r"perftools::gputools") # We want to identify data members explicitly in the DEF file, so that no one # can implicitly link against the DLL if they use one of the variables exported diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py index 795f1993ba..45760a29ee 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py @@ -151,24 +151,16 @@ class SinhArcsinhBijectorTest(test.TestCase): self.assertAllClose(y, bijector.forward(x).eval(), rtol=1e-4, atol=0.) self.assertAllClose(x, bijector.inverse(y).eval(), rtol=1e-4, atol=0.) - # On IBM PPC systems, longdouble (np.float128) is same as double except that it can have more precision. - # Type double being of 8 bytes, can't hold square of max of float64 (which is also 8 bytes) and - # below test fails due to overflow error giving inf. So this check avoids that error by skipping square - # calculation and corresponding assert. - - if np.amax(y) <= np.sqrt(np.finfo(np.float128).max) and \ - np.fabs(np.amin(y)) <= np.sqrt(np.fabs(np.finfo(np.float128).min)): - - # Do the numpy calculation in float128 to avoid inf/nan. - y_float128 = np.float128(y) - self.assertAllClose( - np.log(np.cosh( - np.arcsinh(y_float128) / tailweight - skewness) / np.sqrt( - y_float128**2 + 1)) - - np.log(tailweight), - bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), - rtol=1e-4, - atol=0.) + # Do the numpy calculation in float128 to avoid inf/nan. + y_float128 = np.float128(y) + self.assertAllClose( + np.log(np.cosh( + np.arcsinh(y_float128) / tailweight - skewness) / np.sqrt( + y_float128**2 + 1)) - + np.log(tailweight), + bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), + rtol=1e-4, + atol=0.) 
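The reference computation above is done in `np.float128` because squaring the bijector's large outputs overflows `float64`; the guard removed by this hunk skipped that reference on platforms (e.g. IBM PPC) where `np.float128` has no extra range. A small sketch of the same inverse-log-det-Jacobian formula, assuming a genuine extended-precision `np.float128` as on x86 Linux (the default `skewness`/`tailweight` values are illustrative):

```python
import numpy as np

def sinh_arcsinh_ildj(y, skewness=0.0, tailweight=1.0):
    """Reference inverse_log_det_jacobian of the SinhArcsinh bijector."""
    y = np.float128(y)   # avoid inf when squaring large float64 values
    return (np.log(np.cosh(np.arcsinh(y) / tailweight - skewness)
                   / np.sqrt(y ** 2 + 1.0))
            - np.log(tailweight))

with np.errstate(over="ignore"):
    print(np.isinf(np.float64(1e200) ** 2))    # True: float64 overflows on y**2
print(np.isfinite(sinh_arcsinh_ildj(1e200)))   # True: float128 keeps it finite
```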
self.assertAllClose( -bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), bijector.forward_log_det_jacobian(x, event_ndims=0).eval(), diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py index adf92c27ea..d7909dd5a2 100644 --- a/tensorflow/contrib/eager/python/datasets.py +++ b/tensorflow/contrib/eager/python/datasets.py @@ -106,8 +106,7 @@ class Iterator(iterator_ops.EagerIterator, checkpointable.CheckpointableBase): target_device=target, buffer_size=10, container="", - shared_name=_generate_shared_name( - "contrib_eager_iterator_function_buffer_resource")) + shared_name=_generate_shared_name("function_buffer_resource")) self._buffer_resource_deleter = resource_variable_ops.EagerResourceDeleter( # pylint: disable=line-too-long handle=self._buffer_resource_handle, handle_device=self._device) diff --git a/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb index 5749f22ac5..4fe3a0e3f3 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb @@ -68,7 +68,7 @@ "# simply construct the object. Most layers take as a first argument the number\n", "# of output dimensions / channels.\n", "layer = tf.keras.layers.Dense(100)\n", - "# The number of input dimensions is often unnecessary, as it can be inferred\n", + "# The number of input dimensionss is often unnecessary, as it can be inferred\n", "# the first time the layer is used, but it can be provided if you want to \n", "# specify it manually, which is useful in some complex models.\n", "layer = tf.keras.layers.Dense(10, input_shape=(None, 5))" @@ -267,7 +267,7 @@ " * `build`, where you know the shapes of the input tensors and can do the rest of the initialization\n", " * `call`, where you do the forward computation\n", "\n", - "Note that you don't have to wait until `build` is called to create your variables, you can also create them in `__init__`. However, the advantage of creating them in `build` is that it enables late variable creation based on the shape of the inputs the layer will operate on. On the other hand, creating variables in `__init__` would mean that shapes required to create the variables will need to be explicitly specified." + "Note that you don't have to wait until `build` is called to create your variables, you can also create them in `__init__`. However, the advantage of creating them in `build` is that it enables late variable creation based on the shape of the inputs the layer will operate on. On the other hand, creating variables in `__init__` would mean that shapes requires to create the variables will need to be explicitly specified." ] }, { diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index 05bcdac2ca..84a413c791 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -346,8 +346,7 @@ def sequence_numeric_column( key, shape=(1,), default_value=0., - dtype=dtypes.float32, - normalizer_fn=None): + dtype=dtypes.float32): """Returns a feature column that represents sequences of numeric data. 
Example: @@ -371,12 +370,6 @@ def sequence_numeric_column( default_value: A single value compatible with `dtype` that is used for padding the sparse data into a dense `Tensor`. dtype: The type of values. - normalizer_fn: If not `None`, a function that can be used to normalize the - value of the tensor after `default_value` is applied for parsing. - Normalizer function takes the input `Tensor` as its argument, and returns - the output `Tensor`. (e.g. lambda x: (x - 3.0) / 4.2). Please note that - even though the most common use case of this function is normalization, it - can be used for any kind of Tensorflow transformations. Returns: A `_SequenceNumericColumn`. @@ -390,16 +383,12 @@ def sequence_numeric_column( if not (dtype.is_integer or dtype.is_floating): raise ValueError('dtype must be convertible to float. ' 'dtype: {}, key: {}'.format(dtype, key)) - if normalizer_fn is not None and not callable(normalizer_fn): - raise TypeError( - 'normalizer_fn must be a callable. Given: {}'.format(normalizer_fn)) return _SequenceNumericColumn( key, shape=shape, default_value=default_value, - dtype=dtype, - normalizer_fn=normalizer_fn) + dtype=dtype) def _assert_all_equal_and_return(tensors, name=None): @@ -418,7 +407,7 @@ class _SequenceNumericColumn( fc._SequenceDenseColumn, collections.namedtuple( '_SequenceNumericColumn', - ['key', 'shape', 'default_value', 'dtype', 'normalizer_fn'])): + ['key', 'shape', 'default_value', 'dtype'])): """Represents sequences of numeric data.""" @property @@ -430,10 +419,7 @@ class _SequenceNumericColumn( return {self.key: parsing_ops.VarLenFeature(self.dtype)} def _transform_feature(self, inputs): - input_tensor = inputs.get(self.key) - if self.normalizer_fn is not None: - input_tensor = self.normalizer_fn(input_tensor) - return input_tensor + return inputs.get(self.key) @property def _variable_shape(self): diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index 45d7b74046..ee74cf56dc 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -28,7 +28,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor -from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import test from tensorflow.python.training import monitored_session @@ -948,7 +947,6 @@ class SequenceNumericColumnTest(test.TestCase): self.assertEqual((1,), a.shape) self.assertEqual(0., a.default_value) self.assertEqual(dtypes.float32, a.dtype) - self.assertIsNone(a.normalizer_fn) def test_shape_saved_as_tuple(self): a = sfc.sequence_numeric_column('aaa', shape=[1, 2]) @@ -967,10 +965,6 @@ class SequenceNumericColumnTest(test.TestCase): ValueError, 'dtype must be convertible to float'): sfc.sequence_numeric_column('aaa', dtype=dtypes.string) - def test_normalizer_fn_must_be_callable(self): - with self.assertRaisesRegexp(TypeError, 'must be a callable'): - sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable') - def test_get_sequence_dense_tensor(self): sparse_input = sparse_tensor.SparseTensorValue( # example 0, values [[0.], [1]] @@ -991,41 +985,6 @@ class SequenceNumericColumnTest(test.TestCase): self.assertAllEqual( expected_dense_tensor, 
dense_tensor.eval(session=sess)) - def test_get_sequence_dense_tensor_with_normalizer_fn(self): - - def _increment_two(input_sparse_tensor): - return sparse_ops.sparse_add( - input_sparse_tensor, - sparse_tensor.SparseTensor(((0, 0), (1, 1)), (2.0, 2.0), (2, 2)) - ) - - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0.], [1]] - # example 1, [[10.]] - indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)) - - # Before _increment_two: - # [[0.], [1.]], - # [[10.], [0.]], - # After _increment_two: - # [[2.], [1.]], - # [[10.], [2.]], - expected_dense_tensor = [ - [[2.], [1.]], - [[10.], [2.]], - ] - numeric_column = sfc.sequence_numeric_column( - 'aaa', normalizer_fn=_increment_two) - - dense_tensor, _ = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_dense_tensor, dense_tensor.eval(session=sess)) - def test_get_sequence_dense_tensor_with_shape(self): """Tests get_sequence_dense_tensor with shape !=(1,).""" sparse_input = sparse_tensor.SparseTensorValue( diff --git a/tensorflow/contrib/ffmpeg/__init__.py b/tensorflow/contrib/ffmpeg/__init__.py index 484ffee3e7..daba965a98 100644 --- a/tensorflow/contrib/ffmpeg/__init__.py +++ b/tensorflow/contrib/ffmpeg/__init__.py @@ -28,6 +28,7 @@ from __future__ import print_function from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_audio from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video from tensorflow.contrib.ffmpeg.ffmpeg_ops import encode_audio +from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py index b1b5126d9e..020b5c99c6 100644 --- a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py +++ b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py @@ -21,6 +21,7 @@ from __future__ import print_function from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py +from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py from tensorflow.contrib.util import loader from tensorflow.python.framework import ops from tensorflow.python.platform import resource_loader diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index dc49383c5c..10d1ecc738 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -119,13 +119,14 @@ from tensorflow.python.framework.smart_cond import smart_cond from tensorflow.python.framework.smart_cond import smart_constant_value from tensorflow.python.framework.tensor_spec import BoundedTensorSpec from tensorflow.python.framework.tensor_spec import TensorSpec +from tensorflow.python.ops.array_ops import broadcast_to from tensorflow.python.ops.init_ops import convolutional_delta_orthogonal from tensorflow.python.ops.init_ops import convolutional_orthogonal_1d from tensorflow.python.ops.init_ops import convolutional_orthogonal_2d from tensorflow.python.ops.init_ops import convolutional_orthogonal_3d from tensorflow.python.util.all_util import remove_undocumented -_allowed_symbols = ['nest'] +_allowed_symbols = ['nest', 'broadcast_to'] _nest_allowed_symbols = [ 'assert_same_structure', 'is_sequence', diff --git 
a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py index a955e21b72..65cb94b5a4 100644 --- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py +++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py @@ -301,8 +301,8 @@ class FusedConv2DBiasActivationTest(test.TestCase): conv = tensors[i] value = values[i] ref_value = ref_values[i] - tf_logging.info("expected = ", ref_value) - tf_logging.info("actual = ", value) + print("expected = ", ref_value) + print("actual = ", value) tol = 1e-5 if value.dtype == np.float16: tol = 1e-3 @@ -843,8 +843,7 @@ class FusedConvInt8Tests(test.TestCase): vertical_stride, padding_type) output_width = CalculateConvolvedOutputDim(input_width, filter_width, horizontal_stride, padding_type) - tf_logging.info("output_height=", output_height, ", output_width=", - output_width) + print("output_height=", output_height, ", output_width=", output_width) side_input, _, _ = gen_array_ops.quantize_v2( random_ops.random_uniform( @@ -881,8 +880,8 @@ class FusedConvInt8Tests(test.TestCase): with self.test_session( use_gpu=True, config=NoMemoryOptimizationConfig()) as sess: actual_y, expected_y = sess.run([actual, expected]) - tf_logging.info("actual_y = ", actual_y) - tf_logging.info("expected_y = ", expected_y) + print("actual_y = ", actual_y) + print("expected_y = ", expected_y) self.assertTrue(np.array_equal(actual_y, expected_y)) def testFusedConvInt8(self): diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c index 2e5c84704f..6a5d982dc8 100644 --- a/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c +++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c @@ -19,7 +19,7 @@ limitations under the License. #include "hexagon_controller.h" -#include +#include #include #include "adspmsgd.h" diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh index 840015a7fa..436c3e1d4c 100755 --- a/tensorflow/contrib/lite/download_dependencies.sh +++ b/tensorflow/contrib/lite/download_dependencies.sh @@ -30,7 +30,9 @@ if [ ! -f $BZL_FILE_PATH ]; then fi EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" -GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once +# the archive has been propagated in mirror.bazel.build. 
+GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" NEON_2_SSE_URL="https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip" diff --git a/tensorflow/contrib/lite/examples/minimal/minimal.cc b/tensorflow/contrib/lite/examples/minimal/minimal.cc index 8b0ace96cc..106e3b0270 100644 --- a/tensorflow/contrib/lite/examples/minimal/minimal.cc +++ b/tensorflow/contrib/lite/examples/minimal/minimal.cc @@ -38,7 +38,7 @@ using namespace tflite; int main(int argc, char *argv[]) { if(argc != 2) { - fprintf(stderr, "minimal \n"); + fprintf(stderr, "Usage: %s \n"); return 1; } const char* filename = argv[1]; diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md index 965273f0f0..bb2e615eac 100644 --- a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md +++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md @@ -128,6 +128,7 @@ TensorFlow operation not listed above are likely unsupported. Notably, the following common ops are not supported at the moment: * [tf.depth_to_space](https://www.tensorflow.org/api_docs/python/tf/depth_to_space) +* [tf.gather](https://www.tensorflow.org/api_docs/python/tf/gather) * [tf.image.resize_bilinear](https://www.tensorflow.org/api_docs/python/tf/image/resize_bilinear) * [tf.tanh](https://www.tensorflow.org/api_docs/python/tf/tanh) @@ -305,19 +306,6 @@ Options { } ``` -**GATHER** - -``` -Inputs { - 0: params tensor - 1: indices tensor - 2: axis tensor (optional) -} -Outputs { - 0: a tensor with same type as the params tensor. -} -``` - **GREATER** ``` diff --git a/tensorflow/contrib/lite/java/ovic/README.md b/tensorflow/contrib/lite/java/ovic/README.md index 26349347fa..5efa70987e 100644 --- a/tensorflow/contrib/lite/java/ovic/README.md +++ b/tensorflow/contrib/lite/java/ovic/README.md @@ -2,7 +2,7 @@ This folder contains building code for track one of the [Low Power ImageNet Recognition Challenge workshop at CVPR 2018.](https://rebootingcomputing.ieee.org/home/sitemap/14-lpirc/80-low-power-image-recognition-challenge-lpirc-2018) -## Pre-requisite +## Pre-requesits Follow the steps [here](https://www.tensorflow.org/mobile/tflite/demo_android) to install Tensorflow, Bazel, and the Android NDK and SDK. @@ -49,7 +49,7 @@ Once you have a submission that follows the instructions from the [competition s You can call the validator binary below to verify that your model fits the format requirements. This often helps you to catch size mismatches (e.g. output should be [1, 1001] instead of [1,1,1,1001]). 
Let say the submission file is located at `/path/to/my_model.lite`, then call: ```sh -bazel build --cxxopt=--std=c++11 //tensorflow/contrib/lite/java/ovic:ovic_validator --cxxopt=-Wno-all +bazel build --cxxopt--std=c++11 //tensorflow/contrib/lite/java/ovic:ovic_validator --cxxopt=-Wno-all bazel-bin/tensorflow/contrib/lite/java/ovic/ovic_validator /path/to/my_model.lite ``` diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 1908f7fa6c..a2f192bbc2 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1934,7 +1934,7 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims, // The quantization of the input, output arrays is as follows: // - The input activations are quantized as uint8 on the interval // [-1, 127/128]. -// The rationale for that is that is the natural interval for output +// The rationale for that is that that is the natural interval for output // activations (see next point) and these need to be concatenated together. // We could accommodate different ranges by re-scaling, but we empirically // found that setting the input activations range to be [-1, 127/128] in the @@ -1999,7 +1999,7 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims, // However, for a fixed-point implementation in 16-bit integers, using 5 // integer bits to represent the [-16, 16] range would leave only 11 // fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive -// representable values. Notice that is higher than the +// representable values. Notice that that is higher than the // worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic. // Using [-8, 8] thus seems like the better compromise overall, enjoying // an increment of 2.4e-4 between representable values and a worst-case diff --git a/tensorflow/contrib/lite/python/interpreter.py b/tensorflow/contrib/lite/python/interpreter.py index fd90823425..9400e757b9 100644 --- a/tensorflow/contrib/lite/python/interpreter.py +++ b/tensorflow/contrib/lite/python/interpreter.py @@ -55,7 +55,7 @@ class Interpreter(object): elif model_content and not model_path: self._interpreter = ( _interpreter_wrapper.InterpreterWrapper_CreateWrapperCPPFromBuffer( - model_content)) + model_content, len(model_content))) if not self._interpreter: raise ValueError( 'Failed to create model from {} bytes'.format(len(model_content))) diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc index b283551c45..f705551fcb 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc @@ -397,14 +397,9 @@ InterpreterWrapper* InterpreterWrapper::CreateWrapperCPPFromFile( } InterpreterWrapper* InterpreterWrapper::CreateWrapperCPPFromBuffer( - PyObject* data) { - char * buf = nullptr; - Py_ssize_t length; - if (PY_TO_CPPSTRING(data, &buf, &length) == -1) { - return nullptr; - } + const char* data, size_t len) { std::unique_ptr model = - tflite::FlatBufferModel::BuildFromBuffer(buf, length); + tflite::FlatBufferModel::BuildFromBuffer(data, len); return model ? 
new InterpreterWrapper(std::move(model)) : nullptr; } diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h index cbeb53bee7..b0ed7c4559 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h @@ -40,7 +40,8 @@ class InterpreterWrapper { static InterpreterWrapper* CreateWrapperCPPFromFile(const char* model_path); // SWIG caller takes ownership of pointer. - static InterpreterWrapper* CreateWrapperCPPFromBuffer(PyObject* data); + static InterpreterWrapper* CreateWrapperCPPFromBuffer(const char* data, + size_t len); ~InterpreterWrapper(); bool AllocateTensors(); diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index 88dda7290b..0913cd2c5c 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -34,8 +34,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from six import PY3 - from google.protobuf import text_format as _text_format from google.protobuf.message import DecodeError from tensorflow.contrib.lite.python import lite_constants as constants @@ -56,7 +54,6 @@ from tensorflow.python.framework.importer import import_graph_def from tensorflow.python.ops.variables import global_variables_initializer from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import tag_constants -# from tensorflow.python.util.all_util import remove_undocumented class TocoConverter(object): @@ -206,12 +203,6 @@ class TocoConverter(object): except (_text_format.ParseError, DecodeError): try: print("Ignore 'tcmalloc: large alloc' warnings.") - - if not isinstance(file_content, str): - if PY3: - file_content = file_content.decode('utf-8') - else: - file_content = file_content.encode('utf-8') _text_format.Merge(file_content, graph_def) except (_text_format.ParseError, DecodeError): raise ValueError( @@ -391,5 +382,3 @@ def _freeze_graph(sess, output_tensors): output_arrays) else: return sess.graph_def - -# remove_undocumented(__name__) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 5c7fa09891..e33b430937 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -178,7 +178,7 @@ ArrayDataType ConvertDataType(tensorflow::DataType dtype) { else if (dtype == DT_STRING) return ArrayDataType::kString; else - LOG(INFO) << "Unsupported data type in placeholder op: " << dtype; + LOG(INFO) << "Unsupported data type in placehoder op: " << dtype; return ArrayDataType::kNone; } diff --git a/tensorflow/contrib/lite/toco/toco_port.cc b/tensorflow/contrib/lite/toco/toco_port.cc index de76fd4032..1b21c8bc60 100644 --- a/tensorflow/contrib/lite/toco/toco_port.cc +++ b/tensorflow/contrib/lite/toco/toco_port.cc @@ -20,12 +20,6 @@ limitations under the License. 
#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" -#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) -namespace std { -double round(double x) { return ::round(x); } -} // namespace std -#endif - namespace toco { namespace port { void CopyToBuffer(const string& src, char* dest) { diff --git a/tensorflow/contrib/lite/toco/toco_port.h b/tensorflow/contrib/lite/toco/toco_port.h index 17f82b9dd7..5c019cb2bf 100644 --- a/tensorflow/contrib/lite/toco/toco_port.h +++ b/tensorflow/contrib/lite/toco/toco_port.h @@ -34,24 +34,6 @@ limitations under the License. #define TFLITE_PROTO_NS google::protobuf #endif -#ifdef __ANDROID__ -#include -namespace std { - -template -std::string to_string(T value) -{ - std::ostringstream os ; - os << value ; - return os.str() ; -} - -#ifdef __ARM_ARCH_7A__ -double round(double x); -#endif -} -#endif - namespace toco { namespace port { diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh index a28fc3a87f..e8c6edd7ba 100755 --- a/tensorflow/contrib/makefile/compile_nsync.sh +++ b/tensorflow/contrib/makefile/compile_nsync.sh @@ -270,7 +270,7 @@ for arch in $archs; do PLATFORM_LDFLAGS=-pthread MKDEP=${CC} -M -std=c++11 PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \ - ../../platform/posix/src/per_thread_waiter.c \ + ../../platform/c++11/src/per_thread_waiter.cc \ ../../platform/c++11/src/yield.cc \ ../../platform/c++11/src/time_rep_timespec.cc \ ../../platform/c++11/src/nsync_panic.cc diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh index 48953e2e38..eff9081e35 100755 --- a/tensorflow/contrib/makefile/download_dependencies.sh +++ b/tensorflow/contrib/makefile/download_dependencies.sh @@ -27,7 +27,9 @@ if [ ! -f $BZL_FILE_PATH ]; then fi EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" -GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once +# the archive has been propagated in mirror.bazel.build. +GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index a6be2084aa..2ed99d50a4 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -2503,7 +2503,7 @@ def _compute_recall_at_precision(tp, fp, fn, precision, name): name: An optional variable_scope name. Returns: - The recall at a given `precision`. + The recall at a the given `precision`. 
""" precisions = math_ops.div(tp, tp + fp + _EPSILON) tf_index = math_ops.argmin( diff --git a/tensorflow/contrib/mpi_collectives/kernels/ring.h b/tensorflow/contrib/mpi_collectives/kernels/ring.h index c001615d3f..1d56d588bc 100644 --- a/tensorflow/contrib/mpi_collectives/kernels/ring.h +++ b/tensorflow/contrib/mpi_collectives/kernels/ring.h @@ -129,7 +129,7 @@ cudaStream_t CudaStreamForMPI(); * has the fully accumulated Segment 1; and so on. The scatter-reduce is * complete. * - * Next, the allgather distributes these fully accumulated chunks across all + * Next, the allgather distributes these fully accumululated chunks across all * nodes. Communication proceeds in the same ring, once again in N-1 steps. At * the ith step, node j will send chunk (j - i + 1) and receive chunk (j - i). * For example, at the first iteration, the following transfers will occur: diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py index 915e6504e1..21bf3f5313 100644 --- a/tensorflow/contrib/opt/python/training/adamax_test.py +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -224,10 +224,8 @@ class AdaMaxOptimizerTest(test.TestCase): var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0), - rtol=1e-2) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1), - rtol=1e-2) + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) if use_resource: self.assertEqual("var0_%d/AdaMax:0" % (i,), opt.get_slot(var=var0, name="m").name) diff --git a/tensorflow/contrib/opt/python/training/model_average_optimizer.py b/tensorflow/contrib/opt/python/training/model_average_optimizer.py index b6b10e500b..a7c97a1da2 100644 --- a/tensorflow/contrib/opt/python/training/model_average_optimizer.py +++ b/tensorflow/contrib/opt/python/training/model_average_optimizer.py @@ -62,7 +62,7 @@ class ModelAverageCustomGetter(object): """ def __init__(self, worker_device): - """Create a new `ModelAverageCustomGetter`. + """Create a new `ElasticAverageCustomGetter`. Args: worker_device: String. Name of the `worker` job. 
diff --git a/tensorflow/contrib/periodic_resample/BUILD b/tensorflow/contrib/periodic_resample/BUILD index aad1ca04c5..6ca7fe8b6e 100644 --- a/tensorflow/contrib/periodic_resample/BUILD +++ b/tensorflow/contrib/periodic_resample/BUILD @@ -6,13 +6,12 @@ exports_files(["LICENSE"]) load( "//tensorflow:tensorflow.bzl", - "tf_cc_test", + "py_test", "tf_gen_op_libs", "tf_custom_op_library", "tf_custom_op_py_library", "tf_gen_op_wrapper_py", ) -load("//tensorflow:tensorflow.bzl", "py_test") cc_library( name = "all_ops", @@ -85,23 +84,6 @@ py_test( ":init_py", "//tensorflow/contrib/util:util_py", "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradient_checker", - ], -) - -tf_cc_test( - name = "periodic_resample_op_cc_test", - size = "small", - srcs = [ - "ops/array_ops_test.cc", - ], - deps = [ - ":all_ops", - "//tensorflow/core:framework", - "//tensorflow/core:protos_all_proto", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", ], ) diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc index 514689cf45..e18923c8aa 100644 --- a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc +++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc @@ -22,9 +22,4 @@ namespace tensorflow { REGISTER_KERNEL_BUILDER(Name("PeriodicResample").Device(DEVICE_CPU), PeriodicResampleOp); - -REGISTER_KERNEL_BUILDER(Name("PeriodicResampleOpGrad") - .Device(DEVICE_CPU), - PeriodicResampleOpGrad); - } // namespace tensorflow diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h index 42fba81a5c..3ab588c458 100644 --- a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h +++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h @@ -25,202 +25,92 @@ #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/util/work_sharder.h" namespace { -// Computes input tensor index for given output index during forward -// propagation through periodic_resample operation. -class InputIndexer { - public: - InputIndexer(const std::vector& output_dimensions, - const tensorflow::TensorShape& input_shape, - int adjustable_dimension) - : output_dimensions_(output_dimensions), - adjustable_dimension_(adjustable_dimension), - rank_(input_shape.dims()), - linear_output_index_(0), - linear_input_index_(0), - adjustable_dimension_carriage_sum_(0) { - auto input_dimensions = TensorShapeToVector(input_shape); - // factors by which input_dimensions increases/decreases w.r.t. 
- // output_dimensions - dimension_ceiling_ = - ComputeDimensionCeiling(output_dimensions, input_dimensions); - cumulative_dimensions_ = ComputeCumulativeDimensions(); - - output_indices_.resize(output_dimensions_.size()); - input_indices_.resize(output_dimensions_.size()); - - // Compute index_factors - index_factors_.resize(rank_); - tensorflow::int64 last_index_factor = 1; - for (auto r = rank_ - 1; r >= 0; --r) { - index_factors_[r] = last_index_factor; - last_index_factor *= input_dimensions[r]; - } - } - - tensorflow::int64 linear_input_index() const { return linear_input_index_; } - - void MoveToOutputIndex(tensorflow::int64 output_index); - void IncrementOutputIndex(); - - private: - void RecomputeInputAdjustableDimensionIndex() { - tensorflow::int64 index = adjustable_dimension_carriage_sum_; - index *= output_dimensions_[adjustable_dimension_]; - index += output_indices_[adjustable_dimension_]; - input_indices_[adjustable_dimension_] = index; - } - - std::vector TensorShapeToVector( - const tensorflow::TensorShape& tensor_shape); - - std::vector ComputeDimensionCeiling( - const std::vector& output_dimensions, - const std::vector& input_dimensions); - - std::vector ComputeCumulativeDimensions(); - - const std::vector output_dimensions_; - std::vector dimension_ceiling_; - std::vector index_factors_; - std::vector cumulative_dimensions_; - std::vector output_indices_; - std::vector input_indices_; - - const int adjustable_dimension_; - const int rank_; - tensorflow::int64 linear_output_index_; - tensorflow::int64 linear_input_index_; - tensorflow::int64 adjustable_dimension_carriage_sum_; -}; - -void InputIndexer::MoveToOutputIndex(tensorflow::int64 output_index) { - linear_output_index_ = output_index; - linear_input_index_ = 0; +template +IndexT compute_input_index( + IndexVecT* target_dimensions, const IndexT& output_index, + const IndexVecT& original_dimensions, const int& adjustable_dimension, + const std::vector& dimension_ceiling, + const std::vector& cumulative_dimensions, IndexT* result, + std::vector* output_indices, const int& rank) { + *result = 0; + output_indices->clear(); // un-rasterize the output index auto last_reduced_i = output_index; - for (auto r = rank_ - 1; r >= 0; --r) { - output_indices_[r] = last_reduced_i % output_dimensions_[r]; + for (auto r = rank - 1; r >= 0; --r) { + (*output_indices)[r] = last_reduced_i % (*target_dimensions)[r]; last_reduced_i = - (last_reduced_i - output_indices_[r]) / output_dimensions_[r]; + (last_reduced_i - (*output_indices)[r]) / (*target_dimensions)[r]; } - tensorflow::int64 carriage_sum = 0; - for (int qi = 0; qi < rank_; ++qi) { - if (qi == adjustable_dimension_) continue; - carriage_sum += cumulative_dimensions_[qi] * - (output_indices_[qi] % dimension_ceiling_[qi]); - } - adjustable_dimension_carriage_sum_ = carriage_sum; - // rasterize the input index - for (auto r = rank_ - 1; r >= 0; --r) { - if (r != adjustable_dimension_) { - input_indices_[r] = output_indices_[r] / dimension_ceiling_[r]; - } else { - RecomputeInputAdjustableDimensionIndex(); - } - } - for (auto r = rank_ - 1; r >= 0; --r) { - linear_input_index_ += index_factors_[r] * input_indices_[r]; - } -} - -void InputIndexer::IncrementOutputIndex() { - linear_output_index_++; - for (auto r = rank_ - 1; r >= 0; --r) { - auto old_carriage_sum_increment = - cumulative_dimensions_[r] * - (output_indices_[r] % dimension_ceiling_[r]); - output_indices_[r] = (output_indices_[r] + 1) % output_dimensions_[r]; - if (r != adjustable_dimension_) { - auto new_input_index 
= output_indices_[r] / dimension_ceiling_[r]; - linear_input_index_ += - (new_input_index - input_indices_[r]) * index_factors_[r]; - - input_indices_[r] = new_input_index; - - auto new_carriage_sum_increment = - cumulative_dimensions_[r] * - (output_indices_[r] % dimension_ceiling_[r]); - - adjustable_dimension_carriage_sum_ = adjustable_dimension_carriage_sum_ - - old_carriage_sum_increment + - new_carriage_sum_increment; - } - - if (output_indices_[r] != 0) { - // No more carries to higher indices. - break; + IndexT last_index_factor = 1; + for (auto r = rank - 1; r >= 0; --r) { + IndexT index = 0; + if (r != adjustable_dimension) + index = (*output_indices)[r] / dimension_ceiling[r]; + else { + for (int qi = 0; qi < rank; ++qi) { + if (qi == adjustable_dimension) continue; + index += cumulative_dimensions[qi] * + ((*output_indices)[qi] % dimension_ceiling[qi]); + } + index *= (*target_dimensions)[adjustable_dimension]; + index += (*output_indices)[r]; } + *result += last_index_factor * index; + last_index_factor *= original_dimensions[r]; } - auto old_adjustable_dimension_input_index = - input_indices_[adjustable_dimension_]; - RecomputeInputAdjustableDimensionIndex(); - linear_input_index_ += (input_indices_[adjustable_dimension_] - - old_adjustable_dimension_input_index) * - index_factors_[adjustable_dimension_]; -} -std::vector InputIndexer::TensorShapeToVector( - const tensorflow::TensorShape& tensor_shape) { - std::vector result(tensor_shape.dims()); - int count = 0; - for (const auto dim_info : tensor_shape) { - result[count] = dim_info.size; - ++count; - } - return result; + return *result; } -std::vector InputIndexer::ComputeDimensionCeiling( - const std::vector& output_dimensions, - const std::vector& input_dimensions) { - std::vector dimension_ceiling(input_dimensions.size()); - for (size_t i = 0; i < input_dimensions.size(); ++i) { - dimension_ceiling[i] = (output_dimensions[i] + input_dimensions[i] - 1) / - input_dimensions[i]; - } - return dimension_ceiling; -} +template // both types are needed here b/c IndexVecT and + // InputDataT are not related + void + fill_periodic_tensor( + tensorflow::OpKernelContext* context, + const IndexVecT& desired_shape, + const tensorflow::Tensor& input_tensor) { + // input is a strided array (last index is fastest, C-ordered) + auto input = input_tensor.flat(); + const int rank = input_tensor.dims(); + // original and target dimensions + std::vector original_dimensions(rank), + target_dimensions(rank); + tensorflow::int64 total_size(input_tensor.NumElements()), new_sliced_size(1); + // factors by which original_dimensions increases/decreases w.r.t. 
+ // target_dimensions + std::vector dimension_ceiling(rank), + cumulative_dimensions(rank); + // index of adjustable dimension + int adjustable_dimension; + tensorflow::TensorShape output_shape; -std::vector InputIndexer::ComputeCumulativeDimensions() { - std::vector cumulative_dimensions(rank_); - int count = 0; - for (int i = 0; i < rank_; ++i) { - if (count == 0) { - cumulative_dimensions[count] = 1; - } else { - cumulative_dimensions[count] = - cumulative_dimensions[count - 1] * dimension_ceiling_[count - 1]; - } - ++count; - } - return cumulative_dimensions; -} + // requires that the rank of the input tensor and length of the desired shape + // are equal + OP_REQUIRES(context, rank == desired_shape.size(), + tensorflow::errors::InvalidArgument( + "periodic_resample expects the rank of the input tensor, ", + rank, ", to be the same as the length of the desired shape, ", + desired_shape.size(), ".")); -template -void process_desired_shape(tensorflow::OpKernelContext* context, - const tensorflow::TensorShape& input_tensor_shape, - const IndexVecT& desired_shape, - int* adjustable_dimension, - std::vector* target_dimensions, - tensorflow::int64* output_size) { - tensorflow::int64 new_sliced_size = 1; bool found = false; - const int rank = input_tensor_shape.dims(); + const auto& input_tensor_shape = input_tensor.shape(); + for (int i = 0; i < rank; ++i) { + // if (desired_shape(i) < 1) { if (desired_shape[i] < 1) { // only one index can be adjustable OP_REQUIRES(context, !found, tensorflow::errors::InvalidArgument( "periodic_resample expects only " "one index to be marked as adjustable.")); - *adjustable_dimension = i; + adjustable_dimension = i; found = true; } else { OP_REQUIRES( @@ -232,8 +122,9 @@ void process_desired_shape(tensorflow::OpKernelContext* context, i, " input tensor has size ", input_tensor_shape.dim_size(i), ", desired shape has size ", desired_shape[i], ".")); - (*target_dimensions)[i] = desired_shape[i]; - new_sliced_size *= (*target_dimensions)[i]; + // target_dimensions[i] = desired_shape(i); + target_dimensions[i] = desired_shape[i]; + new_sliced_size *= target_dimensions[i]; } } // at least one index needs to be adjustable @@ -241,50 +132,26 @@ void process_desired_shape(tensorflow::OpKernelContext* context, tensorflow::errors::InvalidArgument( "periodic_resample expects at least " "one index to be marked as adjustable.")); - (*target_dimensions)[*adjustable_dimension] = - input_tensor_shape.num_elements() / new_sliced_size; - - *output_size = new_sliced_size * (*target_dimensions)[*adjustable_dimension]; -} - -// Heuristic number based on measurements on -// Intel(R) Core(TM) i7-4930K CPU @ 3.40GHz -const tensorflow::int64 costPerFillIndex = 35; -enum class Mode { - kForward, - kGradient -}; - -// Computes either periodic_resample operation output or gradients for it, -// depending on |mode|. -// |original_shape| is always shape of input to periodic_resample operation. -// |source_tensor| is either source for periodic_resample (for forward mode) -// or gradients tensor. -// |desired_shape| is always shape, provided by user, to which forward -// propagation attempts resample input tensor. 
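Both the removed `process_desired_shape` helper and the new `fill_periodic_tensor` perform the same shape bookkeeping: exactly one entry of the desired shape is marked adjustable (value < 1), the remaining entries fix `new_sliced_size`, and the adjustable entry absorbs the leftover elements; `dimension_ceiling` and `cumulative_dimensions` then drive the index arithmetic. A minimal, dependency-free Python sketch of that bookkeeping (helper name and example values are illustrative, mirroring the C++ above):

```python
import math

def resample_shape_bookkeeping(original_dims, desired_shape):
    """Sketch of the shape bookkeeping done by the periodic_resample kernel."""
    rank = len(original_dims)
    assert len(desired_shape) == rank, "input rank must match desired shape length"

    adjustable, new_sliced_size = None, 1
    target_dims = [0] * rank
    for i, d in enumerate(desired_shape):
        if d is None or d < 1:                  # marked adjustable
            assert adjustable is None, "only one adjustable dimension allowed"
            adjustable = i
        else:
            target_dims[i] = d
            new_sliced_size *= d
    assert adjustable is not None, "at least one adjustable dimension required"

    total_size = 1
    for d in original_dims:
        total_size *= d
    target_dims[adjustable] = total_size // new_sliced_size

    dimension_ceiling = [int(math.ceil(float(t) / o))
                         for t, o in zip(target_dims, original_dims)]
    cumulative_dimensions = [1] * rank
    for i in range(1, rank):
        cumulative_dimensions[i] = (cumulative_dimensions[i - 1] *
                                    dimension_ceiling[i - 1])
    return target_dims, dimension_ceiling, cumulative_dimensions

# A (2, 2, 4) input resampled to [4, 4, None] yields target shape [4, 4, 1],
# matching the shape-inference test removed further below in this patch.
print(resample_shape_bookkeeping([2, 2, 4], [4, 4, None]))
```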
-template -void -do_periodic_resample_op(tensorflow::OpKernelContext* context, - const tensorflow::TensorShape& original_shape, - const tensorflow::PartialTensorShape& desired_shape, - const tensorflow::Tensor& source_tensor) { - const int rank = source_tensor.dims(); + int count = 0; + for (const auto dim_info : input_tensor.shape()) { + original_dimensions[count] = dim_info.size; + ++count; + } - // requires that the rank of the input tensor and length of the desired shape - // are equal - OP_REQUIRES(context, rank == desired_shape.dims(), - tensorflow::errors::InvalidArgument( - "periodic_resample expects the rank of the input tensor, ", - rank, ", to be the same as the length of the desired shape, ", - desired_shape.dims(), ".")); + target_dimensions[adjustable_dimension] = total_size / new_sliced_size; - std::vector target_dimensions(rank); - tensorflow::int64 new_size = 0; - // index of adjustable dimension - int adjustable_dimension = 0; - process_desired_shape(context, original_shape, desired_shape.dim_sizes(), - &adjustable_dimension, &target_dimensions, &new_size); + count = 0; + for (int i = 0; i < input_tensor.shape().dims(); ++i) { + dimension_ceiling[count] = tensorflow::int64(std::ceil( + float(target_dimensions[count]) / float(original_dimensions[count]))); + if (count == 0) + cumulative_dimensions[count] = 1; + else + cumulative_dimensions[count] = + cumulative_dimensions[count - 1] * dimension_ceiling[count - 1]; + ++count; + } // ensure that the new dimension is greater than zero OP_REQUIRES(context, target_dimensions[adjustable_dimension] > 0, @@ -293,14 +160,11 @@ do_periodic_resample_op(tensorflow::OpKernelContext* context, "adjustable dimension, ", adjustable_dimension, ", isn't greater than zero, ", target_dimensions[adjustable_dimension], ".")); - tensorflow::TensorShape output_shape; - if (mode == Mode::kForward) { - for (int i = 0; i < rank; ++i) { - output_shape.AddDim(target_dimensions[i]); - } - } else { - output_shape = original_shape; + for (int i = 0; i < rank; ++i) { + output_shape.AddDim(target_dimensions[i]); } + const auto new_size = + new_sliced_size * target_dimensions[adjustable_dimension]; // Create an output tensor and attach it to the current context tensorflow::Tensor* output_tensor = nullptr; @@ -308,73 +172,47 @@ do_periodic_resample_op(tensorflow::OpKernelContext* context, context->allocate_output(0, output_shape, &output_tensor)); auto output = output_tensor->flat(); - // input is a strided array (last index is fastest, C-ordered) - auto input = source_tensor.flat(); + // memory is allocated for these variables outside the inner loop for + // efficiency (although, I could create a separate class scope for + // this purpose instead) + tensorflow::int64 result = 0; + std::vector output_indices(target_dimensions.size()); // Fill output tensor with periodically resampled input tensor values - InputIndexer input_indexer(target_dimensions, original_shape, - adjustable_dimension); - - auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads()); - auto fill_output_tensor = [&input_indexer, &output, &input]( - tensorflow::int64 start, tensorflow::int64 limit) { - InputIndexer local_indexer(input_indexer); - local_indexer.MoveToOutputIndex(start); - for (tensorflow::int64 output_index = start; output_index < limit; - ++output_index) { - if (mode == Mode::kForward) { - output(output_index) = input(local_indexer.linear_input_index()); - } else { - output(local_indexer.linear_input_index()) = input(output_index); - } - 
local_indexer.IncrementOutputIndex(); - } - }; - ::tensorflow::Shard(worker_threads.num_threads, worker_threads.workers, - new_size, costPerFillIndex, fill_output_tensor); -} - -#define DATA_TYPE_SWITCH(data_type, context, CASE) \ - switch (data_type) { \ - CASE(float) \ - CASE(double) \ - CASE(tensorflow::int32) \ - CASE(tensorflow::int64) \ - default: \ - context->CtxFailure(__FILE__, __LINE__, \ - tensorflow::errors::InvalidArgument( \ - "Unsuppored tensor elements type")); \ - break; \ + for (tensorflow::int64 output_index = 0; output_index < new_size; + ++output_index) { + output(output_index) = input(compute_input_index( + &target_dimensions, output_index, original_dimensions, + adjustable_dimension, dimension_ceiling, cumulative_dimensions, &result, + &output_indices, rank)); } +} void create_output_tensor( tensorflow::OpKernelContext* context, const tensorflow::Tensor& input_tensor, const tensorflow::DataType& input_tensor_type, - const tensorflow::PartialTensorShape& desired_shape) { -#define CASE(type) \ - case tensorflow::DataTypeToEnum::value: \ - do_periodic_resample_op( \ - context, input_tensor.shape(), desired_shape, input_tensor); \ - break; + const tensorflow::PartialTensorShape& desired_shape_tensor) { + auto desired_shape = desired_shape_tensor.dim_sizes(); - DATA_TYPE_SWITCH(input_tensor_type, context, CASE); -#undef CASE -} - -void create_grad_tensor(tensorflow::OpKernelContext* context, - const tensorflow::Tensor& grad_tensor, - const tensorflow::DataType& grad_tensor_type, - const tensorflow::TensorShape& original_shape, - const tensorflow::PartialTensorShape& desired_shape) { -#define CASE(type) \ - case tensorflow::DataTypeToEnum::value: \ - do_periodic_resample_op( \ - context, original_shape, desired_shape, grad_tensor); \ + // obligatory type switch + switch (input_tensor_type) { + case tensorflow::DataTypeToEnum::value: + fill_periodic_tensor(context, desired_shape, input_tensor); break; - - DATA_TYPE_SWITCH(grad_tensor_type, context, CASE); -#undef CASE + case tensorflow::DataTypeToEnum::value: + fill_periodic_tensor(context, desired_shape, input_tensor); + break; + case tensorflow::DataTypeToEnum::value: + fill_periodic_tensor(context, desired_shape, + input_tensor); + break; + case tensorflow::DataTypeToEnum::value: + fill_periodic_tensor(context, desired_shape, + input_tensor); + break; + default:; + } } } // namespace @@ -400,25 +238,4 @@ class PeriodicResampleOp : public tensorflow::OpKernel { tensorflow::PartialTensorShape desired_shape; }; -class PeriodicResampleOpGrad : public tensorflow::OpKernel { - public: - explicit PeriodicResampleOpGrad(tensorflow::OpKernelConstruction* context) - : tensorflow::OpKernel(context) { - OP_REQUIRES_OK(context, - context->GetAttr("original_shape", &original_shape)); - OP_REQUIRES_OK(context, context->GetAttr("desired_shape", &desired_shape)); - } - - void Compute(tensorflow::OpKernelContext* context) override { - const tensorflow::Tensor& grad_tensor = context->input(0); - const tensorflow::DataType grad_tensor_type = context->input_dtype(0); - create_grad_tensor(context, grad_tensor, grad_tensor_type, original_shape, - desired_shape); - } - - private: - tensorflow::TensorShape original_shape; - tensorflow::PartialTensorShape desired_shape; -}; - #endif // TENSORFLOW_KERNELS_PERIODICRESAMPLE_OP_H_ diff --git a/tensorflow/contrib/periodic_resample/ops/array_ops.cc b/tensorflow/contrib/periodic_resample/ops/array_ops.cc index fd38cd09b4..82bd796956 100644 --- a/tensorflow/contrib/periodic_resample/ops/array_ops.cc +++ 
b/tensorflow/contrib/periodic_resample/ops/array_ops.cc @@ -26,42 +26,7 @@ REGISTER_OP("PeriodicResample") .Input("values: T") .Attr("shape: shape") .Output("output: T") - .SetShapeFn([](shape_inference::InferenceContext* c) { - tensorflow::PartialTensorShape desired_shape; - TF_RETURN_IF_ERROR(c->GetAttr("shape", &desired_shape)); - shape_inference::ShapeHandle input_tensor_shape = c->input(0); - shape_inference::DimensionHandle num_input_elements = - c->NumElements(input_tensor_shape); - shape_inference::ShapeHandle result_shape_handle; - if (!shape_inference::InferenceContext::ValueKnown(num_input_elements)) { - TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape( - desired_shape, &result_shape_handle)); - } else { - const int rank = c->Rank(input_tensor_shape); - std::vector target_dimensions(rank); - tensorflow::int64 new_sliced_size = 1; - int adjustable_dimension = 0; - for (int i = 0; i < rank; ++i) { - if (desired_shape.dim_size(i) < 1) { - adjustable_dimension = i; - } else { - target_dimensions[i] = desired_shape.dim_size(i); - new_sliced_size *= target_dimensions[i]; - } - } - target_dimensions[adjustable_dimension] = - shape_inference::InferenceContext::Value( - num_input_elements) / new_sliced_size; - tensorflow::TensorShape result_shape; - for (int i = 0; i < rank; ++i) { - result_shape.AddDim(target_dimensions[i]); - } - TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape( - result_shape, &result_shape_handle)); - } - c->set_output(0, result_shape_handle); - return Status::OK(); - }) + .SetShapeFn(shape_inference::ExplicitShape) .Doc(R"doc( Periodically resample elements of a tensor to conform to `shape`. @@ -136,20 +101,4 @@ output: Periodically resampled tensor that has dimensions specified as in )doc"); - -REGISTER_OP("PeriodicResampleOpGrad") - .Attr("T: numbertype") - .Input("grad: T") - .Attr("original_shape: shape") - .Attr("desired_shape: shape") - .Output("grad_values: T") - .SetShapeFn([](shape_inference::InferenceContext* c) { - tensorflow::TensorShape original_shape; - TF_RETURN_IF_ERROR(c->GetAttr("original_shape", &original_shape)); - shape_inference::ShapeHandle s; - TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape(original_shape, &s)); - c->set_output(0, s); - return Status::OK(); -}); - } // namespace tensorflow diff --git a/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc b/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc deleted file mode 100644 index 43b7c1799f..0000000000 --- a/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/shape_inference_testutil.h" -#include "tensorflow/core/framework/tensor_shape.pb.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { - -TEST(ArrayOpsTest, PeriodicResample_ShapeFn) { - ShapeInferenceTestOp op("PeriodicResample"); - // Case 1: output shape can be fully inferreed. - PartialTensorShape shape({4, 4, -1}); - TensorShapeProto shape_proto; - shape.AsProto(&shape_proto); - - TF_ASSERT_OK(NodeDefBuilder("test", "PeriodicResample") - .Input({"values", 0, DT_INT32}) - .Attr("shape", shape_proto) - .Finalize(&op.node_def)); - INFER_OK(op, "[2,2,4]", "[4,4,1]"); - // Case 2: output shape can not be inferred - report desired shape. - INFER_OK(op, "[2,2,?]", "[4,4,?]"); -} - -} // end namespace tensorflow diff --git a/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py index 31a6fe1d94..a25de55e18 100644 --- a/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py +++ b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py @@ -21,11 +21,8 @@ from __future__ import print_function import numpy from tensorflow.contrib.periodic_resample import periodic_resample -from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import variables from tensorflow.python.platform import googletest @@ -96,6 +93,7 @@ class PeriodicResampleTest(test_util.TensorFlowTestCase): def testPeriodicResampleErrors(self): input_tensor = numpy.zeros(shape=[1, 2, 2, 4]) with self.test_session(): + variables.global_variables_initializer().run() with self.assertRaisesWithPredicateMatch( errors_impl.InvalidArgumentError, 'Dimension 3 input tensor has size 4, desired shape has size 1'): @@ -105,29 +103,6 @@ class PeriodicResampleTest(test_util.TensorFlowTestCase): '4, to be the same as the length of the desired shape, 3'): periodic_resample(input_tensor, [None, 4, 4]).eval() - def testPeriodicResampleGradient(self): - desired_shape = numpy.array([4, 4, None]) - result_shape = (4, 4, 1) - input_shape = (2, 2, 4) - with self.test_session() as sess: - x = array_ops.placeholder(dtypes.float32, shape=input_shape) - output = periodic_resample(x, desired_shape) - error = gradient_checker.compute_gradient_error( - x, input_shape, output, result_shape) - self.assertLess(error, 1e-4) - - def testPeriodicResampleShapeInference(self): - with self.test_session() as sess: - # Case 1: output shape can be fully inferreed. - x = array_ops.placeholder(dtypes.float32, shape=(2, 2, 4)) - output = periodic_resample(x, [4, 4, None]) - self.assertEqual(output.shape, [4, 4, 1]) - # Case 2: output shape can not be inferred - report desired shape. 
- x = array_ops.placeholder(dtypes.float32, shape=(2, 2, None)) - output = periodic_resample(x, [4, 4, None]) - self.assertTrue(output.shape.is_compatible_with([4, 4, None])) - self.assertEqual(output.shape[2].value, None) - if __name__ == '__main__': googletest.main() diff --git a/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py index 470e300ccb..348623d8f8 100644 --- a/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py +++ b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py @@ -21,17 +21,11 @@ from __future__ import print_function # pylint: disable=unused-import from tensorflow.contrib.periodic_resample.python.ops import gen_periodic_resample_op -from tensorflow.contrib.periodic_resample.python.ops.gen_periodic_resample_op import periodic_resample, periodic_resample_op_grad +from tensorflow.contrib.periodic_resample.python.ops.gen_periodic_resample_op import periodic_resample from tensorflow.contrib.util import loader -from tensorflow.python.framework import ops from tensorflow.python.platform import resource_loader # pylint: enable=unused-import _periodic_resample_op = loader.load_op_library( resource_loader.get_path_to_datafile('_periodic_resample_op.so')) - -@ops.RegisterGradient("PeriodicResample") -def _periodic_resample_grad_cc(op, grad): - return periodic_resample_op_grad( - grad, op.inputs[0].shape, op.get_attr('shape')) diff --git a/tensorflow/contrib/predictor/contrib_estimator_predictor.py b/tensorflow/contrib/predictor/contrib_estimator_predictor.py index af3b2ad1b5..b7a98c68e2 100644 --- a/tensorflow/contrib/predictor/contrib_estimator_predictor.py +++ b/tensorflow/contrib/predictor/contrib_estimator_predictor.py @@ -34,8 +34,7 @@ class ContribEstimatorPredictor(predictor.Predictor): prediction_input_fn, input_alternative_key=None, output_alternative_key=None, - graph=None, - config=None): + graph=None): """Initialize a `ContribEstimatorPredictor`. Args: @@ -49,7 +48,6 @@ class ContribEstimatorPredictor(predictor.Predictor): multi-headed models. graph: Optional. The Tensorflow `graph` in which prediction should be done. - config: `ConfigProto` proto used to configure the session. """ self._graph = graph or ops.Graph() with self._graph.as_default(): @@ -60,7 +58,6 @@ class ContribEstimatorPredictor(predictor.Predictor): checkpoint_path = saver.latest_checkpoint(estimator.model_dir) self._session = monitored_session.MonitoredSession( session_creator=monitored_session.ChiefSessionCreator( - config=config, checkpoint_filename_with_path=checkpoint_path)) input_alternative_key = ( diff --git a/tensorflow/contrib/predictor/core_estimator_predictor.py b/tensorflow/contrib/predictor/core_estimator_predictor.py index a725072e72..d78d94c269 100644 --- a/tensorflow/contrib/predictor/core_estimator_predictor.py +++ b/tensorflow/contrib/predictor/core_estimator_predictor.py @@ -51,8 +51,7 @@ class CoreEstimatorPredictor(predictor.Predictor): estimator, serving_input_receiver_fn, output_key=None, - graph=None, - config=None): + graph=None): """Initialize a `CoreEstimatorPredictor`. Args: @@ -63,7 +62,6 @@ class CoreEstimatorPredictor(predictor.Predictor): `None`, then `DEFAULT_SERVING_SIGNATURE_DEF_KEY` is used. graph: Optional. The Tensorflow `graph` in which prediction should be done. - config: `ConfigProto` proto used to configure the session. 
""" self._graph = graph or ops.Graph() with self._graph.as_default(): @@ -73,7 +71,6 @@ class CoreEstimatorPredictor(predictor.Predictor): checkpoint_dir = estimator.model_dir self._session = monitored_session.MonitoredSession( session_creator=monitored_session.ChiefSessionCreator( - config=config, checkpoint_dir=checkpoint_dir)) feed_tensor_info = signature_def.inputs diff --git a/tensorflow/contrib/predictor/predictor_factories.py b/tensorflow/contrib/predictor/predictor_factories.py index f275bc15ad..6e77e934fe 100644 --- a/tensorflow/contrib/predictor/predictor_factories.py +++ b/tensorflow/contrib/predictor/predictor_factories.py @@ -30,8 +30,7 @@ def from_contrib_estimator(estimator, prediction_input_fn, input_alternative_key=None, output_alternative_key=None, - graph=None, - config=None): + graph=None): """Constructs a `Predictor` from a `tf.contrib.learn.Estimator`. Args: @@ -45,7 +44,6 @@ def from_contrib_estimator(estimator, multi-headed models. graph: Optional. The Tensorflow `graph` in which prediction should be done. - config: `ConfigProto` proto used to configure the session. Returns: An initialized `Predictor`. @@ -64,15 +62,13 @@ def from_contrib_estimator(estimator, prediction_input_fn, input_alternative_key=input_alternative_key, output_alternative_key=output_alternative_key, - graph=graph, - config=config) + graph=graph) def from_estimator(estimator, serving_input_receiver_fn, output_key=None, - graph=None, - config=None): + graph=None): """Constructs a `Predictor` from a `tf.python.estimator.Estimator`. Args: @@ -83,7 +79,6 @@ def from_estimator(estimator, `None`, then `DEFAULT_SERVING_SIGNATURE_DEF_KEY` is used. graph: Optional. The Tensorflow `graph` in which prediction should be done. - config: `ConfigProto` proto used to configure the session. Returns: An initialized `Predictor`. @@ -98,19 +93,14 @@ def from_estimator(estimator, 'tf.contrib.learn.Estimator. You likely want to call ' 'from_contrib_estimator.') return core_estimator_predictor.CoreEstimatorPredictor( - estimator, - serving_input_receiver_fn, - output_key=output_key, - graph=graph, - config=config) + estimator, serving_input_receiver_fn, output_key=output_key, graph=graph) def from_saved_model(export_dir, signature_def_key=None, signature_def=None, tags=None, - graph=None, - config=None): + graph=None): """Constructs a `Predictor` from a `SavedModel` on disk. Args: @@ -125,7 +115,6 @@ def from_saved_model(export_dir, `SignatureDef`. Defaults to `DEFAULT_TAGS`. graph: Optional. The Tensorflow `graph` in which prediction should be done. - config: `ConfigProto` proto used to configure the session. Returns: An initialized `Predictor`. 
@@ -139,5 +128,4 @@ def from_saved_model(export_dir, signature_def_key=signature_def_key, signature_def=signature_def, tags=tags, - graph=graph, - config=config) + graph=graph) diff --git a/tensorflow/contrib/predictor/predictor_factories_test.py b/tensorflow/contrib/predictor/predictor_factories_test.py index a2ef1dc3af..578d9424b2 100644 --- a/tensorflow/contrib/predictor/predictor_factories_test.py +++ b/tensorflow/contrib/predictor/predictor_factories_test.py @@ -20,7 +20,6 @@ from __future__ import print_function from tensorflow.contrib.predictor import predictor_factories from tensorflow.contrib.predictor import testing_common -from tensorflow.core.protobuf import config_pb2 from tensorflow.python.platform import test MODEL_DIR_NAME = 'contrib/predictor/test_export_dir' @@ -42,11 +41,6 @@ class PredictorFactoriesTest(test.TestCase): """Test loading from_saved_model with tags.""" predictor_factories.from_saved_model(self._export_dir, tags='serve') - def testFromSavedModelWithSessionConfig(self): - """Test loading from_saved_model with session config.""" - predictor_factories.from_saved_model( - self._export_dir, config=config_pb2.ConfigProto()) - def testFromSavedModelWithBadTags(self): """Test that loading fails for bad tags.""" bad_tags_regex = ('.*? could not be found in SavedModel') @@ -59,13 +53,6 @@ class PredictorFactoriesTest(test.TestCase): predictor_factories.from_contrib_estimator( estimator, input_fn, output_alternative_key='sum') - def testFromContribEstimatorWithSessionConfig(self): - estimator = testing_common.get_arithmetic_estimator(core=False) - input_fn = testing_common.get_arithmetic_input_fn(core=False) - predictor_factories.from_contrib_estimator( - estimator, input_fn, output_alternative_key='sum', - config=config_pb2.ConfigProto()) - def testFromContribEstimatorWithCoreEstimatorRaises(self): estimator = testing_common.get_arithmetic_estimator(core=True) input_fn = testing_common.get_arithmetic_input_fn(core=True) @@ -77,12 +64,6 @@ class PredictorFactoriesTest(test.TestCase): input_fn = testing_common.get_arithmetic_input_fn(core=True) predictor_factories.from_estimator(estimator, input_fn) - def testFromCoreEstimatorWithSessionConfig(self): - estimator = testing_common.get_arithmetic_estimator(core=True) - input_fn = testing_common.get_arithmetic_input_fn(core=True) - predictor_factories.from_estimator( - estimator, input_fn, config=config_pb2.ConfigProto()) - def testFromCoreEstimatorWithContribEstimatorRaises(self): estimator = testing_common.get_arithmetic_estimator(core=False) input_fn = testing_common.get_arithmetic_input_fn(core=False) diff --git a/tensorflow/contrib/predictor/saved_model_predictor.py b/tensorflow/contrib/predictor/saved_model_predictor.py index 95da6d04ed..0dbca0f813 100644 --- a/tensorflow/contrib/predictor/saved_model_predictor.py +++ b/tensorflow/contrib/predictor/saved_model_predictor.py @@ -121,8 +121,7 @@ class SavedModelPredictor(predictor.Predictor): input_names=None, output_names=None, tags=None, - graph=None, - config=None): + graph=None): """Initialize a `CoreEstimatorPredictor`. Args: @@ -143,7 +142,6 @@ class SavedModelPredictor(predictor.Predictor): the correct `SignatureDef`. Defaults to `DEFAULT_TAGS`. graph: Optional. The Tensorflow `graph` in which prediction should be done. - config: `ConfigProto` proto used to configure the session. Raises: ValueError: If more than one of signature_def_key OR signature_def OR (input_names AND output_names) is specified. 
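Because `SavedModelPredictor` now builds its session without a caller-supplied `ConfigProto` (see the hunk that follows), code that still needs custom session options can load the SavedModel itself, mirroring the predictor's internal `loader.load` call. This is a workaround sketch, not part of this patch; the path, tag, and tensor names are placeholders:

```python
import tensorflow as tf
from tensorflow.python.saved_model import loader

export_dir = '/tmp/my_saved_model'                    # placeholder
config = tf.ConfigProto(allow_soft_placement=True)    # caller-chosen options

graph = tf.Graph()
with graph.as_default():
    sess = tf.Session(config=config)
    loader.load(sess, ['serve'], export_dir)          # same call the predictor makes
    # Look up input/output tensors from the loaded signature, e.g.:
    # y = graph.get_tensor_by_name('y:0')
    # print(sess.run(y, feed_dict={'x:0': [[1.0]]}))
```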
@@ -154,7 +152,7 @@ class SavedModelPredictor(predictor.Predictor): self._graph = graph or ops.Graph() with self._graph.as_default(): - self._session = session.Session(config=config) + self._session = session.Session() loader.load(self._session, tags.split(','), export_dir) if input_names is None: diff --git a/tensorflow/contrib/quantize/README.md b/tensorflow/contrib/quantize/README.md index 27a933c0f9..c83623ec94 100644 --- a/tensorflow/contrib/quantize/README.md +++ b/tensorflow/contrib/quantize/README.md @@ -6,7 +6,7 @@ inference. The details of the transformation implemented in this package is described here [1]. This is done using the -[fake quantization op](https://www.tensorflow.org/api_guides/python/array_ops#Fake_quantization). +[fake quantization op](https://www.tensorflow.org/versions/r0.12/api_docs/python/array_ops/fake_quantization). Literature has shown that fixed point networks provide comparable performance to floating point networks [2]. This is achieved by modeling the quantization diff --git a/tensorflow/contrib/slim/python/slim/evaluation_test.py b/tensorflow/contrib/slim/python/slim/evaluation_test.py index 3d0308aaf3..94fc12ca81 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation_test.py +++ b/tensorflow/contrib/slim/python/slim/evaluation_test.py @@ -26,6 +26,7 @@ import time import numpy as np from tensorflow.contrib.framework.python.ops import variables as variables_lib +from tensorflow.contrib.metrics.python.ops import metric_ops from tensorflow.contrib.slim.python.slim import evaluation from tensorflow.contrib.training.python.training import evaluation as evaluation_lib from tensorflow.core.protobuf import saver_pb2 @@ -36,7 +37,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops import metrics from tensorflow.python.ops import variables from tensorflow.python.platform import flags from tensorflow.python.platform import gfile @@ -89,8 +89,8 @@ class EvaluationTest(test.TestCase): self._predictions, self._scale = TestModel(self._inputs) def testFinalOpsOnEvaluationLoop(self): - value_op, update_op = metrics.accuracy( - labels=self._labels, predictions=self._predictions) + value_op, update_op = metric_ops.streaming_accuracy(self._predictions, + self._labels) init_op = control_flow_ops.group(variables.global_variables_initializer(), variables.local_variables_initializer()) # Create checkpoint and log directories: @@ -136,10 +136,9 @@ class EvaluationTest(test.TestCase): self.assertTrue(obj.hook_was_run) def _create_names_to_metrics(self, predictions, labels): - accuracy0, update_op0 = metrics.accuracy( - labels=labels, predictions=predictions) - accuracy1, update_op1 = metrics.accuracy( - labels=labels, predictions=predictions + 1) + accuracy0, update_op0 = metric_ops.streaming_accuracy(predictions, labels) + accuracy1, update_op1 = metric_ops.streaming_accuracy(predictions + 1, + labels) names_to_values = {'Accuracy': accuracy0, 'Another_accuracy': accuracy1} names_to_updates = {'Accuracy': update_op0, 'Another_accuracy': update_op1} @@ -199,8 +198,8 @@ class EvaluationTest(test.TestCase): predictions_limited = input.limit_epochs(self._predictions, num_epochs=1) labels_limited = input.limit_epochs(self._labels, num_epochs=1) - value_op, update_op = metrics.accuracy( - labels=labels_limited, predictions=predictions_limited) + value_op, update_op = metric_ops.streaming_accuracy( + 
predictions_limited, labels_limited) init_op = control_flow_ops.group(variables.global_variables_initializer(), variables.local_variables_initializer()) @@ -261,8 +260,8 @@ class SingleEvaluationTest(test.TestCase): self._prepareCheckpoint(checkpoint_path) # Next, determine the metric to evaluate: - value_op, update_op = metrics.accuracy( - labels=self._labels, predictions=self._predictions) + value_op, update_op = metric_ops.streaming_accuracy(self._predictions, + self._labels) # Run the evaluation and verify the results: accuracy_value = evaluation.evaluate_once( @@ -277,8 +276,8 @@ class SingleEvaluationTest(test.TestCase): self._prepareCheckpoint(checkpoint_path) # Next, determine the metric to evaluate: - value_op, update_op = metrics.accuracy( - labels=self._labels, predictions=self._predictions) + value_op, update_op = metric_ops.streaming_accuracy(self._predictions, + self._labels) dumping_root = os.path.join(self.get_temp_dir(), 'tfdbg_dump_dir') dumping_hook = hooks.DumpingDebugHook(dumping_root, log_usage=False) diff --git a/tensorflow/contrib/summary/summary.py b/tensorflow/contrib/summary/summary.py index d22b80ac88..99ced53e11 100644 --- a/tensorflow/contrib/summary/summary.py +++ b/tensorflow/contrib/summary/summary.py @@ -21,7 +21,6 @@ from @{tf.summary.merge_all} to @{tf.summary.FileWriter}. To use with eager execution enabled, write your code as follows: -```python global_step = tf.train.get_or_create_global_step() summary_writer = tf.contrib.summary.create_file_writer( train_dir, flush_millis=10000) @@ -31,11 +30,9 @@ with summary_writer.as_default(), tf.contrib.summary.always_record_summaries(): tf.contrib.summary.scalar("loss", my_loss) # In this case every call to tf.contrib.summary.scalar will generate a record # ... -``` To use it with graph execution, write your code as follows: -```python global_step = tf.train.get_or_create_global_step() summary_writer = tf.contrib.summary.create_file_writer( train_dir, flush_millis=10000) @@ -56,7 +53,7 @@ with tf.Session(...) as sess: while not_done_training: sess.run([train_op, tf.contrib.summary.all_summary_ops()]) # ... 
-``` + """ from __future__ import absolute_import diff --git a/tensorflow/contrib/tensor_forest/client/eval_metrics.py b/tensorflow/contrib/tensor_forest/client/eval_metrics.py index d8236a0a6f..e893e1d1c8 100644 --- a/tensorflow/contrib/tensor_forest/client/eval_metrics.py +++ b/tensorflow/contrib/tensor_forest/client/eval_metrics.py @@ -21,10 +21,10 @@ import numpy as np from tensorflow.contrib import losses from tensorflow.contrib.learn.python.learn.estimators import prediction_key +from tensorflow.contrib.metrics.python.ops import metric_ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops import metrics from tensorflow.python.ops import nn INFERENCE_PROB_NAME = prediction_key.PredictionKey.PROBABILITIES @@ -38,13 +38,12 @@ def _top_k_generator(k): targets = math_ops.to_int32(targets) if targets.get_shape().ndims > 1: targets = array_ops.squeeze(targets, axis=[1]) - return metrics.mean(nn.in_top_k(probabilities, targets, k)) + return metric_ops.streaming_mean(nn.in_top_k(probabilities, targets, k)) return _top_k def _accuracy(predictions, targets, weights=None): - return metrics.accuracy( - labels=targets, predictions=predictions, weights=weights) + return metric_ops.streaming_accuracy(predictions, targets, weights=weights) def _r2(probabilities, targets, weights=None): @@ -54,7 +53,7 @@ def _r2(probabilities, targets, weights=None): squares_residuals = math_ops.reduce_sum( math_ops.square(targets - probabilities), 0) score = 1 - math_ops.reduce_sum(squares_residuals / squares_total) - return metrics.mean(score, weights=weights) + return metric_ops.streaming_mean(score, weights=weights) def _squeeze_and_onehot(targets, depth): @@ -63,7 +62,7 @@ def _squeeze_and_onehot(targets, depth): def _sigmoid_entropy(probabilities, targets, weights=None): - return metrics.mean( + return metric_ops.streaming_mean( losses.sigmoid_cross_entropy(probabilities, _squeeze_and_onehot( targets, @@ -72,7 +71,7 @@ def _sigmoid_entropy(probabilities, targets, weights=None): def _softmax_entropy(probabilities, targets, weights=None): - return metrics.mean( + return metric_ops.streaming_mean( losses.sparse_softmax_cross_entropy(probabilities, math_ops.to_int32(targets)), weights=weights) @@ -83,7 +82,7 @@ def _predictions(predictions, unused_targets, **unused_kwargs): def _class_log_loss(probabilities, targets, weights=None): - return metrics.mean( + return metric_ops.streaming_mean( losses.log_loss(probabilities, _squeeze_and_onehot(targets, array_ops.shape(probabilities)[1])), @@ -91,36 +90,34 @@ def _class_log_loss(probabilities, targets, weights=None): def _precision(predictions, targets, weights=None): - return metrics.precision( - labels=targets, predictions=predictions, weights=weights) + return metric_ops.streaming_precision(predictions, targets, weights=weights) def _precision_at_thresholds(predictions, targets, weights=None): - return metrics.precision_at_thresholds( - labels=targets, - predictions=array_ops.slice(predictions, [0, 1], [-1, 1]), - thresholds=np.arange(0, 1, 0.01, dtype=np.float32), + return metric_ops.streaming_precision_at_thresholds( + array_ops.slice(predictions, [0, 1], [-1, 1]), + targets, + np.arange( + 0, 1, 0.01, dtype=np.float32), weights=weights) def _recall(predictions, targets, weights=None): - return metrics.recall( - labels=targets, predictions=predictions, weights=weights) + return metric_ops.streaming_recall(predictions, targets, weights=weights) def _recall_at_thresholds(predictions, targets, 
weights=None): - return metrics.recall_at_thresholds( - labels=targets, - predictions=array_ops.slice(predictions, [0, 1], [-1, 1]), - thresholds=np.arange(0, 1, 0.01, dtype=np.float32), + return metric_ops.streaming_recall_at_thresholds( + array_ops.slice(predictions, [0, 1], [-1, 1]), + targets, + np.arange( + 0, 1, 0.01, dtype=np.float32), weights=weights) def _auc(probs, targets, weights=None): - return metrics.auc( - labels=targets, - predictions=array_ops.slice(probs, [0, 1], [-1, 1]), - weights=weights) + return metric_ops.streaming_auc(array_ops.slice(probs, [0, 1], [-1, 1]), + targets, weights=weights) _EVAL_METRICS = { diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest.py b/tensorflow/contrib/tensor_forest/python/tensor_forest.py index 6f62cd11a9..7a35a70bbe 100644 --- a/tensorflow/contrib/tensor_forest/python/tensor_forest.py +++ b/tensorflow/contrib/tensor_forest/python/tensor_forest.py @@ -295,7 +295,7 @@ def get_epoch_variable(): # A simple container to hold the training variables for a single tree. -class TreeVariables(object): +class TreeTrainingVariables(object): """Stores tf.Variables for training a single random tree. Uses tf.get_variable to get tree-specific names so that this can be used @@ -303,7 +303,7 @@ class TreeVariables(object): then relies on restoring that model to evaluate). """ - def __init__(self, params, tree_num, training, tree_config='', tree_stat=''): + def __init__(self, params, tree_num, training): if (not hasattr(params, 'params_proto') or not isinstance(params.params_proto, _params_proto.TensorForestParams)): @@ -315,28 +315,27 @@ class TreeVariables(object): # TODO(gilberth): Manually shard this to be able to fit it on # multiple machines. self.stats = stats_ops.fertile_stats_variable( - params, tree_stat, self.get_tree_name('stats', tree_num)) + params, '', self.get_tree_name('stats', tree_num)) self.tree = model_ops.tree_variable( - params, tree_config, self.stats, self.get_tree_name('tree', tree_num)) + params, '', self.stats, self.get_tree_name('tree', tree_num)) def get_tree_name(self, name, num): return '{0}-{1}'.format(name, num) -class ForestVariables(object): +class ForestTrainingVariables(object): """A container for a forests training data, consisting of multiple trees. - Instantiates a TreeVariables object for each tree. We override the + Instantiates a TreeTrainingVariables object for each tree. We override the __getitem__ and __setitem__ function so that usage looks like this: - forest_variables = ForestVariables(params) + forest_variables = ForestTrainingVariables(params) ... forest_variables.tree ... """ def __init__(self, params, device_assigner, training=True, - tree_variables_class=TreeVariables, - tree_configs=None, tree_stats=None): + tree_variables_class=TreeTrainingVariables): self.variables = [] # Set up some scalar variables to run through the device assigner, then # we can use those to colocate everything related to a tree. 
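After the rename back to `TreeTrainingVariables`/`ForestTrainingVariables` and the removal of the `tree_configs`/`tree_stats` keywords, a forest is built from hyperparameters alone; its variables start empty and are filled by training or by restoring a checkpoint. A usage sketch reusing the hyperparameter and input values from the restored-model test removed further below in this patch:

```python
from tensorflow.contrib.tensor_forest.python import tensor_forest

hparams = tensor_forest.ForestHParams(
    num_classes=2,
    num_features=2,
    num_trees=1,
    max_nodes=1000,
    split_after_samples=25).fill()

# No tree_configs/tree_stats arguments after this change.
graph_builder = tensor_forest.RandomForestGraphs(hparams)

input_data = [[-1., 0.], [-1., 2.], [1., 0.], [1., -2.]]
probs, paths, var = graph_builder.inference_graph(input_data)
```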
@@ -348,13 +347,7 @@ class ForestVariables(object): for i in range(params.num_trees): with ops.device(self.device_dummies[i].device): - kwargs = {} - if tree_configs is not None: - kwargs.update(dict(tree_config=tree_configs[i])) - if tree_stats is not None: - kwargs.update(dict(tree_stat=tree_stats[i])) - self.variables.append(tree_variables_class( - params, i, training, **kwargs)) + self.variables.append(tree_variables_class(params, i, training)) def __setitem__(self, t, val): self.variables[t] = val @@ -368,11 +361,9 @@ class RandomForestGraphs(object): def __init__(self, params, - tree_configs=None, - tree_stats=None, device_assigner=None, variables=None, - tree_variables_class=TreeVariables, + tree_variables_class=TreeTrainingVariables, tree_graphs=None, training=True): self.params = params @@ -380,10 +371,9 @@ class RandomForestGraphs(object): device_assigner or framework_variables.VariableDeviceChooser()) logging.info('Constructing forest with params = ') logging.info(self.params.__dict__) - self.variables = variables or ForestVariables( + self.variables = variables or ForestTrainingVariables( self.params, device_assigner=self.device_assigner, training=training, - tree_variables_class=tree_variables_class, - tree_configs=tree_configs, tree_stats=tree_stats) + tree_variables_class=tree_variables_class) tree_graph_class = tree_graphs or RandomTreeGraphs self.trees = [ tree_graph_class(self.variables[i], self.params, i) diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py b/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py index 1c9c81827e..bbe627b157 100644 --- a/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py +++ b/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py @@ -18,14 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from google.protobuf.json_format import ParseDict -from tensorflow.contrib.decision_trees.proto import generic_tree_model_pb2 as _tree_proto from tensorflow.contrib.tensor_forest.python import tensor_forest from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util -from tensorflow.python.ops import resources -from tensorflow.python.ops import variables from tensorflow.python.platform import googletest @@ -114,47 +110,6 @@ class TensorForestTest(test_util.TensorFlowTestCase): self.assertTrue(isinstance(paths, ops.Tensor)) self.assertTrue(isinstance(var, ops.Tensor)) - def testInfrenceFromRestoredModel(self): - input_data = [[-1., 0.], [-1., 2.], # node 1 - [1., 0.], [1., -2.]] # node 2 - expected_prediction = [[0.0, 1.0], [0.0, 1.0], - [0.0, 1.0], [0.0, 1.0]] - hparams = tensor_forest.ForestHParams( - num_classes=2, - num_features=2, - num_trees=1, - max_nodes=1000, - split_after_samples=25).fill() - tree_weight = {'decisionTree': - {'nodes': - [{'binaryNode': - {'rightChildId': 2, - 'leftChildId': 1, - 'inequalityLeftChildTest': - {'featureId': {'id': '0'}, - 'threshold': {'floatValue': 0}}}}, - {'leaf': {'vector': - {'value': [{'floatValue': 0.0}, - {'floatValue': 1.0}]}}, - 'nodeId': 1}, - {'leaf': {'vector': - {'value': [{'floatValue': 0.0}, - {'floatValue': 1.0}]}}, - 'nodeId': 2}]}} - restored_tree_param = ParseDict(tree_weight, - _tree_proto.Model()).SerializeToString() - graph_builder = tensor_forest.RandomForestGraphs(hparams, - [restored_tree_param]) - probs, paths, var = graph_builder.inference_graph(input_data) - 
self.assertTrue(isinstance(probs, ops.Tensor)) - self.assertTrue(isinstance(paths, ops.Tensor)) - self.assertTrue(isinstance(var, ops.Tensor)) - with self.test_session(): - variables.global_variables_initializer().run() - resources.initialize_resources(resources.shared_resources()).run() - self.assertEquals(probs.eval().shape, (4, 2)) - self.assertEquals(probs.eval().tolist(), expected_prediction) - def testTrainingConstructionClassificationSparse(self): input_data = sparse_tensor.SparseTensor( indices=[[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]], diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index da4dd5a14c..b7b26cfb1c 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -91,11 +91,8 @@ void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, if (!subgraph_node_ids.count(edge->src()->id()) && !edge->src()->IsSource() && !edge->IsControlEdge()) { incoming_edges->insert(edge); - VLOG(2) << "INCOMING " << edge->src()->name() << " -> " << node->name() - << " Y, "; } else { - VLOG(2) << "INCOMING " << edge->src()->name() << " -> " << node->name() - << " N, "; + VLOG(2) << node->name() << " -> " << edge->src()->name() << " N, "; } } } @@ -109,12 +106,10 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph, for (const tensorflow::Edge* edge : node->out_edges()) { if (!subgraph_node_ids.count(edge->dst()->id()) && !edge->dst()->IsSink() && !edge->IsControlEdge()) { - VLOG(2) << "OUTGOING " << node->name() << " -> " << edge->dst()->name() - << " Y, "; + VLOG(2) << node->name() << " -> " << edge->dst()->name() << " Y, "; outgoing_edges->insert(edge); } else { - VLOG(2) << "OUTGOING " << node->name() << " -> " << edge->dst()->name() - << " N, "; + VLOG(2) << node->name() << " -> " << edge->dst()->name() << " N, "; } } } @@ -186,27 +181,29 @@ struct ConvertGraphParams { static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) { GetSubGraphIncomingEdges(p->graph, p->subgraph_node_ids, &p->subgraph_incoming_edges); - - std::set> unique_tensors; - // Add only unique input source nodes. 
If output of an outside node is shared - // between multiple nodes inside the engine, only one edge should be created for (const tensorflow::Edge* edge : p->subgraph_incoming_edges) { - unique_tensors.insert({edge->src()->id(), edge->src_output()}); + p->subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); + } + auto output_name_to_index_map = BuildTensorNameMap(p->output_names); + std::set> subgraph_outputs_set; + // Collect outputs referenced from output_names + for (int node_id : p->subgraph_node_ids) { + tensorflow::Node* node = p->graph.FindNodeId(node_id); + if (output_name_to_index_map.count(node->name())) { + for (int index : output_name_to_index_map.at(node->name())) { + subgraph_outputs_set.insert({node_id, index}); + } + } } - p->subgraph_inputs.insert(p->subgraph_inputs.begin(), unique_tensors.begin(), - unique_tensors.end()); GetSubGraphOutgoingEdges(p->graph, p->subgraph_node_ids, &p->subgraph_outgoing_edges); - unique_tensors.clear(); - // Similar to above, if multiple ouside nodes are sharing the output of an - // internal node only one output port should be created and shared between - // outputs for (const tensorflow::Edge* edge : p->subgraph_outgoing_edges) { - unique_tensors.insert({edge->src()->id(), edge->src_output()}); + subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); } - p->subgraph_outputs.reserve(unique_tensors.size()); + p->subgraph_outputs.reserve(subgraph_outputs_set.size()); p->subgraph_outputs.insert(p->subgraph_outputs.begin(), - unique_tensors.begin(), unique_tensors.end()); + subgraph_outputs_set.begin(), + subgraph_outputs_set.end()); return tensorflow::Status::OK(); } @@ -228,6 +225,7 @@ tensorflow::Status GetCalibNode(ConvertGraphParams* params) { for (auto in_edge : params->subgraph_incoming_edges) { // loop over incoming edges and // attach them to calib node + // tensorflow::Node* src_node = in_edge->src(); auto src_output = in_edge->src_output(); auto dst_node = in_edge->dst(); auto dst_input = in_edge->dst_input(); @@ -259,24 +257,19 @@ tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { for (size_t i = 0; i < params->subgraph_inputs.size(); ++i) { subgraph_edge_to_input_map.insert({params->subgraph_inputs.at(i), i}); } - std::set> unique_tensors; for (const tensorflow::Edge* edge : params->subgraph_incoming_edges) { std::pair old_src = {edge->src()->id(), edge->src_output()}; - if (unique_tensors.count(old_src)) continue; - unique_tensors.insert(old_src); int new_src_output = subgraph_edge_to_input_map.at(old_src); params->graph.AddEdge(edge->src(), edge->src_output(), trt_node, new_src_output); - VLOG(1) << "Wire " << edge->src()->name() << ":" << edge->src_output() - << " -> " << trt_node->name() << ":" << new_src_output; params->graph.RemoveEdge(edge); } - if (VLOG_IS_ON(2)) { - VLOG(2) << "new edge count: " << trt_node->in_edges().size(); - for (const tensorflow::Edge* edge : trt_node->in_edges()) { - VLOG(2) << edge->src()->name() << " port: " << edge->src_output(); - } + + VLOG(2) << "new wiring edges: " << trt_node->in_edges().size(); + for (const tensorflow::Edge* edge : trt_node->in_edges()) { + VLOG(2) << edge->src()->name() << " port: " << edge->src_output(); } + TF_RETURN_IF_ERROR(status); // Re-map outgoing edges to use the new TRT node instead of the orig subgraph @@ -290,8 +283,6 @@ tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { int new_src_output = subgraph_edge_to_output_map.at(old_src); TF_RETURN_IF_ERROR(params->graph.UpdateEdge( trt_node, 
new_src_output, edge->dst(), edge->dst_input())); - VLOG(1) << "Wire " << trt_node->name() << ":" << new_src_output << " -> " - << edge->dst()->name() << ":" << edge->dst_input(); } // Remove the original subgraph for (int node_id : params->subgraph_node_ids) { @@ -326,12 +317,9 @@ tensorflow::Status ConvertCalibGraphToInferGraph( tensorflow::GraphConstructorOptions(), graph_def, &graph)); // get calib nodes std::vector calib_nodes; - std::vector topo_order; - tensorflow::GetPostOrder(graph, &topo_order); - for (auto rit = topo_order.rbegin(); rit != topo_order.rend(); ++rit) { - auto node = *rit; + for (auto node : graph.op_nodes()) { if (node->type_string() == "TRTCalibOp") { - VLOG(1) << "Found Calib Node " << node->name(); + VLOG(1) << "Found Calib Node"; calib_nodes.push_back(node); } } diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 4e4d295538..96e0700862 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -362,11 +362,10 @@ void ReorderCKtoKC(const TRT_ShapedWeights& iweights, break; } case tensorflow::DataType::DT_HALF: { - Reorder2( - {k, c}, static_cast(iweights.GetValues()), - istrides, - static_cast(const_cast(oweights->GetValues())), - ostrides); + Reorder2({k, c}, static_cast(iweights.GetValues()), + istrides, static_cast( + const_cast(oweights->GetValues())), + ostrides); break; } default: @@ -1180,9 +1179,9 @@ tensorflow::Status BinaryTensorOpTensor( CHECK_EQ_TYPE(tensor_r->getType(), dtype); auto op_pair = ops.find(node_def.op()); if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented( - "binary op: " + node_def.op() + - " not supported at: " + node_def.name()); + return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + + " not supported at: " + + node_def.name()); nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( *const_cast(tensor_l), @@ -2139,7 +2138,9 @@ void Converter::register_op_converters() { } } // namespace - +tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams& s) { + return tensorflow::errors::Unimplemented("Not implemented yet"); +} tensorflow::Status ConvertCalibrationNodeToEngineNode( tensorflow::Graph& graph, tensorflow::Node* c_node) { const auto ndef = c_node->def(); @@ -2163,23 +2164,9 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( for (auto n : graph.op_nodes()) { node_maps.insert({n->name(), n}); } - std::set subgraph_ids; - for (const auto internal_node : segment_nodes) { - subgraph_ids.insert(node_maps.at(internal_node)->id()); - } - if (VLOG_IS_ON(2)) { - string node_names = StrCat(c_node->name(), " segment nodes= "); - - for (const auto& node_name : segment_nodes) { - StrAppend(&node_names, node_name, ", "); - } - VLOG(2) << node_names; - } - VLOG(1) << "Output Nodes:"; std::vector out_types; std::vector out_edges; - for (auto& i : output_nodes) { auto node_port = tensorflow::str_util::Split(i, ":"); VLOG(1) << " " << i << " in graph " << node_maps.count(i); @@ -2199,24 +2186,18 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( out_types.push_back(out_node->output_type(0)); } for (auto out_edge : out_node->out_edges()) { - if (subgraph_ids.count(out_edge->dst()->id())) - continue; // skip internal edges; if (out_edge->src_output() == port) { out_edges.push_back(out_edge); - VLOG(1) << "OUTPUT EDGE " << out_edge->src()->name() << ":" - << out_edge->src_output() << " -> " << out_edge->dst()->name() - << ":" << 
out_edge->dst_input(); + break; } } } else { LOG(WARNING) << " couldn't find output node " << out_node_name; } } - if (VLOG_IS_ON(1)) { - VLOG(1) << c_node->name() << " Input Nodes:"; - for (auto& i : input_names) { - VLOG(1) << " Input " << i << " in graph " << node_maps.count(i); - } + VLOG(1) << "Input Nodes:"; + for (auto& i : input_names) { + VLOG(1) << " " << i << " in graph " << node_maps.count(i); } auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance(); auto resmgr = trt_rm->getManager("TRTCalibOps"); @@ -2250,24 +2231,14 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( calib_res->builder_ = nullptr; tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); std::vector income_edges; - income_edges.resize(c_node->num_inputs()); for (const auto in_edge : c_node->in_edges()) { auto src = in_edge->src(); int dest_port = in_edge->dst_input(); - VLOG(1) << "Incoming connection " << src->name() << ":" - << in_edge->src_output() << " -> " << c_node->name() << ":" - << dest_port; - income_edges.at(dest_port) = {src->name(), in_edge->src_output(), - c_node->input_type(dest_port)}; + income_edges.emplace_back(src->name(), in_edge->src_output(), + c_node->input_type(dest_port)); } tensorflow::gtl::ArraySlice input_list( income_edges); - if (VLOG_IS_ON(2)) { - for (const auto& inp : input_list) { - VLOG(2) << " Input from inputlist " << inp.node << ":" << inp.index << " " - << tensorflow::DataTypeString(inp.data_type); - } - } op_builder.Input(input_list); tensorflow::NodeDef engine_node; const char* engine_plan_data = static_cast(engine_plan->data()); @@ -2284,26 +2255,13 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( } auto trt_engine_node = graph.AddNode(engine_node, &status); TF_RETURN_IF_ERROR(status); - std::map port_map; - for (size_t t = 0; t < output_nodes.size(); t++) { - port_map.insert({output_nodes.at(t), t}); - } - for (auto& i : out_edges) { - string s(i->src()->name()); - if (i->src_output()) StrAppend(&s, ":", i->src_output()); - int out_port = port_map.at(s); - VLOG(1) << "Connecting " << trt_engine_node->name() << ":" << out_port - << " -> " << i->dst()->name() << ":" << i->dst_input(); - TF_RETURN_IF_ERROR( - graph.UpdateEdge(trt_engine_node, out_port, i->dst(), i->dst_input())); - } - for (const auto ed : trt_engine_node->in_edges()) { - VLOG(1) << "In Edge " << ed->src()->name() << ":" << ed->src_output() - << " -> " << ed->dst()->name() << ":" << ed->dst_input(); - } - for (const auto ed : trt_engine_node->out_edges()) { - VLOG(1) << "Out Edge " << ed->src()->name() << ":" << ed->src_output() - << " -> " << ed->dst()->name() << ":" << ed->dst_input(); + for (size_t i = 0; i < out_edges.size(); i++) { + VLOG(1) << "Connecting trt_engine_node output " << i << " with " + << out_edges.at(i)->dst()->name() << " port " + << out_edges.at(i)->dst_input(); + TF_RETURN_IF_ERROR(graph.UpdateEdge(trt_engine_node, i, + out_edges.at(i)->dst(), + out_edges.at(i)->dst_input())); } VLOG(1) << "Segment nodes:"; for (auto& i : segment_nodes) { @@ -2374,7 +2332,6 @@ tensorflow::Status ConvertSubgraph( std::vector* output_names, std::vector* output_dtypes, const string& engine_name) { - std::set added_tensors; for (const std::pair& input : s.input_inds) { VLOG(2) << "parsing input. 
Node id= " << input.first; int node_id = input.first; @@ -2417,6 +2374,7 @@ tensorflow::Status ConvertSubgraph( auto op_info = op_info_vec.at(shape_inference_output_idx); tensorflow::DataType tf_dtype = op_info.dtype(); + input_dtypes->push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); auto type_status = ConvertDType(tf_dtype, &dtype); @@ -2452,10 +2410,8 @@ tensorflow::Status ConvertSubgraph( if (output_idx != 0) { input_tensor_name = StrCat(node_name, ":", output_idx); } - if (added_tensors.count(input_tensor_name)) continue; - added_tensors.insert(input_tensor_name); + input_names->push_back(input_tensor_name); - input_dtypes->push_back(tf_dtype); nvinfer1::ITensor* input_tensor = converter.network()->addInput( input_tensor_name.c_str(), dtype, input_dim_pseudo_chw); @@ -2479,7 +2435,6 @@ tensorflow::Status ConvertSubgraph( // Gather output metadata int trt_engine_op_output_idx = 0; - added_tensors.clear(); for (const std::pair& output : s.output_inds) { int node_id = output.first; int output_idx = output.second; @@ -2496,8 +2451,6 @@ tensorflow::Status ConvertSubgraph( if (output_idx != 0) tensorflow::strings::StrAppend(&tensor_name, ":", output_idx); VLOG(2) << "Output tensor name: " << tensor_name; - if (added_tensors.count(tensor_name)) continue; - added_tensors.insert(tensor_name); output_names->push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); if (!tensor_or_weights.is_tensor()) { diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py index d879170b68..2e472a2805 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets.py @@ -166,21 +166,11 @@ def StreamingFilesDataset(files, return remote_iterator.get_next() def MapFn(unused_input): - if isinstance(source_dataset.output_types, dtypes.DType): - output_types = [source_dataset.output_types] - elif isinstance(source_dataset.output_types, (list, tuple)): - output_types = source_dataset.output_types - else: - raise ValueError('source dataset has invalid output types') - remote_calls = functional_ops.remote_call( + return functional_ops.remote_call( args=[source_handle], - Tout=output_types, + Tout=[dtypes.string], f=LoadingFunc, - target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job) - if len(remote_calls) == 1: - return remote_calls[0] - else: - return remote_calls + target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job)[0] with ops.device('/job:%s' % worker_job): output_dataset = dataset_ops.Dataset.range(2).repeat().map( diff --git a/tensorflow/contrib/tpu/python/tpu/datasets_test.py b/tensorflow/contrib/tpu/python/tpu/datasets_test.py index b58d05eac5..918cf0ed8e 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets_test.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets_test.py @@ -26,8 +26,6 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import readers -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_shape from tensorflow.python.lib.io import python_io from tensorflow.python.platform import test from tensorflow.python.training import server_lib @@ -164,30 +162,6 @@ class DatasetsTest(test.TestCase): self.assertEqual(set(all_contents), set(retrieved_values)) - def testArbitraryReaderFuncFromDatasetGenerator(self): - - def my_generator(): - yield (1, [1] * 10) - - def 
gen_dataset(dummy): - return dataset_ops.Dataset.from_generator( - my_generator, (dtypes.int64, dtypes.int64), - (tensor_shape.TensorShape([]), tensor_shape.TensorShape([10]))) - - dataset = datasets.StreamingFilesDataset( - dataset_ops.Dataset.range(10), filetype=gen_dataset) - - iterator = dataset.make_initializable_iterator() - self._sess.run(iterator.initializer) - get_next = iterator.get_next() - - retrieved_values = self._sess.run(get_next) - - self.assertIsInstance(retrieved_values, (list, tuple)) - self.assertEqual(len(retrieved_values), 2) - self.assertEqual(retrieved_values[0], 1) - self.assertItemsEqual(retrieved_values[1], [1] * 10) - def testUnexpectedFiletypeString(self): with self.assertRaises(ValueError): datasets.StreamingFilesDataset( diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index b1c224a345..d89633199d 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -699,9 +699,7 @@ cc_library( srcs = ["platform/stacktrace_handler.cc"], hdrs = ["platform/stacktrace_handler.h"], deps = [ - ":abi", ":lib_platform", - ":stacktrace", ], ) @@ -3091,8 +3089,6 @@ cc_library( # we now need at least "str_util". ":lib", ":lib_platform", - ":stacktrace_handler", - ":test_lite", "//tensorflow/core/platform/default/build_config:test_lite_main", ], alwayslink = 1, @@ -3573,10 +3569,7 @@ tf_cc_tests_gpu( tf_cc_test_mkl( name = "mkl_runtime_tests", size = "small", - srcs = [ - "common_runtime/mkl_cpu_allocator_test.cc", - "common_runtime/mkl_threadpool_device_test.cc", - ], + srcs = ["common_runtime/mkl_cpu_allocator_test.cc"], linkstatic = 1, deps = [ ":core", diff --git a/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt b/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt index 985f09312f..cbe76de415 100644 --- a/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt @@ -4,10 +4,6 @@ op { description: < 0`, limit of the split of the result. -END - } - summary: "Split elements of `source` based on `sep` into a `SparseTensor`." - description: <2<><>3"` and -sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty -string, consecutive whitespace are regarded as a single separator, and the -result will contain no empty strings at the startor end if the string has -leading or trailing whitespace. - -Note that the above mentioned behavior matches python's str.split. -END -} diff --git a/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt deleted file mode 100644 index 0e8576fb01..0000000000 --- a/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt +++ /dev/null @@ -1,4 +0,0 @@ -op { - graph_op_name: "StringSplitV2" - visibility: HIDDEN -} diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index 9cda17867b..8f2a419756 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -86,7 +86,7 @@ BFCAllocator::Chunk* BFCAllocator::ChunkFromHandle(ChunkHandle h) { return &(chunks_[h]); } -bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { +bool BFCAllocator::Extend(size_t rounded_bytes) { size_t available_bytes = memory_limit_ - total_region_allocated_bytes_; // Rounds available_bytes down to the nearest multiple of kMinAllocationSize. 
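The Extend() path above rounds the remaining memory budget down to a multiple of kMinAllocationSize, tries to allocate the planned region size, and then "backpedals" geometrically until either an allocation succeeds or the region would no longer cover the request. A minimal Python sketch of that strategy follows; it is an illustration only, not the BFCAllocator implementation, and try_alloc, min_allocation and backpedal_factor are hypothetical stand-ins for the suballocator call and the kMinAllocationSize/kBackpedalFactor constants.

    def round_down(n, multiple):
        # Round n down to the nearest multiple, as done for available_bytes above.
        return (n // multiple) * multiple

    def extend(requested_bytes, planned_region_bytes, available_bytes, try_alloc,
               min_allocation=256, backpedal_factor=0.9):
        # Sketch of extend-with-backpedal: start from the planned region size and
        # shrink geometrically on failure until the request no longer fits.
        available_bytes = round_down(available_bytes, min_allocation)
        if available_bytes < requested_bytes:
            return None
        size = min(planned_region_bytes, available_bytes)
        mem = try_alloc(size)
        while mem is None:
            size = round_down(int(size * backpedal_factor), min_allocation)
            if size < requested_bytes:
                return None  # give up: even the shrunken region cannot hold the request
            mem = try_alloc(size)
        return mem

    # Toy suballocator that can only hand out regions of up to 1 MiB.
    assert extend(64 * 1024, 4 << 20, 8 << 20,
                  lambda b: b if b <= (1 << 20) else None) is not None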
available_bytes = (available_bytes / kMinAllocationSize) * kMinAllocationSize; @@ -108,7 +108,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { // Try allocating. size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes); - void* mem_addr = suballocator_->Alloc(alignment, bytes); + void* mem_addr = suballocator_->Alloc(32, bytes); if (mem_addr == nullptr && !started_backpedal_) { // Only backpedal once. started_backpedal_ = true; @@ -119,7 +119,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { while (mem_addr == nullptr) { bytes = RoundedBytes(bytes * kBackpedalFactor); if (bytes < rounded_bytes) break; - mem_addr = suballocator_->Alloc(alignment, bytes); + mem_addr = suballocator_->Alloc(32, bytes); } } @@ -261,7 +261,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment, } // Try to extend - if (Extend(unused_alignment, rounded_bytes)) { + if (Extend(rounded_bytes)) { ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes); if (ptr != nullptr) { return ptr; diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h index 52aedb1e9c..ba5a3eea3a 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.h +++ b/tensorflow/core/common_runtime/bfc_allocator.h @@ -305,8 +305,7 @@ class BFCAllocator : public VisitableAllocator { // Try to add a new memory region that can satisfy an allocation of // 'rounded_bytes' bytes. Returns true on success and false on // failure. - bool Extend(size_t alignment, size_t rounded_bytes) - EXCLUSIVE_LOCKS_REQUIRED(lock_); + bool Extend(size_t rounded_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_); // Returns a pointer to an underlying allocated chunk of size // 'rounded_bytes'. diff --git a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc index 9028e6298c..c21a1ea9f2 100644 --- a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc +++ b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc @@ -102,25 +102,9 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelTest) { EXPECT_EQ(2, shape.dim(0).size()); EXPECT_EQ(1, shape.dim(1).size()); if (node->name() == y->name()) { -#ifdef INTEL_MKL - // if MKL is used, it goes through various additional - // graph rewrite pass. In TF, everytime a graph pass - // happens, "constant" nodes are allocated - // and deallocated. Each allocation calls the - // (FindChunkPtr of BFCAllocator), - // which increments the value of AllocationId. - // Thus AllocationId becomes more than 3 and 4 if - // MKL is used. Now they are 9 and 10 for MKL. - EXPECT_EQ(19, cm->AllocationId(node, 0)); -#else EXPECT_EQ(21, cm->AllocationId(node, 0)); -#endif } else { -#ifdef INTEL_MKL - EXPECT_EQ(20, cm->AllocationId(node, 0)); -#else EXPECT_EQ(22, cm->AllocationId(node, 0)); -#endif } } EXPECT_LE(0, cm->MaxExecutionTime(node)); diff --git a/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc b/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc deleted file mode 100644 index 5d583a8360..0000000000 --- a/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc +++ /dev/null @@ -1,53 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifdef INTEL_MKL - -#include "tensorflow/core/common_runtime/threadpool_device.h" - -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/cpu_info.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/test.h" -#include "tensorflow/core/public/session_options.h" - -namespace tensorflow { - -#ifdef _OPENMP -TEST(MKLThreadPoolDeviceTest, TestOmpDefaults) { - SessionOptions options; - unsetenv("OMP_NUM_THREADS"); - - ThreadPoolDevice* tp = new ThreadPoolDevice( - options, "/device:CPU:0", Bytes(256), DeviceLocality(), cpu_allocator()); - - const int ht = port::NumHyperthreadsPerCore(); - EXPECT_EQ(omp_get_max_threads(), (port::NumSchedulableCPUs() + ht - 1) / ht); -} - -TEST(MKLThreadPoolDeviceTest, TestOmpPreSets) { - SessionOptions options; - setenv("OMP_NUM_THREADS", "314", 1); - - ThreadPoolDevice* tp = new ThreadPoolDevice( - options, "/device:CPU:0", Bytes(256), DeviceLocality(), cpu_allocator()); - - EXPECT_EQ(omp_get_max_threads(), 314); -} -#endif // _OPENMP - -} // namespace tensorflow - -#endif // INTEL_MKL diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc index a5d31b75c7..21912236d0 100644 --- a/tensorflow/core/common_runtime/process_util.cc +++ b/tensorflow/core/common_runtime/process_util.cc @@ -16,10 +16,8 @@ limitations under the License. #include "tensorflow/core/common_runtime/process_util.h" #ifdef INTEL_MKL -#ifdef _OPENMP #include -#endif // _OPENMP -#endif // INTEL_MKL +#endif #include #include "tensorflow/core/lib/core/threadpool.h" @@ -59,10 +57,7 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) { // MKL library executes ops in parallel using OMP threads // Set inter_op conservatively to avoid thread oversubscription that could // lead to severe perf degradations and OMP resource exhaustion - int mkl_intra_op = 1; -#ifdef _OPENMP - mkl_intra_op = omp_get_max_threads(); -#endif // _OPENMP + const int mkl_intra_op = omp_get_max_threads(); CHECK_GE(mkl_intra_op, 1); const int32 mkl_inter_op = std::max( (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2); @@ -73,7 +68,7 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) { #else // Default to using the number of cores available in the process. return port::NumSchedulableCPUs(); -#endif // INTEL_MKL +#endif } thread::ThreadPool* NewThreadPoolFromSessionOptions( diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc index 74a87215e1..f7a07fe503 100644 --- a/tensorflow/core/common_runtime/threadpool_device.cc +++ b/tensorflow/core/common_runtime/threadpool_device.cc @@ -31,11 +31,7 @@ limitations under the License. 
#include "tensorflow/core/public/session_options.h" #ifdef INTEL_MKL -#ifdef _OPENMP -#include -#endif #include "tensorflow/core/common_runtime/mkl_cpu_allocator.h" -#include "tensorflow/core/platform/cpu_info.h" #endif namespace tensorflow { @@ -47,26 +43,7 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options, : LocalDevice(options, Device::BuildDeviceAttributes( name, DEVICE_CPU, memory_limit, locality)), allocator_(allocator), - scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) { -#ifdef INTEL_MKL -#ifdef _OPENMP - const char* user_omp_threads = getenv("OMP_NUM_THREADS"); - if (user_omp_threads == nullptr) { - // OMP_NUM_THREADS controls MKL's intra-op parallelization - // Default to available physical cores - const int mkl_intra_op = port::NumSchedulableCPUs(); - const int ht = port::NumHyperthreadsPerCore(); - omp_set_num_threads((mkl_intra_op + ht - 1) / ht); - } else { - uint64 user_val = 0; - if (strings::safe_strtou64(user_omp_threads, &user_val)) { - // Superflous but triggers OpenMP loading - omp_set_num_threads(user_val); - } - } -#endif // _OPENMP -#endif // INTEL_MKL -} + scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {} ThreadPoolDevice::~ThreadPoolDevice() {} diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc index 770a0fcf14..1cea1b1462 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc @@ -147,9 +147,7 @@ MasterService::Stub::Stub( } MasterService::AsyncService::AsyncService() { - int method_len = sizeof(grpcMasterService_method_names) / - sizeof(grpcMasterService_method_names[0]); - for (int i = 0; i < method_len; ++i) { + for (int i = 0; i < 10; ++i) { AddMethod(new ::grpc::internal::RpcServiceMethod( grpcMasterService_method_names[i], ::grpc::internal::RpcMethod::NORMAL_RPC, nullptr)); diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc index a8508d2d4f..89f83f9f24 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc @@ -17,7 +17,6 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/rpc/grpc_session.h" #include "tensorflow/core/lib/strings/str_util.h" -#include "tensorflow/core/platform/env.h" #include "tensorflow/core/util/device_name_utils.h" namespace tensorflow { @@ -51,14 +50,9 @@ Status TestCluster::MakeTestCluster(const SessionOptions& options, int n, } for (int i = 0; i < n; ++i) { - string server_file = - strings::StrCat(testing::TensorFlowSrcRoot(), - "/core/distributed_runtime/rpc/grpc_testlib_server"); - if (!options.env->FileExists(server_file).ok()) { - return errors::Internal("Could not find grpc_testlib_server"); - } const std::vector argv( - {server_file, + {strings::StrCat(testing::TensorFlowSrcRoot(), + "/core/distributed_runtime/rpc/grpc_testlib_server"), /* see grpc_testlib_server.cc for flags */ tf_jobs, "--tf_job=localhost", strings::StrCat("--tf_task=", i), strings::StrCat("--num_cpus=", num_cpus), diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h index 2bb4d32d57..2c87156dca 100644 --- a/tensorflow/core/framework/allocator.h +++ b/tensorflow/core/framework/allocator.h @@ -67,8 +67,13 @@ struct AllocatorStats { // device memory. 
class Allocator { public: +#ifdef EIGEN_VECTORIZE_AVX512 // Align to 64 byte boundary. static constexpr size_t kAllocatorAlignment = 64; +#else + // Align to 32 byte boundary. + static constexpr size_t kAllocatorAlignment = 32; +#endif virtual ~Allocator(); diff --git a/tensorflow/core/framework/op_gen_lib.cc b/tensorflow/core/framework/op_gen_lib.cc index 4b56d807df..3d7920a6e2 100644 --- a/tensorflow/core/framework/op_gen_lib.cc +++ b/tensorflow/core/framework/op_gen_lib.cc @@ -15,7 +15,6 @@ limitations under the License. #include "tensorflow/core/framework/op_gen_lib.h" -#include #include #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/lib/core/errors.h" diff --git a/tensorflow/core/framework/remote_fused_graph_execute_info.proto b/tensorflow/core/framework/remote_fused_graph_execute_info.proto index 10072724d2..eb689ec1e6 100644 --- a/tensorflow/core/framework/remote_fused_graph_execute_info.proto +++ b/tensorflow/core/framework/remote_fused_graph_execute_info.proto @@ -5,7 +5,7 @@ option cc_enable_arenas = true; option java_outer_classname = "RemoteFusedGraphExecuteInfoProto"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework"; +//add go_package externally import "tensorflow/core/framework/graph.proto"; import "tensorflow/core/framework/tensor_shape.proto"; import "tensorflow/core/framework/types.proto"; diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc index 80e168df97..b613effd18 100644 --- a/tensorflow/core/framework/tensor_test.cc +++ b/tensorflow/core/framework/tensor_test.cc @@ -1147,29 +1147,29 @@ TEST(Tensor, FailureToAllocate) { // On the alignment. // -// As of 2018/5, tensorflow::Tensor allocates its buffer with 64-byte +// As of 2015/8, tensorflow::Tensor allocates its buffer with 32-byte // alignment. Tensor::tensor/flat/vec/matrix methods requires the // buffer satisfies Eigen::Aligned (e.g., 16-bytes aligned usually, -// 32-bytes for AVX, and 64-bytes for AVX512). Tensor::Slice requires -// the caller to ensure its result is aligned if the caller intends -// to use those methods. In this test case, we simply make sure each -// slice is 64-byte aligned: sizeof(float) * 4 * 36 = 576. 576 % 64 = 0. +// and 32-bytes for AVX). Tensor::Slice requires the caller to ensure +// its result is aligned if the caller intends to use those methods. +// In this test case, we simply make sure each slice is 32-byte +// aligned: sizeof(float) * 4 * 2 = 32. TEST(Tensor, Slice_Basic) { Tensor saved; { // General - Tensor x(DT_FLOAT, TensorShape({10, 4, 36})); + Tensor x(DT_FLOAT, TensorShape({10, 4, 34})); // Fills in known values. for (int i = 0; i < 10; ++i) { x.Slice(i, i + 1).flat().setConstant(i * 1.f); } // A simple slice along dim0. 
Tensor y = x.Slice(4, 8); - EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 4, 36}))); + EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 4, 34}))); auto tx = x.tensor(); auto ty = y.tensor(); for (int i = 0; i < 4; ++i) { for (int j = 0; j < 4; ++j) { - for (int k = 0; k < 36; ++k) { + for (int k = 0; k < 34; ++k) { EXPECT_EQ(ty(i, j, k), 4.0 + i); EXPECT_EQ(&tx(4 + i, j, k), &ty(i, j, k)); } @@ -1186,7 +1186,7 @@ TEST(Tensor, Slice_Basic) { auto tz = z.tensor(); EXPECT_EQ(1, z.dim_size(0)); for (int j = 0; j < 4; ++j) { - for (int k = 0; k < 36; ++k) { + for (int k = 0; k < 34; ++k) { EXPECT_EQ(tz(0, j, k), 6.0); } } @@ -1198,16 +1198,16 @@ TEST(Tensor, Slice_Basic) { EXPECT_EQ(1, saved.dim_size(0)); auto tsaved = saved.tensor(); for (int j = 0; j < 4; ++j) { - for (int k = 0; k < 36; ++k) { + for (int k = 0; k < 34; ++k) { EXPECT_EQ(tsaved(0, j, k), 6.0); } } } { // Empty - Tensor x(DT_FLOAT, TensorShape({10, 0, 36})); + Tensor x(DT_FLOAT, TensorShape({10, 0, 34})); x.flat().setRandom(); Tensor y = x.Slice(4, 8); - EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 0, 36}))); + EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 0, 34}))); } { diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index b9667998d6..72a13d4da7 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2691,14 +2691,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // If Op has been specifically assigned to a non-CPU device, then No. if (!n->assigned_device_name().empty() && - !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) { + !str_util::StrContains(n->assigned_device_name(),kCPUDeviceSubStr)) { result = false; reason = "Op has been assigned a runtime device that is not CPU."; } // If user has specifically assigned this op to a non-CPU device, then No. if (!n->def().device().empty() && - !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) { + !str_util::StrContains(n->def().device(),kCPUDeviceSubStr)) { result = false; reason = "User has assigned a device that is not CPU."; } @@ -2865,9 +2865,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass { return false; } - // If the depth_radius of LRN is not 2, then MKL DNN takes unoptimized - // path. The unoptimized path is slow. Thus we dont rewrite the node - // and use default Eigen. But for depth_radius=2, MKL DNN optimized + // If the depth_radius of LRN is not 2, then MKL DNN takes unoptimized + // path. The unoptimized path is slow. Thus we dont rewrite the node + // and use default Eigen. But for depth_radius=2, MKL DNN optimized // path is taken, i.e., eigen node is rewritten by MKl DNN node. static bool LrnRewrite(const Node* n) { CHECK_NOTNULL(n); @@ -2876,13 +2876,13 @@ class MklLayoutRewritePass : public GraphOptimizationPass { CHECK_EQ(GetNodeAttr(n->def(), "depth_radius", &depth_radius).ok(), true); // if the depth_radius of LRN is not 2, don't rewrite the node by MKL DNN - // and use eigen node instead + // and use eigen node instead if (depth_radius == 2) { return true; } VLOG(1) << "LrnRewrite: The model sets depth_radius as not 2 which" << "case is not optimized by Intel MKL, thus using Eigen op" - << "for LRN "; + << "for LRN " ; return false; } @@ -3015,35 +3015,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { std::vector* ws_tensors, bool* are_ws_tensors_added); - // Helper function used by FixMklMetaDataEdges. 
Fixes the metadata edge - // pointed by 'e_metadata' corresponding to the data edge 'e_data' in graph - // 'g'. Returns true is fixup was done; otherwise, it returns false. - bool FixMklMetaDataEdgeIfNeeded(std::unique_ptr* g, - const Edge* e_data, const Edge* e_metadata); - - // Are the input Mkl metadata edges for node 'n' in graph 'g' correctly - // connected? If not, then fix them. This is needed because a graph may have - // some input Mkl metadata edges incorrectly setup after node merge and - // rewrite passes. This could happen because GetReversePostOrder function may - // not provide topologically sorted order if a graph contains cycles. The - // function returns true if at least one Mkl metadata edge for node 'n' was - // fixed. Otherwise, it returns false. - // - // Example: - // - // X = MklConv2D(_, _, _) - // Y = MklConv2DWithBias(_, _, _, _, _, _) - // Z = MklAdd(X, Y, DummyMklTensor, Y:1) - // - // For a graph such as shown above, note that 3rd argument of MklAdd contains - // DummyMklTensor. Actually, it should be getting the Mkl metadata from - // MklConv2D op (specifically, X:2). This incorrect plumbing could be possible - // (although rare) if the Mkl NodeMerge + NodeRewrite passes visit Z before X - // (possible if X, Y, Z are part of a loop.) This function fixes the Mkl - // metadata edges only - it does not rewrite nodes nor does it modify the Mkl - // data edges (1st and 2nd arguments of MklAdd). - bool FixMklMetaDataEdges(std::unique_ptr* g, Node* n); - // Functions specific to operators to copy attributes // We need operator-specific function to copy attributes because the framework // does not provide any generic function for it. @@ -4270,92 +4241,6 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const { return nullptr; } -/////////////////////////////////////////////////////////////////////////////// -// Post-rewrite Mkl metadata fixup pass -/////////////////////////////////////////////////////////////////////////////// -bool MklLayoutRewritePass::FixMklMetaDataEdgeIfNeeded(std::unique_ptr* g, - const Edge* e_data, const Edge* e_metadata) { - if (g == nullptr || e_data == nullptr || e_metadata == nullptr) { - return false; - } - - Node* n_data = e_data->src(); - int n_data_op_slot = e_data->src_output(); - int n_metadata_op_slot = GetTensorMetaDataIndex(n_data_op_slot, - n_data->num_outputs()); - - // If the source of meta edge is a constant node (producing dummy Mkl metadata - // tensor), then we will need to fix. - if (IsConstant(e_metadata->src())) { - Node* e_metadata_dst = e_metadata->dst(); - int e_metadata_in_slot = e_metadata->dst_input(); - CHECK_NOTNULL((*g)->AddEdge(n_data, n_metadata_op_slot, - e_metadata_dst, e_metadata_in_slot)); - - (*g)->RemoveEdge(e_metadata); - return true; - } - - return false; -} - -bool MklLayoutRewritePass::FixMklMetaDataEdges(std::unique_ptr* g, - Node* n) { - bool result = false; - - // If graph node is not Mkl node, then return. - DataType T = DT_INVALID; - if (!GetNodeAttr(n->def(), "T", &T).ok() || - !mkl_op_registry::IsMklOp(n->type_string(), T)) { - return result; - } - - // If it is Mkl node, then check if the input edges to this node that carry - // Mkl metadata are linked up correctly with the source node. - - // For Mkl nodes, we generate twice the number of input tensors (n for Mkl - // data tensors + n for Mkl metadata tensors). We need to check for correct - // connection of n metadata tensors only. 
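The fixup pass removed here relies on a simple indexing convention: an MKL node carries n data inputs plus n metadata inputs, and GetTensorMetaDataIndex maps a data slot to its metadata slot. The exact convention is not visible in this excerpt, so the small Python sketch below shows the two obvious layouts (contiguous and interleaved) purely as an illustration; which one the helper actually uses is not established here.

    def meta_index_contiguous(data_idx, num_inputs):
        # Layout [d0, ..., d(n-1), m0, ..., m(n-1)]:
        # metadata for the i-th data tensor lives at slot n + i.
        return num_inputs // 2 + data_idx

    def meta_index_interleaved(data_idx, num_inputs):
        # Layout [d0, m0, d1, m1, ...]:
        # metadata for the i-th data tensor lives at slot 2*i + 1.
        del num_inputs  # not needed for this layout
        return 2 * data_idx + 1

    # With 4 total inputs (2 data + 2 metadata tensors):
    assert [meta_index_contiguous(i, 4) for i in range(2)] == [2, 3]
    assert [meta_index_interleaved(i, 4) for i in range(2)] == [1, 3]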
- int num_data_inputs = n->num_inputs() / 2; - for (int idx = 0; idx < num_data_inputs; idx++) { - // Get the edge connecting input slot with index (idx). - const Edge* e = nullptr; - TF_CHECK_OK(n->input_edge(idx, &e)); - - // If e is control edge, then skip. - if (e->IsControlEdge()) { - continue; - } - - // Check that the source node for edge 'e' is Mkl node. If it is not an Mkl - // node, then we don't need to do anything. - Node* e_src = e->src(); - if (GetNodeAttr(e_src->def(), "T", &T).ok() && - mkl_op_registry::IsMklOp(e_src->type_string(), T)) { - // Source node for edge 'e' is Mkl node. - // Destination node and destination input slot of e is node 'n' and 'idx' - // resp. - CHECK_EQ(e->dst(), n); - CHECK_EQ(e->dst_input(), idx); - - // Let's get edge that carries Mkl metadata corresponding to Mkl data edge - // 'e'. For that, let's first get the input slot of 'n' where the meta - // edge will feed the value. - int e_meta_in_slot = GetTensorMetaDataIndex(e->dst_input(), - n->num_inputs()); - const Edge* e_meta = nullptr; - TF_CHECK_OK(n->input_edge(e_meta_in_slot, &e_meta)); - - // Let's check if we need to fix this meta edge. - if (FixMklMetaDataEdgeIfNeeded(g, e, e_meta)) { - result = true; - } - } - } - - return result; -} - /////////////////////////////////////////////////////////////////////////////// // Run function for the pass /////////////////////////////////////////////////////////////////////////////// @@ -4422,25 +4307,6 @@ bool MklLayoutRewritePass::RunPass(std::unique_ptr* g) { DumpGraph("After running MklLayoutRewritePass(NodeMerge+Rewrite)", &**g); - order.clear(); - GetReversePostOrder(**g, &order); // This will give us topological sort. - for (Node* n : order) { - // If node is not an op or it cannot run on CPU device, then skip. 
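The removed loop above walks nodes in the order produced by GetReversePostOrder, which the comment notes is a topological order (for acyclic graphs). For the intuition, here is a small self-contained Python sketch of that classic relationship: reversing a DFS post-order of a DAG yields a topological sort. It illustrates the general algorithm only, not TensorFlow's GetReversePostOrder.

    def reverse_post_order(graph, roots):
        # graph: dict mapping node -> list of successor nodes (a DAG).
        # Returns nodes in reverse DFS post-order, i.e. a topological order.
        visited, order = set(), []

        def dfs(node):
            visited.add(node)
            for succ in graph.get(node, []):
                if succ not in visited:
                    dfs(succ)
            order.append(node)  # post-order: emitted after all successors

        for root in roots:
            if root not in visited:
                dfs(root)
        return list(reversed(order))

    # A -> B -> D and A -> C -> D: every edge goes left-to-right in the result.
    g = {'A': ['B', 'C'], 'B': ['D'], 'C': ['D'], 'D': []}
    topo = reverse_post_order(g, ['A'])
    assert topo.index('A') < topo.index('B') < topo.index('D')
    assert topo.index('A') < topo.index('C') < topo.index('D')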
- if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) { - continue; - } - if (FixMklMetaDataEdges(g, n)) { - string node_name = n->name(); - string op_name = n->type_string(); - - VLOG(1) << "MklLayoutRewritePass: fixed metadata edges for node " - << node_name << " with op " << op_name; - result = true; - } - } - DumpGraph("After running MklLayoutRewritePass(NodeMerge+Rewrite+Fixup)", - &**g); - return result; } diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index 7645b4a7f0..029cdcf94a 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -3518,37 +3518,6 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) { "B->C:1;C->E;D->E:1;E->Z;M->C:2;N->C:3;Y->Z:1"); } -///////////////////////////////////////////////////////////////////// -// Post-rewrite fixup pass test - -TEST_F(MklLayoutPassTest, PostRewriteFixUpPass) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'C' op: '_MklConv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['A', 'B', 'M', 'N']}" - "node { name: 'D' op: 'Const' " - " attr { key: 'dtype' value { type: DT_UINT8 } }" - " attr { key: 'value' value { " - " tensor { dtype: DT_UINT8 tensor_shape { dim { size: 1 } } " - " int_val: 0 } } } }" - "node { name: 'E' op: '_MklAdd'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'A', 'D', 'D']}"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(_MklConv2D);D(Const);E(_MklAdd);" - "M(_MklInput);N(_MklInput)|A->C;A->E:1;B->C:1;C->E;C:2->E:2;" - "D->E:3;M->C:2;N->C:3"); -} - ///////////////////////////////////////////////////////////////////// static void BM_MklLayoutRewritePass(int iters, int op_nodes) { diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 0c02876ac5..6749a7c571 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -610,6 +610,7 @@ class SymbolicShapeRefiner { } }; + // Compute the shape of the tensors outputed by node 'node' at output port // 'port_index' as the union of shape1 and shape2. 
ShapeHandle OutputAsUnion(const NodeDef* node, int port_index, ShapeHandle shape1, ShapeHandle shape2) { diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 8ca726df0b..1b18087cdf 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -679,7 +679,6 @@ cc_library( deps = [ ":constant_folding", ":graph_optimizer", - "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:graph_view", "//tensorflow/core/grappler:grappler_item", @@ -781,6 +780,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", + "//tensorflow/core:scoped_allocator_ops_op_lib", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:op_types", "//tensorflow/core/grappler:utils", diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc index 03e36a7b9c..4dde7ed1b4 100644 --- a/tensorflow/core/grappler/optimizers/remapper.cc +++ b/tensorflow/core/grappler/optimizers/remapper.cc @@ -22,7 +22,6 @@ limitations under the License. #include "tensorflow/core/grappler/op_types.h" #include "tensorflow/core/grappler/optimizers/constant_folding.h" #include "tensorflow/core/grappler/utils.h" -#include "tensorflow/core/platform/logging.h" namespace tensorflow { namespace grappler { @@ -201,7 +200,8 @@ Status Remapper::Optimize(Cluster* /*cluster*/, const GrapplerItem& item, } } if (optimizable) { - VLOG(1) << "Optimizing fused batch norm node " << node.DebugString(); + VLOG(2) << "Optimizing fused batch norm node " << node.DebugString() + << std::endl; AddBatchNormNodes(optimized_graph, node); continue; } diff --git a/tensorflow/core/kernels/as_string_op.cc b/tensorflow/core/kernels/as_string_op.cc index a7757d1361..66c4aff3e3 100644 --- a/tensorflow/core/kernels/as_string_op.cc +++ b/tensorflow/core/kernels/as_string_op.cc @@ -73,7 +73,6 @@ class AsStringOp : public OpKernel { } switch (dtype) { case DT_INT8: - case DT_INT16: case DT_INT32: strings::Appendf(&format_, "d"); break; @@ -130,7 +129,6 @@ class AsStringOp : public OpKernel { ENCODE_TYPE(DT_FLOAT, float, format_); ENCODE_TYPE(DT_DOUBLE, double, format_); ENCODE_TYPE(DT_INT8, int8, format_); - ENCODE_TYPE(DT_INT16, int16, format_); case (DT_BOOL): { const auto& input_flat = input_tensor->flat(); for (int i = 0; i < input_flat.size(); ++i) { diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc index 49b90e855b..14d889e8e3 100644 --- a/tensorflow/core/kernels/cwise_op_clip.cc +++ b/tensorflow/core/kernels/cwise_op_clip.cc @@ -33,41 +33,52 @@ class ClipOp : public OpKernel { const Tensor& in0 = ctx->input(0); const Tensor& in1 = ctx->input(1); const Tensor& in2 = ctx->input(2); - OP_REQUIRES(ctx, (in0.shape() == in1.shape() || - TensorShapeUtils::IsScalar(in1.shape())) && - (in0.shape() == in2.shape() || - TensorShapeUtils::IsScalar(in2.shape())), - errors::InvalidArgument( - "clip_value_min and clip_value_max must be either of " - "the same shape as input, or a scalar. 
", - "input shape: ", in0.shape().DebugString(), - "clip_value_min shape: ", in1.shape().DebugString(), - "clip_value_max shape: ", in2.shape().DebugString())); - - Tensor* out = nullptr; - OP_REQUIRES_OK( - ctx, ctx->forward_input_or_allocate_output({0}, 0, in0.shape(), &out)); - if (out->NumElements() == 0) return; // Nothing to do for empty output auto in0_flat = in0.flat(); auto in1_flat = in1.flat(); auto in2_flat = in2.flat(); - auto out_flat = out->flat(); const Device& d = ctx->eigen_device(); + Tensor* out = nullptr; + OP_REQUIRES_OK( + ctx, ctx->forward_input_or_allocate_output({0}, 0, in0.shape(), &out)); + auto out_flat = out->flat(); if (in1.shape() == in2.shape()) { if (in0.shape() == in1.shape()) { functor::TernaryClipOp()(d, in0_flat, in1_flat, in2_flat, out_flat); } else { + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in1.shape()), + errors::InvalidArgument( + "clip_value_min and clip_value_max must be either of " + "the same shape as input, or a scalar. ", + "input shape: ", in0.shape().DebugString(), + "clip_value_min shape: ", in1.shape().DebugString(), + "clip_value_max shape: ", in2.shape().DebugString())); functor::UnaryClipOp()(d, in0_flat, in1_flat, in2_flat, out_flat); } } else { if (in0.shape() == in1.shape()) { + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in2.shape()), + errors::InvalidArgument( + "clip_value_min and clip_value_max must be either of " + "the same shape as input, or a scalar. ", + "input shape: ", in0.shape().DebugString(), + "clip_value_min shape: ", in1.shape().DebugString(), + "clip_value_max shape: ", in2.shape().DebugString())); functor::BinaryLeftClipOp()(d, in0_flat, in1_flat, in2_flat, out_flat); } else { + OP_REQUIRES(ctx, + (in0.shape() == in2.shape() && + TensorShapeUtils::IsScalar(in1.shape())), + errors::InvalidArgument( + "clip_value_min and clip_value_max must be either of " + "the same shape as input, or a scalar. 
", + "input shape: ", in0.shape().DebugString(), + "clip_value_min shape: ", in1.shape().DebugString(), + "clip_value_max shape: ", in2.shape().DebugString())); functor::BinaryRightClipOp()(d, in0_flat, in1_flat, in2_flat, out_flat); } diff --git a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc index 17a85d9773..9a3b2303a3 100644 --- a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc @@ -57,7 +57,6 @@ struct DenseUpdate { template struct functor::DenseUpdate; \ template struct functor::DenseUpdate; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); -TF_CALL_int32(DEFINE_GPU_KERNELS); TF_CALL_int64(DEFINE_GPU_KERNELS); #undef DEFINE_GPU_KERNELS diff --git a/tensorflow/core/kernels/gather_functor.cc b/tensorflow/core/kernels/gather_functor.cc index 5cd8e04927..e6fefe643b 100644 --- a/tensorflow/core/kernels/gather_functor.cc +++ b/tensorflow/core/kernels/gather_functor.cc @@ -37,7 +37,6 @@ namespace functor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) -TF_CALL_int64(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); TF_CALL_complex128(DECLARE_GPU_SPECS); diff --git a/tensorflow/core/kernels/gather_functor_gpu.cu.cc b/tensorflow/core/kernels/gather_functor_gpu.cu.cc index 4563fc6353..39b6924d74 100644 --- a/tensorflow/core/kernels/gather_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/gather_functor_gpu.cu.cc @@ -31,7 +31,6 @@ typedef Eigen::GpuDevice GPUDevice; DEFINE_GPU_SPECS_INDEX(T, int32); \ DEFINE_GPU_SPECS_INDEX(T, int64); -TF_CALL_int64(DEFINE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS); TF_CALL_complex64(DEFINE_GPU_SPECS); TF_CALL_complex128(DEFINE_GPU_SPECS); diff --git a/tensorflow/core/kernels/gather_nd_op.cc b/tensorflow/core/kernels/gather_nd_op.cc index 4e53291b7f..7e5a9e1ec5 100644 --- a/tensorflow/core/kernels/gather_nd_op.cc +++ b/tensorflow/core/kernels/gather_nd_op.cc @@ -228,8 +228,6 @@ namespace functor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) -TF_CALL_int32(DECLARE_GPU_SPECS); -TF_CALL_int64(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); TF_CALL_complex128(DECLARE_GPU_SPECS); @@ -241,8 +239,6 @@ TF_CALL_complex128(DECLARE_GPU_SPECS); // Registration of the GPU implementations. 
#define REGISTER_GATHER_ND_GPU(type) REGISTER_GATHER_ND_ALL_INDICES(GPU, type) -TF_CALL_int32(REGISTER_GATHER_ND_GPU); -TF_CALL_int64(REGISTER_GATHER_ND_GPU); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_ND_GPU); TF_CALL_complex64(REGISTER_GATHER_ND_GPU); TF_CALL_complex128(REGISTER_GATHER_ND_GPU); diff --git a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc index da8d2e9e3c..b03efc684f 100644 --- a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc @@ -119,8 +119,6 @@ struct GatherNdSlice { DEFINE_GPU_SPECS_INDEX(T, int32); \ DEFINE_GPU_SPECS_INDEX(T, int64); -TF_CALL_int32(DEFINE_GPU_SPECS); -TF_CALL_int64(DEFINE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS); TF_CALL_complex64(DEFINE_GPU_SPECS); TF_CALL_complex128(DEFINE_GPU_SPECS); diff --git a/tensorflow/core/kernels/gather_op.cc b/tensorflow/core/kernels/gather_op.cc index 094504d6b9..ef332ebee3 100644 --- a/tensorflow/core/kernels/gather_op.cc +++ b/tensorflow/core/kernels/gather_op.cc @@ -153,7 +153,6 @@ TF_CALL_uint64(REGISTER_GATHER_CPU); // Registration of the GPU implementations. #define REGISTER_GATHER_GPU(type) REGISTER_GATHER_ALL_INDICES(GPU, type) -TF_CALL_int64(REGISTER_GATHER_GPU); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_GPU); TF_CALL_complex64(REGISTER_GATHER_GPU); TF_CALL_complex128(REGISTER_GATHER_GPU); diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc index 31d1b949ef..5eeb23d810 100644 --- a/tensorflow/core/kernels/mkl_concat_op.cc +++ b/tensorflow/core/kernels/mkl_concat_op.cc @@ -14,7 +14,6 @@ limitations under the License. #include #include -#include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" @@ -591,8 +590,8 @@ class MklConcatOp : public OpKernel { const int N = input_tensors.size(); // Get Tensor shapes. - std::vector mkl_input_shapes(N); - GetMklShapeList(context, "values", &mkl_input_shapes); + std::vector input_shapes(N); + GetMklShapeList(context, "values", &input_shapes); const Tensor& concat_dim_tensor = (AxisArgName == NAME_IS_CONCAT_DIM) ? MklGetInput(context, 0) @@ -611,14 +610,19 @@ class MklConcatOp : public OpKernel { int i = 0; bool invoke_eigen = false; bool are_all_mkl_inputs = true, are_all_tf_inputs = true; - const TensorShape expected_shape = mkl_input_shapes[0].IsMklTensor() - ? mkl_input_shapes[0].GetTfShape() - : input_tensors[0].shape(); + const TensorShape expected_shape = input_shapes[0].IsMklTensor() + ? input_shapes[0].GetTfShape() + : input_tensors[0].shape(); size_t expected_dims = expected_shape.dims(); if (concat_dim < 0) concat_dim = expected_dims + concat_dim; - for (auto& s : mkl_input_shapes) { + for (auto& s : input_shapes) { + if (s == expected_shape) { + ++i; + continue; + } + TensorShape s_shape = s.IsMklTensor() ? s.GetTfShape() : input_tensors[i].shape(); size_t s_dims = s_shape.dims(); @@ -661,14 +665,21 @@ class MklConcatOp : public OpKernel { // Call Eigen library if (invoke_eigen) { - CallEigenVersion(context, input_tensors, mkl_input_shapes); + TensorShapeList tf_input_shapes; + i = 0; + for (auto& s : input_shapes) { + TensorShape s_shape = + s.IsMklTensor() ? 
s.GetTfShape() : input_tensors[i].shape(); + tf_input_shapes.push_back(s_shape); + ++i; + } + CallEigenVersion(context, input_tensors, tf_input_shapes); return; } memory::dims dst_dims; - if (are_all_mkl_inputs) - dst_dims = TFShapeToMklDnnDims(mkl_input_shapes[0].GetTfShape()); + dst_dims = TFShapeToMklDnnDims(input_shapes[0].GetTfShape()); else // When all the inputs are in Tensorflow format, we don't know // what is the input data format. In that case, we just use @@ -678,61 +689,26 @@ class MklConcatOp : public OpKernel { std::vector srcs_pd; std::vector> srcs(N, MklDnnData(&cpu_engine)); int64 dst_concat_dim_size = 0; - - bool isMklReorderNeeded = false; - memory::format mkl_common_format = memory::format::any; - if (are_all_mkl_inputs) { - mkl_common_format = - FindMklCommonFormat(mkl_input_shapes, concat_dim, - &isMklReorderNeeded, &dst_concat_dim_size); - - if (!isMklReorderNeeded) { - // All MKL tensors have a same format. Reorder is not needed. - for (int k = 0; k < N; k++) { - if (input_tensors[k].NumElements() == 0) - continue; - - auto src_md = mkl_input_shapes[k].GetMklLayout(); - srcs[k].SetUsrMem(src_md, &input_tensors[k]); - auto src_mpd = srcs[k].GetUsrMemPrimDesc(); - srcs_pd.push_back(src_mpd); - } - } else { - // MKL tensors have different formats. - // Reorder them to most common format. - for (int k = 0; k < N; k++) { - if (input_tensors[k].NumElements() == 0) - continue; - - auto src_dims = TFShapeToMklDnnDims( - mkl_input_shapes[k].GetTfShape()); - auto src_md = mkl_input_shapes[k].GetMklLayout(); - srcs[k].SetUsrMem(src_md, &input_tensors[k]); - - if (src_md.data.format != mkl_common_format) - src_md = memory::desc(src_dims, MklDnnType(), - mkl_common_format); - - srcs_pd.push_back(memory::primitive_desc(src_md, cpu_engine)); - } - } - } else { // All TF inputs - for (int k = 0; k < N; k++) { - if (input_tensors[k].NumElements() == 0) - continue; - - memory::dims src_dims = TFShapeToMklDnnDims(input_tensors[k].shape()); - dst_concat_dim_size += src_dims[concat_dim]; - - // It does not matter what data format to be used (NHWC versus NCHW). - // We just need to ensure that output uses same data format as inputs. - auto src_md = - memory::desc(src_dims, MklDnnType(), memory::format::nchw); - - srcs[k].SetUsrMem(src_md, &input_tensors[k]); - auto src_mpd = srcs[k].GetUsrMemPrimDesc(); - srcs_pd.push_back(src_mpd); - } + for (int k = 0; k < N; k++) { + bool is_mkl_tensor = input_shapes[k].IsMklTensor(); + memory::dims src_dims; + + // Same comment as dst_dims for src_dims. + src_dims = (is_mkl_tensor) + ? TFShapeToMklDnnDims(input_shapes[k].GetTfShape()) + : TFShapeToMklDnnDims(input_tensors[k].shape()); + + dst_concat_dim_size += src_dims[concat_dim]; + auto src_md = + is_mkl_tensor ? input_shapes[k].GetMklLayout() : + // It does not matter what data format we use here + // (NHWC or NCHW). We just need to ensure that output + // of Concat uses same data format as input. + memory::desc(src_dims, MklDnnType(), memory::format::nchw); + + srcs[k].SetUsrMem(src_md, &input_tensors[k]); + auto src_mpd = srcs[k].GetUsrMemPrimDesc(); + srcs_pd.push_back(src_mpd); } dst_dims[concat_dim] = dst_concat_dim_size; @@ -742,33 +718,25 @@ class MklConcatOp : public OpKernel { if (are_all_mkl_inputs) { // Since we are passing a specific format for destination, // we need to have dst_dims in MklDnn order (NCHW). 
- auto orig_tf_format = mkl_input_shapes[0].GetTfDataFormat(); + auto orig_tf_format = input_shapes[0].GetTfDataFormat(); dst_dims_in_nchw = MklDnnDimsInNCHW( dst_dims, MklDnnDataFormatToTFDataFormat(orig_tf_format)); - // Set the output format same as the most common format of inputs - // to avoid layout conversions. + // We will set the output in the same format as input to avoid layout + // conversions. + // Currently we are setting dst format same as input format. + // See if we can make this choice in a better way. dst_md = memory::desc( - dst_dims_in_nchw, MklDnnType(), mkl_common_format); + dst_dims_in_nchw, MklDnnType(), + (memory::format)input_shapes[0].GetMklLayout().data.format); } else { - // All inputs are TF tensors. - // Set the output format same as input format (nchw). + // Again, format does not matter here. We just need to make it same as + // input format. dst_md = memory::desc(dst_dims, MklDnnType(), memory::format::nchw); } std::vector inputs; - std::vector net; - if (isMklReorderNeeded) { - for (int k = 0; k < input_tensors.size(); k++) { - if (input_tensors[k].NumElements() > 0) { - srcs[k].CheckReorderToOpMem(srcs_pd[k], &net); - } - } - } - for (int k = 0; k < input_tensors.size(); k++) { - if (input_tensors[k].NumElements() > 0) { - inputs.push_back(srcs[k].GetOpMem()); - } - } + for (int k = 0; k < input_tensors.size(); k++) + inputs.push_back(srcs[k].GetOpMem()); // If all inputs are in MKL format, then meaning of concat_dim needs to // change. Value of concat_dim is tied to input Tensorflow data format @@ -777,8 +745,7 @@ class MklConcatOp : public OpKernel { // But ifinput tensors are in NHWC order, then semantics need to change. // E.g., if we are concatinating over Channel (dimension 3 for NHWC), // then since MklDnn order is NCHW, concat_dim needs to be 1. 
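The remapping described in the comment above is just a permutation of dimension indices between the NHWC and NCHW layouts. A tiny Python illustration follows, assuming a 4-D tensor; the dictionary is a generic layout mapping used for illustration, not TensorFlow's TfDimIdx helper.

    # Position of each NHWC dimension inside an NCHW-ordered tensor.
    NHWC_TO_NCHW = {0: 0,  # N stays first
                    1: 2,  # H
                    2: 3,  # W
                    3: 1}  # C: dimension 3 in NHWC is dimension 1 in NCHW

    def concat_dim_nhwc_to_nchw(concat_dim):
        return NHWC_TO_NCHW[concat_dim]

    # Concatenating over channels (dim 3 in NHWC) means dim 1 once data is NCHW.
    assert concat_dim_nhwc_to_nchw(3) == 1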
- if (are_all_mkl_inputs) - concat_dim = mkl_input_shapes[0].TfDimIdx(concat_dim); + if (are_all_mkl_inputs) concat_dim = input_shapes[0].TfDimIdx(concat_dim); auto concat_pd = concat::primitive_desc(dst_md, concat_dim, srcs_pd); @@ -791,7 +758,7 @@ class MklConcatOp : public OpKernel { dnn_shape_dst.SetMklLayout(&dst_pd); dnn_shape_dst.SetElemType(MklDnnType()); dnn_shape_dst.SetTfLayout(dst_dims.size(), dst_dims_in_nchw, - mkl_input_shapes[0].GetTfDataFormat()); + input_shapes[0].GetTfDataFormat()); tf_shape_dst.AddDim((dst_pd.get_size() / sizeof(T))); } else { dnn_shape_dst.SetMklTensor(false); @@ -806,6 +773,7 @@ class MklConcatOp : public OpKernel { dst.SetUsrMem(dst_md, dst_tensor); auto concat_op = concat(concat_pd, inputs, dst.GetOpMem()); + std::vector net; net.push_back(concat_op); stream(stream::kind::eager).submit(net).wait(); } catch (mkldnn::error& e) { @@ -819,27 +787,15 @@ class MklConcatOp : public OpKernel { } void CallEigenVersion(OpKernelContext* context, const OpInputList& values, - const MklDnnShapeList& mkl_input_shapes) { - CHECK_EQ(values.size(), mkl_input_shapes.size()); + const TensorShapeList& input_shapes) { + CHECK_EQ(values.size(), input_shapes.size()); std::vector converted_values; - TensorShapeList tf_input_shapes; - for (int i = 0; i < mkl_input_shapes.size(); i++) { - if (mkl_input_shapes[i].IsMklTensor()) { - // do conversion from MKL to TF - Tensor tmp_tensor = - ConvertMklToTF(context, values[i], mkl_input_shapes[i]); - converted_values.push_back(tmp_tensor); - tf_input_shapes.push_back(mkl_input_shapes[i].GetTfShape()); - } else { - // no conversion since it is TF tensor already - converted_values.push_back(values[i]); - tf_input_shapes.push_back(values[i].shape()); - } - } + for (int i = 0; i < input_shapes.size(); i++) + converted_values.push_back(values[i]); // Call Eigen concat. - eigen_concat_op_.Compute(context, converted_values, tf_input_shapes); + eigen_concat_op_.Compute(context, converted_values, input_shapes); // Set output Mkl tensor for this op. MklDnnShape dnn_shape_output; @@ -856,55 +812,6 @@ class MklConcatOp : public OpKernel { output_tensor->flat().data(), output_tensor->flat().size() * sizeof(uint8)); } - - // This method finds the most commom format accross all MKL inputs - // Inputs: - // 1. input_shapes: shapes of input (MKL) tensors. - // 2. concat_dim: concat dimension. - // Outputs: - // 1. is_reorder_needed is set to true if inputs have difference formats - // It is set to false otherwise. - // 2. concat_dim_size is the size of concat_dim. - // Return: - // return the common MKL format. - memory::format FindMklCommonFormat(const MklDnnShapeList& input_shapes, - int concat_dim, bool* is_reorder_needed, int64* concat_dim_size) { - *is_reorder_needed = false; - *concat_dim_size = 0; - std::unordered_map occurrence_map; - if (input_shapes.size() == 0) - return memory::format::any; - - // Compute ocurrences of each format of all inputs. - for (int k=0; k ( - input_shapes[k].GetMklLayout().data.format); - occurrence_map[fmt] += 1; - } - - if (occurrence_map.size() == 1) { - // this means that all inputs have a same format - // return it with is_reorder_needed set false. - return static_cast( - input_shapes[0].GetMklLayout().data.format); - } - - // Input tensors have different formats. Thus, reorder is needed. - // We pick up the most common format to minimize the total - // number of input reorder. 
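The FindMklCommonFormat helper removed below is essentially a majority vote: count how often each memory format occurs among the inputs, and if more than one format is present, pick the most frequent one and flag that a reorder is needed. A compact Python sketch of that selection logic follows; it is an analogy using plain strings for formats, not the MKL-DNN API.

    from collections import Counter

    def find_common_format(formats):
        # formats: per-input memory formats, e.g. ['nchw', 'nChw16c', 'nchw'].
        # Returns (common_format, reorder_needed).
        if not formats:
            return None, False
        counts = Counter(formats)
        if len(counts) == 1:
            return formats[0], False          # all inputs already agree
        common, _ = counts.most_common(1)[0]  # majority vote minimizes reorders
        return common, True

    assert find_common_format(['nchw', 'nchw', 'nChw16c']) == ('nchw', True)
    assert find_common_format(['nChw16c', 'nChw16c']) == ('nChw16c', False)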
- memory::format commonest_format = memory::format::any; - int max_occurrence = 0; - *is_reorder_needed = true; - for (auto item : occurrence_map) { - if (item.second > max_occurrence) { - commonest_format = static_cast(item.first); - max_occurrence = item.second; - } - } - return commonest_format; - } }; #endif diff --git a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc index f857be6c32..c1da0ded1d 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc @@ -18,7 +18,6 @@ limitations under the License. // bias. #ifdef INTEL_MKL -#ifdef INTEL_MKL_ML #define USE_EIGEN_TENSOR #define EIGEN_USE_THREADS @@ -265,5 +264,4 @@ class MklConv2DCustomBackpropBiasOp : public OpKernel { TF_CALL_float(REGISTER_CPU_KERNELS); #undef REGISTER_CPU_KERNELS } /* namespace tensorflow */ -#endif /* INTEL_MKL_ML */ #endif /* INTEL_MKL */ diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h index c0dfed7d7d..279167aba2 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.h +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h @@ -199,15 +199,13 @@ class MklPoolingForwardOpBase : public MklPoolingOpBase { CHECK_NOTNULL(pool_params); CHECK_NOTNULL(dnn_data_input); TensorShape input_tensor_shape = input_tensor.shape(); - if (input_tensor.NumElements() != 0) { - memory::desc input_md = + memory::desc input_md = input_mkl_shape.IsMklTensor() ? input_mkl_shape.GetMklLayout() : memory::desc(TFShapeToMklDnnDimsInNCHW(input_tensor_shape, this->data_format_tf_), MklDnnType(), this->data_format_mkldnn_); - dnn_data_input->SetUsrMem(input_md, &input_tensor); - } + dnn_data_input->SetUsrMem(input_md, &input_tensor); this->InitMklPoolParameters(context, pool_params, input_mkl_shape, input_tensor_shape); } diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc index e1fc2ea128..43c5b29509 100644 --- a/tensorflow/core/kernels/scatter_nd_op.cc +++ b/tensorflow/core/kernels/scatter_nd_op.cc @@ -292,7 +292,6 @@ TF_CALL_string(REGISTER_SCATTER_ND_CPU); REGISTER_SCATTER_ND_UPDATE_GPU(type); \ REGISTER_SCATTER_ND_GPU(type); -TF_CALL_int32(REGISTER_SCATTER_ND_ALL_GPU); // TODO(b/66916790): Support half types in ScatterNd. TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_ALL_GPU); TF_CALL_complex64(REGISTER_SCATTER_ND_ALL_GPU); @@ -307,8 +306,6 @@ TF_CALL_complex128(REGISTER_SCATTER_ND_ALL_GPU); #define REGISTER_SCATTER_ND_UPDATE_SYCL(type) \ REGISTER_SCATTER_ND_UPDATE(type, SYCL); -TF_CALL_int32(REGISTER_SCATTER_ND_ADD_SUB_SYCL); -TF_CALL_int32(REGISTER_SCATTER_ND_UPDATE_SYCL); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_ADD_SUB_SYCL); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_UPDATE_SYCL); #undef REGISTER_SCATTER_ND_ADD_SUB_SYCL @@ -579,7 +576,6 @@ namespace functor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) -TF_CALL_int32(DECLARE_GPU_SPECS); // TODO(b/66916790): Support half types in ScatterNd. 
TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); diff --git a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc index 08b657f4c3..a3c21edc15 100644 --- a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc @@ -170,7 +170,6 @@ struct ScatterNdFunctor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) -TF_CALL_int32(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); TF_CALL_complex128(DECLARE_GPU_SPECS); diff --git a/tensorflow/core/kernels/scoped_allocator_ops_test.cc b/tensorflow/core/kernels/scoped_allocator_ops_test.cc index 634f9ba887..bb0129fa6f 100644 --- a/tensorflow/core/kernels/scoped_allocator_ops_test.cc +++ b/tensorflow/core/kernels/scoped_allocator_ops_test.cc @@ -216,13 +216,8 @@ TEST_F(ScopedAllocatorConcatOpTest, Success3) { } TEST_F(ScopedAllocatorConcatOpTest, Reshape) { - MakeOp({2, 2, 4}, DT_DOUBLE, true, "test", 120, 2); - - // The elements of the third parameter to ExecOp must be multiples of - // Allocator::kAllocatorAlignment in size. If they are not, the backing - // tensor allocated by PrepOp will have too many elements and reshaping - // will fail. - ExecOp(DT_DOUBLE, 120, {{2, 4}, {2, 4}}); + MakeOp({2, 2, 2}, DT_DOUBLE, true, "test", 120, 2); + ExecOp(DT_DOUBLE, 120, {{2, 2}, {2, 2}}); } TEST_F(ScopedAllocatorConcatOpTest, NoReshapeAttr) { diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index d65692a552..7796bf3587 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -16,14 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ #define TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ - -// This file requires the following include because it uses CudaAtomicMax: -// #include "tensorflow/core/util/cuda_kernel_helper.h" - -// Unfortunately we can't add the #include, since it breaks compilation for -// non-GPU targets. This only breaks in clang, because it's more strict for -// template code and CudaAtomicMax is used in template context. - // This file requires the following include because it uses CudaAtomicMax: // #include "tensorflow/core/util/cuda_kernel_helper.h" @@ -138,4 +130,4 @@ struct Highest { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ diff --git a/tensorflow/core/kernels/sparse_matmul_op.cc b/tensorflow/core/kernels/sparse_matmul_op.cc index 866c5dcd52..a1f9667b78 100644 --- a/tensorflow/core/kernels/sparse_matmul_op.cc +++ b/tensorflow/core/kernels/sparse_matmul_op.cc @@ -1490,7 +1490,7 @@ inline void LibxsmmSparseMatMul::Compute( #endif // TENSORFLOW_USE_LIBXSMM -// Here is an overview of the SparseMatMul code. Note that we assume that the +// Here is a an overview of the SparseMatMul code. Note that we assume that the // left matrix is sparse. // // The matrix "left" is divided into a grid with blocksize of (M, KL). Each diff --git a/tensorflow/core/kernels/string_split_op.cc b/tensorflow/core/kernels/string_split_op.cc index 26ab72f12e..4c2b312c34 100644 --- a/tensorflow/core/kernels/string_split_op.cc +++ b/tensorflow/core/kernels/string_split_op.cc @@ -22,7 +22,6 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/str_util.h" namespace tensorflow { @@ -44,63 +43,6 @@ std::vector Split(const string& str, const string& delimiter, return char_vector; } -std::vector SplitV2(const string& str, StringPiece sep, int maxsplit) { - // This SplitV2 method matches the behavior of python's str.split: - // If sep is given, consecutive delimiters are not grouped together - // and are deemed to delimit empty strings (for example, '1,,2'.split(',') - // returns ['1', '', '2']). The sep argument may consist of multiple - // characters (for example, '1<>2<>3'.split('<>') returns ['1', '2', '3']). - // Splitting an empty string with a specified separator returns ['']. - // - // If sep is not specified or is None, a different splitting algorithm is - // applied: runs of consecutive whitespace are regarded as a single - // separator, and the result will contain no empty strings at the start or - // end if the string has leading or trailing whitespace. Consequently, - // splitting an empty string or a string consisting of just whitespace - // with a None separator returns []. - - std::vector result; - - StringPiece text(str); - if (maxsplit == 0) { - result.emplace_back(std::string(text)); - return result; - } - - if (sep.empty()) { - StringPiece token; - // Remove leading whitespaces. - str_util::RemoveLeadingWhitespace(&text); - int split = 0; - while (str_util::ConsumeNonWhitespace(&text, &token)) { - result.emplace_back(std::string(token)); - str_util::RemoveLeadingWhitespace(&text); - ++split; - if (maxsplit > 0 && split == maxsplit) { - result.emplace_back(std::string(text)); - return result; - } - } - return result; - } - auto p = std::search(text.begin(), text.end(), sep.begin(), sep.end()); - int split = 0; - while (p != text.end()) { - StringPiece token = text.substr(0, p - text.begin()); - result.emplace_back(std::string(token)); - text.remove_prefix(token.size()); - text.remove_prefix(sep.size()); - ++split; - if (maxsplit > 0 && split == maxsplit) { - result.emplace_back(std::string(text)); - return result; - } - p = std::search(text.begin(), text.end(), sep.begin(), sep.end()); - } - result.emplace_back(std::string(text)); - return result; -} - } // namespace class StringSplitOp : public OpKernel { @@ -180,78 +122,6 @@ class StringSplitOp : public OpKernel { bool skip_empty_; }; -class StringSplitV2Op : public OpKernel { - public: - explicit StringSplitV2Op(OpKernelConstruction* context) - : OpKernel(context), maxsplit_(-1) { - OP_REQUIRES_OK(context, context->GetAttr("maxsplit", &maxsplit_)); - } - - void Compute(OpKernelContext* ctx) override { - const Tensor* input_tensor; - OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor)); - OP_REQUIRES(ctx, TensorShapeUtils::IsVector(input_tensor->shape()), - errors::InvalidArgument("input must be a vector, got shape: ", - input_tensor->shape().DebugString())); - - const auto input_vec = input_tensor->vec(); - const int64 batch_size = input_vec.dimension(0); - - const Tensor* sep_tensor; - OP_REQUIRES_OK(ctx, ctx->input("sep", &sep_tensor)); - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(sep_tensor->shape()), - errors::InvalidArgument("sep must be a scalar, got shape: ", - sep_tensor->shape().DebugString())); - const auto sep_vec = sep_tensor->flat(); - StringPiece sep(sep_vec(0)); - std::vector tokens; - // Guess that we'll be unpacking a 
handful of tokens per example. - static constexpr int kReserveSize = 4; - tokens.reserve(batch_size * kReserveSize); - - int64 output_size = 0; - int64 max_num_entries = 0; - std::vector num_indices(batch_size); - for (int64 i = 0; i < batch_size; ++i) { - std::vector parts = SplitV2(input_vec(i), sep, maxsplit_); - int64 n_entries = parts.size(); - num_indices[i] = n_entries; - output_size += n_entries; - max_num_entries = std::max(max_num_entries, n_entries); - tokens.insert(tokens.end(), parts.begin(), parts.end()); - } - - Tensor* sp_indices_t; - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({output_size, 2}), - &sp_indices_t)); - Tensor* sp_tokens_t; - OP_REQUIRES_OK( - ctx, ctx->allocate_output(1, TensorShape({output_size}), &sp_tokens_t)); - Tensor* sp_shape_t; - OP_REQUIRES_OK(ctx, ctx->allocate_output(2, TensorShape({2}), &sp_shape_t)); - - auto sp_indices = sp_indices_t->matrix(); - auto sp_tokens = sp_tokens_t->vec(); - auto sp_shape = sp_shape_t->vec(); - sp_shape(0) = batch_size; - sp_shape(1) = max_num_entries; - size_t c = 0; - for (size_t i = 0; i < batch_size; ++i) { - for (size_t j = 0; j < num_indices[i]; ++j) { - sp_indices(c, 0) = i; - sp_indices(c, 1) = j; - sp_tokens(c) = tokens[c]; - ++c; - } - } - } - - private: - int maxsplit_; -}; - REGISTER_KERNEL_BUILDER(Name("StringSplit").Device(DEVICE_CPU), StringSplitOp); -REGISTER_KERNEL_BUILDER(Name("StringSplitV2").Device(DEVICE_CPU), - StringSplitV2Op); } // namespace tensorflow diff --git a/tensorflow/core/ops/candidate_sampling_ops.cc b/tensorflow/core/ops/candidate_sampling_ops.cc index 6e589c8d1c..6e4d100b04 100644 --- a/tensorflow/core/ops/candidate_sampling_ops.cc +++ b/tensorflow/core/ops/candidate_sampling_ops.cc @@ -145,15 +145,12 @@ REGISTER_OP("ComputeAccidentalHits") int64 num_true; TF_RETURN_IF_ERROR(c->GetAttr("num_true", &num_true)); - // Validate true_classes, must be a matrix. + // Validate true_classes. ShapeHandle true_classes; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &true_classes)); DimensionHandle unused; TF_RETURN_IF_ERROR( c->WithValue(c->Dim(true_classes, 1), num_true, &unused)); - // Validate sampled_candidates, must be a vector. - ShapeHandle sampled_candidates; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &sampled_candidates)); // All three outputs are the same shape. ShapeHandle v = c->Vector(InferenceContext::kUnknownDim); diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index 9dca5f53ce..15e0ca8af9 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -218,17 +218,7 @@ REGISTER_OP("MapAndBatchDataset") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn([](shape_inference::InferenceContext* c) { - // Use index from the end to retrieve the Input shapes, - // so that to avoid guessing the length of "other_arguments". - // batch_size, num_parallel_batches, and drop_remainder are 0-D scalars. 
- shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 3), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 2), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 1), 0, &unused)); - - return shape_inference::ScalarShape(c); - }); + .SetShapeFn(shape_inference::ScalarShape); REGISTER_OP("MapAndBatchDatasetV2") .Input("input_dataset: variant") @@ -241,17 +231,7 @@ REGISTER_OP("MapAndBatchDatasetV2") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn([](shape_inference::InferenceContext* c) { - // Use index from the end to retrieve the Input shapes, - // so that to avoid guessing the length of "other_arguments". - // batch_size, num_parallel_calls, and drop_remainder are 0-D scalars. - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 3), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 2), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 1), 0, &unused)); - - return shape_inference::ScalarShape(c); - }); + .SetShapeFn(shape_inference::ScalarShape); REGISTER_OP("PrefetchDataset") .Input("input_dataset: variant") diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc index 87f4991134..d949e70c66 100644 --- a/tensorflow/core/ops/image_ops.cc +++ b/tensorflow/core/ops/image_ops.cc @@ -454,9 +454,7 @@ REGISTER_OP("DrawBoundingBoxes") DimensionHandle unused; TF_RETURN_IF_ERROR(c->WithValue(c->Dim(boxes, 2), 4, &unused)); - // The rank of the input image (rank = 4) has already been restricted - // above, and the output is of the same shape as the input. - return shape_inference::UnchangedShape(c); + return shape_inference::UnchangedShapeWithRankAtLeast(c, 3); }); // -------------------------------------------------------------------------- diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index b3487122e2..1740fa152c 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1084,7 +1084,7 @@ REGISTER_OP("UnsortedSegmentProd") .Input("segment_ids: Tindices") .Input("num_segments: Tnumsegments") .Output("output: T") - .Attr("T: numbertype") + .Attr("T: realnumbertype") .Attr("Tindices: {int32,int64}") .Attr("Tnumsegments: {int32,int64} = DT_INT32") .SetShapeFn(UnsortedSegmentReductionShapeFn); diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 41efa49ce3..fc60e807b9 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1453,7 +1453,6 @@ REGISTER_OP("QuantizedReluX") ShapeHandle unused; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc index 4423062362..1d5c743a56 100644 --- a/tensorflow/core/ops/string_ops.cc +++ b/tensorflow/core/ops/string_ops.cc @@ -78,7 +78,7 @@ REGISTER_OP("ReduceJoin") REGISTER_OP("AsString") .Input("input: T") .Output("output: string") - .Attr("T: {int8, int16, int32, int64, complex64, float, double, bool}") + .Attr("T: {int32, int64, complex64, float, double, bool, int8}") .Attr("precision: int = -1") .Attr("scientific: bool = false") .Attr("shortest: bool = false") @@ -134,24 +134,6 
@@ REGISTER_OP("StringSplit") return Status::OK(); }); -REGISTER_OP("StringSplitV2") - .Input("input: string") - .Input("sep: string") - .Output("indices: int64") - .Output("values: string") - .Output("shape: int64") - .Attr("maxsplit: int = -1") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - - c->set_output(0, c->Matrix(InferenceContext::kUnknownDim, 2)); - c->set_output(1, c->Vector(InferenceContext::kUnknownDim)); - c->set_output(2, c->Vector(2)); - return Status::OK(); - }); - REGISTER_OP("StringStrip") .Input("input: string") .Output("output: string") diff --git a/tensorflow/core/platform/cpu_info.cc b/tensorflow/core/platform/cpu_info.cc index e9da3d8e32..99de364042 100644 --- a/tensorflow/core/platform/cpu_info.cc +++ b/tensorflow/core/platform/cpu_info.cc @@ -344,28 +344,5 @@ int CPUModelNum() { #endif } -int CPUIDNumSMT() { -#ifdef PLATFORM_IS_X86 - // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration - // https://software.intel.com/en-us/articles/intel-sdm (Vol 3A) - // Section: Detecting Hardware Multi-threads Support and Topology - // Uses CPUID Leaf 11 to enumerate system topology on Intel x86 architectures - // Other cases not supported - uint32 eax, ebx, ecx, edx; - // Check if system supports Leaf 11 - GETCPUID(eax, ebx, ecx, edx, 0, 0); - if (eax >= 11) { - // 1) Leaf 11 available? CPUID.(EAX=11, ECX=0):EBX != 0 - // 2) SMT_Mask_Width = CPUID.(EAX=11, ECX=0):EAX[4:0] if CPUID.(EAX=11, - // ECX=0):ECX[15:8] is 1 - GETCPUID(eax, ebx, ecx, edx, 11, 0); - if (ebx != 0 && ((ecx & 0xff00) >> 8) == 1) { - return 1 << (eax & 0x1f); // 2 ^ SMT_Mask_Width - } - } -#endif // PLATFORM_IS_X86 - return 0; -} - } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/cpu_info.h b/tensorflow/core/platform/cpu_info.h index 175c9ae8b1..b5be7e8b54 100644 --- a/tensorflow/core/platform/cpu_info.h +++ b/tensorflow/core/platform/cpu_info.h @@ -35,10 +35,6 @@ namespace port { // software can change it dynamically. int NumSchedulableCPUs(); -// Returns an estimate of the number of hyperthreads per physical core -// on the CPU -int NumHyperthreadsPerCore(); - // Mostly ISA related features that we care about enum CPUFeature { // Do not change numeric assignments. @@ -111,9 +107,6 @@ int CPUModelNum(); // Returns nominal core processor cycles per second of each processor. double NominalCPUFrequency(); -// Returns num of hyperthreads per physical core -int CPUIDNumSMT(); - } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index a319ccbdbe..ae81f9b5b3 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -71,8 +71,6 @@ def pyx_library( name = filename + "_cython_translation", srcs = [filename], outs = [filename.split(".")[0] + ".cpp"], - # Optionally use PYTHON_BIN_PATH on Linux platforms so that python 3 - # works. Windows has issues with cython_binary so skip PYTHON_BIN_PATH. 
cmd = "PYTHONHASHSEED=0 $(location @cython//:cython_binary) --cplus $(SRCS) --output-file $(OUTS)", tools = ["@cython//:cython_binary"] + pxd_srcs, ) diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc index ff4b4436bb..72c12318ca 100644 --- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc +++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc @@ -115,17 +115,18 @@ class LibHDFS { const char* kLibHdfsDso = "libhdfs.so"; #endif char* hdfs_home = getenv("HADOOP_HDFS_HOME"); - if (hdfs_home != nullptr) { - string path = io::JoinPath(hdfs_home, "lib", "native", kLibHdfsDso); - status_ = TryLoadAndBind(path.c_str(), &handle_); - if (status_.ok()) { - return; - } + if (hdfs_home == nullptr) { + status_ = errors::FailedPrecondition( + "Environment variable HADOOP_HDFS_HOME not set"); + return; + } + string path = io::JoinPath(hdfs_home, "lib", "native", kLibHdfsDso); + status_ = TryLoadAndBind(path.c_str(), &handle_); + if (!status_.ok()) { + // try load libhdfs.so using dynamic loader's search path in case + // libhdfs.so is installed in non-standard location + status_ = TryLoadAndBind(kLibHdfsDso, &handle_); } - - // Try to load the library dynamically in case it has been installed - // to a in non-standard location. - status_ = TryLoadAndBind(kLibHdfsDso, &handle_); } Status status_; diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 708f32ba80..8e316472fe 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -74,11 +74,6 @@ int NumSchedulableCPUs() { return kDefaultCores; } -int NumHyperthreadsPerCore() { - static const int ht_per_core = tensorflow::port::CPUIDNumSMT(); - return (ht_per_core > 0) ? ht_per_core : 1; -} - void* AlignedMalloc(size_t size, int minimum_alignment) { #if defined(__ANDROID__) return memalign(minimum_alignment, size); diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index cb1fd09dbb..522a9d84fd 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -19,12 +19,12 @@ limitations under the License. // TensorFlow uses semantic versioning, see http://semver.org/. #define TF_MAJOR_VERSION 1 -#define TF_MINOR_VERSION 9 +#define TF_MINOR_VERSION 8 #define TF_PATCH_VERSION 0 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc0" +#define TF_VERSION_SUFFIX "" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 90b6533690..dffc965b14 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -42,7 +42,6 @@ limitations under the License. #ifndef INTEL_MKL_ML #include "mkldnn.hpp" -#include "tensorflow/core/lib/core/stringpiece.h" using mkldnn::engine; using mkldnn::memory; @@ -713,48 +712,15 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, return output_tensor; } #else -using mkldnn::stream; -template class MklDnnData; - template inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, const MklDnnShape& mkl_shape) { Tensor output_tensor; - try { - if (!mkl_shape.IsMklTensor()) - return mkl_tensor; // return input since it is already TF tensor - - TensorShape output_shape = mkl_shape.GetTfShape();; - - // Allocate output tensor. 
- context->allocate_temp(DataTypeToEnum::v(), - output_shape, &output_tensor); - - auto cpu_engine = engine(engine::cpu, 0); - MklDnnData input(&cpu_engine); - - // Get Mkl layout of input tensor. - auto input_mkl_md = mkl_shape.GetMklLayout(); - auto output_tf_md = mkl_shape.GetTfLayout(); - auto output_tf_pd = memory::primitive_desc(output_tf_md, cpu_engine); - input.SetUsrMem(input_mkl_md, &mkl_tensor); - - // reorder - if (input.IsReorderNeeded(output_tf_pd)) { - std::vector net; - CHECK_EQ(input.CheckReorderToOpMem(output_tf_pd, &output_tensor, &net), - true); - stream(stream::kind::eager).submit(net).wait(); - } else { - // If not, just forward input tensor to output tensor. - CHECK(output_tensor.CopyFrom(mkl_tensor, output_shape)); - } - } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); - LOG(FATAL) << "Operation received an exception: " << error_msg; - } + TensorShape output_shape; + + TF_CHECK_OK( + Status(error::Code::UNIMPLEMENTED, "Unimplemented conversion function")); + return output_tensor; } #endif @@ -1877,7 +1843,7 @@ class FactoryKeyCreator { template void AddAsKey(const T data) { auto buffer = reinterpret_cast(&data); - Append(StringPiece(buffer, sizeof(T))); + Append(absl::string_view(buffer, sizeof(T))); } std::string GetKey() { @@ -1888,8 +1854,8 @@ class FactoryKeyCreator { string key_; const char delimiter = 'x'; const int kMaxKeyLength = 256; - void Append(StringPiece s) { - key_.append(s.ToString()); + void Append(absl::string_view s) { + key_.append(string(s)); key_.append(1, delimiter); } }; diff --git a/tensorflow/docs_src/community/groups.md b/tensorflow/docs_src/community/groups.md index 0b07d413da..d92f5775fa 100644 --- a/tensorflow/docs_src/community/groups.md +++ b/tensorflow/docs_src/community/groups.md @@ -1,38 +1,17 @@ # User Groups -TensorFlow has communities around the world. [Submit your community!](https://docs.google.com/forms/d/e/1FAIpQLSc_RQIUYtVgLLihzATaO_WUXkEyBDE_OoRoOXYDPmBEvHuEBA/viewform) +TensorFlow has communities around the world. 
## Asia -* [TensorFlow China community](https://www.tensorflowers.cn) -* [TensorFlow Korea (TF-KR) User Group](https://www.facebook.com/groups/TensorFlowKR/) -* [TensorFlow User Group Tokyo](https://tfug-tokyo.connpass.com/) -* [Soleil Data Dojo](https://soleildatadojo.connpass.com/) +* [TensorFlow Korea (TF-KR) User Group](https://www.facebook.com/groups/TensorFlowKR/) _(Korean language)_ +* [TensorFlow User Group Tokyo](https://tfug-tokyo.connpass.com/) _(Japanese Language)_ +* [Soleil Data Dojo](https://soleildatadojo.connpass.com/) _(Japanese language)_ * [TensorFlow User Group Utsunomiya](https://tfug-utsunomiya.connpass.com/) -* [TensorFlow Philippines Community](https://www.facebook.com/groups/TensorFlowPH/) -* [TensorFlow and Deep Learning Singapore](https://www.meetup.com/TensorFlow-and-Deep-Learning-Singapore/) -* [TensorFlow India](https://www.facebook.com/tensorflowindia) ## Europe * [TensorFlow Barcelona](https://www.meetup.com/Barcelona-Machine-Learning-Meetup/) * [TensorFlow Madrid](https://www.meetup.com/TensorFlow-Madrid/) -* [Tensorflow Belgium](https://www.meetup.com/TensorFlow-Belgium) -* [TensorFlow x Rome Meetup](https://www.meetup.com/it-IT/TensorFlow-x-Rome-Meetup) -* [TensorFlow London](https://www.meetup.com/TensorFlow-London/) -* [TensorFlow Edinburgh](https://www.meetup.com/tensorflow-edinburgh/) - -## America - -* [TensorFlow Buenos Aires](https://www.meetup.com/TensorFlow-Buenos-Aires/) - - -## Oceania -* [Melbourne TensorFlow Meetup](https://www.meetup.com/Melbourne-TensorFlow-Meetup) - - -## Africa - -* [TensorFlow Tunis Meetup](https://www.meetup.com/fr-FR/TensorFlow-Tunis-Meetup/) diff --git a/tensorflow/docs_src/get_started/eager.md b/tensorflow/docs_src/get_started/eager.md index bbb25e20c6..f08ac74425 100644 --- a/tensorflow/docs_src/get_started/eager.md +++ b/tensorflow/docs_src/get_started/eager.md @@ -1,3 +1,3 @@ # Get Started with Eager Execution -[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/r1.9.0/samples/core/get_started/eager.ipynb) +[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/r1.8.0/samples/core/get_started/eager.ipynb) diff --git a/tensorflow/docs_src/get_started/index.md b/tensorflow/docs_src/get_started/index.md index 232d2f1547..55579d52fb 100644 --- a/tensorflow/docs_src/get_started/index.md +++ b/tensorflow/docs_src/get_started/index.md @@ -10,9 +10,9 @@ course prior to diving into TensorFlow documentation: TensorFlow is a tool for machine learning. While it contains a wide range of functionality, TensorFlow is mainly designed for deep neural network models. -The easiest way to get started with TensorFlow is by using Eager Execution. +The easiest way to get started with TensorFlow is using Eager Execution. - * @{$get_started/eager}, is for anyone new to machine learning or TensorFlow. + * @{$get_started/eager}, is for anyone new to machine learning or TensorFlow. TensorFlow provides many APIs. The remainder of this section focuses on the Estimator API which provide scalable, high-performance models. 
See the diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 2901848745..1abd840ab3 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.9.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 55bc0f64e7..52a2a3f8a6 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.9.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 637231da12..1256fb99c4 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.9.0-rc0 + 1.8.0 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.9.0-rc0 + 1.8.0 @@ -124,12 +124,12 @@ instead: org.tensorflow libtensorflow - 1.9.0-rc0 + 1.8.0 org.tensorflow libtensorflow_jni_gpu - 1.9.0-rc0 + 1.8.0 ``` @@ -148,7 +148,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -167,7 +167,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.9.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -175,13 +175,13 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar), which is the TensorFlow Java Archive (JAR). 2. 
Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.9.0-rc0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0.zip). 3. Extract this .zip file. -__Note__: The native library (`tensorflow_jni.dll`) requires `msvcp140.dll` at runtime, which is included in the [Visual C++ 2015 Redistributable](https://www.microsoft.com/en-us/download/details.aspx?id=48145) package. + ### Validate the installation @@ -227,7 +227,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.9.0-rc0.jar HelloTF.java
+
javac -cp libtensorflow-1.8.0.jar HelloTF.java
### Running @@ -241,11 +241,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.9.0-rc0.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.8.0.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.9.0-rc0.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.8.0.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index c8d706cf3c..0ed8160027 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -339,7 +339,9 @@ Docker will download the TensorFlow binary image the first time you launch it. #### GPU support -To launch a Docker container with NVidia GPU support, enter a command of the following format (this [does not require any local CUDA installation](https://github.com/nvidia/nvidia-docker/wiki/CUDA#requirements)): +Prior to installing TensorFlow with GPU support, ensure that your system meets all +[NVIDIA software requirements](#NVIDIARequirements). To launch a Docker container +with NVidia GPU support, enter a command of the following format:
 $ nvidia-docker run -it -p hostPort:containerPort TensorFlowGPUImage
@@ -436,7 +438,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
 
      
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl
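The wheel installed above can be given a quick smoke test of the kind the validation section below describes. A minimal sketch, assuming a TensorFlow 1.x graph-mode install in the active environment; the constant string is only illustrative:

```python
# Smoke test for a freshly installed TensorFlow 1.x wheel.
import tensorflow as tf

print('TensorFlow version:', tf.__version__)

hello = tf.constant('Hello, TensorFlow!')
with tf.Session() as sess:  # TF 1.x graph-mode session
    print(sess.run(hello))  # expected: b'Hello, TensorFlow!'
```

If the import fails or the session cannot be created, the common installation problems sections referenced later in these guides are the place to look.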
## Validate your installation @@ -515,7 +517,7 @@ on your system: from source. To use the TensorFlow binaries, version 3.5 or higher is required. See the [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a list of supported GPU cards. -* [GPU drivers](http://nvidia.com/drivers) that support your version of the CUDA +* [GPU drivers](http://nvidia.com/driver) that support your version of the CUDA Toolkit. * The `libcupti-dev` library is the NVIDIA CUDA Profile Tools Interface. This library provides advanced profiling support. To install this library, @@ -682,14 +684,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -701,14 +703,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -720,14 +722,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp35-cp35m-linux_x86_64.whl
 
@@ -739,14 +741,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 9d01271c5a..29a867a9e3 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl @@ -522,7 +522,7 @@ The value you specify depends on your Python version.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl
 
@@ -530,5 +530,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-a
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index dc6c1e36fc..5ba522b436 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -81,7 +81,7 @@ or [macOS](#PrepareMac) - + ## Prepare environment for Linux Before building TensorFlow on Linux, install the following build @@ -328,10 +328,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.9.0rc0 on Linux: +for TensorFlow 1.8.0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.9.0rc0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0-py2-none-any.whl
 
## Validate your installation @@ -373,9 +373,9 @@ The build and installation problems you encounter typically depend on the operating system. See the "Common installation problems" section of one of the following guides: - * @{$install_linux#common_installation_problems$Installing TensorFlow on Linux} - * @{$install_mac#common_installation_problems$Installing TensorFlow on Mac OS} - * @{$install_windows#common_installation_problems$Installing TensorFlow on Windows} + * @{$install_linux#CommonInstallationProblems$Installing TensorFlow on Linux} + * @{$install_mac#CommonInstallationProblems$Installing TensorFlow on Mac OS} + * @{$install_windows#CommonInstallationProblems$Installing TensorFlow on Windows} Beyond the errors documented in those two guides, the following table notes additional errors specific to building TensorFlow. Note that we @@ -433,8 +433,6 @@ Stack Overflow and specify the `tensorflow` tag. **Linux** - - @@ -458,7 +456,6 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version: | CPU/GPU: | Python Version: | Compiler: | Build Tools: | cuDNN: | CUDA:
tensorflow-1.9.0 | CPU | 2.7, 3.3-3.6 | GCC 4.8 | Bazel 0.11.0 | N/A | N/A
tensorflow_gpu-1.9.0 | GPU | 2.7, 3.3-3.6 | GCC 4.8 | Bazel 0.11.0 | 7 | 9
tensorflow-1.8.0 | CPU | 2.7, 3.3-3.6 | GCC 4.8 | Bazel 0.10.0 | N/A | N/A
tensorflow_gpu-1.8.0 | GPU | 2.7, 3.3-3.6 | GCC 4.8 | Bazel 0.9.0 | 7 | 9
tensorflow-1.7.0 | CPU | 2.7, 3.3-3.6 | GCC 4.8 | Bazel 0.10.0 | N/A | N/A
- @@ -475,8 +472,6 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version: | CPU/GPU: | Python Version: | Compiler: | Build Tools: | cuDNN: | CUDA:
tensorflow-1.9.0 | CPU | 2.7, 3.3-3.6 | Clang from xcode | Bazel 0.11.0 | N/A | N/A
tensorflow-1.8.0 | CPU | 2.7, 3.3-3.6 | Clang from xcode | Bazel 0.10.1 | N/A | N/A
tensorflow-1.7.0 | CPU | 2.7, 3.3-3.6 | Clang from xcode | Bazel 0.10.1 | N/A | N/A
tensorflow-1.6.0 | CPU | 2.7, 3.3-3.6 | Clang from xcode | Bazel 0.8.1 | N/A | N/A
- - diff --git a/tensorflow/docs_src/mobile/linking_libs.md b/tensorflow/docs_src/mobile/linking_libs.md index efef5dd0da..cf0db59021 100644 --- a/tensorflow/docs_src/mobile/linking_libs.md +++ b/tensorflow/docs_src/mobile/linking_libs.md @@ -27,7 +27,7 @@ called `libandroid_tensorflow_inference_java.jar`. There are three ways to include this functionality in your program: 1. Include the jcenter AAR which contains it, as in this - [example app](https://github.com/googlecodelabs/tensorflow-for-poets-2/blob/master/android/tfmobile/build.gradle#L59-L65) + [example app](https://github.com/googlecodelabs/tensorflow-for-poets-2/blob/master/android/build.gradle#L59-L65) 2. Download the nightly precompiled version from [ci.tensorflow.org](http://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/). diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md index 2b84dbb973..8b22c04d87 100644 --- a/tensorflow/docs_src/mobile/prepare_models.md +++ b/tensorflow/docs_src/mobile/prepare_models.md @@ -105,8 +105,8 @@ inline constants so everything’s in one file. To handle the conversion, you need the `freeze_graph.py` script, that’s held in [`tensorflow/python/tools/freeze_graph.py`](https://www.tensorflow.org/code/tensorflow/python/tools/freeze_graph.py). You’ll run it like this: - bazel build tensorflow/python/tools:freeze_graph - bazel-bin/tensorflow/python/tools/freeze_graph \ + bazel build tensorflow/tools:freeze_graph + bazel-bin/tensorflow/tools/freeze_graph \ --input_graph=/tmp/model/my_graph.pb \ --input_checkpoint=/tmp/model/model.ckpt-1000 \ --output_graph=/tmp/frozen_graph.pb \ diff --git a/tensorflow/docs_src/performance/quantization.md b/tensorflow/docs_src/performance/quantization.md index c97f74139c..2fea02d861 100644 --- a/tensorflow/docs_src/performance/quantization.md +++ b/tensorflow/docs_src/performance/quantization.md @@ -227,8 +227,8 @@ of 30.0f, and an 8-bit array, the quantized values represent the following:
Version: | CPU/GPU: | Python Version: | Compiler: | Build Tools: | cuDNN: | CUDA:
tensorflow-1.9.0 | CPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | N/A | N/A
tensorflow_gpu-1.9.0 | GPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | 7 | 9
tensorflow-1.8.0 | CPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | N/A | N/A
tensorflow_gpu-1.8.0 | GPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | 7 | 9
tensorflow-1.7.0 | CPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | N/A | N/A
- +
Quantized | Float
0 | -10.0
128 | 10.0
255 | 30.0
128 | 10.0
Table 2: Example quantized value range diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md index b13b47184d..c4aae1d9d6 100644 --- a/tensorflow/docs_src/programmers_guide/estimators.md +++ b/tensorflow/docs_src/programmers_guide/estimators.md @@ -21,17 +21,18 @@ Note: TensorFlow also includes a deprecated `Estimator` class at Estimators provide the following benefits: -* You can run Estimator-based models on a local host or on a +* You can run Estimators-based models on a local host or on a distributed multi-server environment without changing your model. - Furthermore, you can run Estimator-based models on CPUs, GPUs, + Furthermore, you can run Estimators-based models on CPUs, GPUs, or TPUs without recoding your model. * Estimators simplify sharing implementations between model developers. -* You can develop a state of the art model with high-level intuitive code. +* You can develop a state of the art model with high-level intuitive code, In short, it is generally much easier to create models with Estimators than with the low-level TensorFlow APIs. -* Estimators are themselves built on @{tf.layers}, which +* Estimators are themselves built on tf.layers, which simplifies customization. -* Estimators build the graph for you. +* Estimators build the graph for you. In other words, you don't have to + build the graph. * Estimators provide a safe distributed training loop that controls how and when to: * build the graph @@ -56,7 +57,7 @@ the "plumbing" for you. That is, pre-made Estimators create and manage pre-made Estimators let you experiment with different model architectures by making only minimal code changes. @{tf.estimator.DNNClassifier$`DNNClassifier`}, for example, is a pre-made Estimator class that trains classification models -based on dense, feed-forward neural networks. +through dense, feed-forward neural networks. ### Structure of a pre-made Estimators program @@ -78,7 +79,7 @@ of the following four steps: an input function: def input_fn(dataset): - ... # manipulate dataset, extracting the feature dict and the label + ... # manipulate dataset, extracting feature names and the label return feature_dict, label (See @{$programmers_guide/datasets} for full details.) @@ -95,13 +96,13 @@ of the following four steps: population = tf.feature_column.numeric_column('population') crime_rate = tf.feature_column.numeric_column('crime_rate') median_education = tf.feature_column.numeric_column('median_education', - normalizer_fn=lambda x: x - global_education_mean) + normalizer_fn='lambda x: x - global_education_mean') 3. **Instantiate the relevant pre-made Estimator.** For example, here's a sample instantiation of a pre-made Estimator named `LinearClassifier`: # Instantiate an estimator, passing the feature columns. - estimator = tf.estimator.LinearClassifier( + estimator = tf.estimator.Estimator.LinearClassifier( feature_columns=[population, crime_rate, median_education], ) diff --git a/tensorflow/docs_src/programmers_guide/feature_columns.md b/tensorflow/docs_src/programmers_guide/feature_columns.md index 90f5c53a17..845194fe0e 100644 --- a/tensorflow/docs_src/programmers_guide/feature_columns.md +++ b/tensorflow/docs_src/programmers_guide/feature_columns.md @@ -528,10 +528,10 @@ suggested by the following snippet: categorical_column = ... # Create any categorical column # Represent the categorical column as an embedding column. -# This means creating an embedding vector lookup table with one element for each category. 
+# This means creating a one-hot vector with one element for each category. embedding_column = tf.feature_column.embedding_column( categorical_column=categorical_column, - dimension=embedding_dimensions) + dimension=dimension_of_embedding_vector) ``` @{$programmers_guide/embedding$Embeddings} is a significant topic within machine diff --git a/tensorflow/examples/learn/iris.py b/tensorflow/examples/learn/iris.py index 86f5204ec3..03e60972aa 100644 --- a/tensorflow/examples/learn/iris.py +++ b/tensorflow/examples/learn/iris.py @@ -21,8 +21,7 @@ from __future__ import division from __future__ import print_function import os - -from six.moves.urllib.request import urlretrieve +import urllib import tensorflow as tf @@ -39,7 +38,9 @@ FEATURE_KEYS = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width'] def maybe_download_iris_data(file_name, download_url): """Downloads the file and returns the number of data.""" if not os.path.exists(file_name): - urlretrieve(download_url, file_name) + raw = urllib.urlopen(download_url).read() + with open(file_name, 'w') as f: + f.write(raw) # The first line is a comma-separated string. The first one is the number of # total data in the file. diff --git a/tensorflow/java/src/gen/cc/op_generator.cc b/tensorflow/java/src/gen/cc/op_generator.cc index 9b171f66ec..debd95fc62 100644 --- a/tensorflow/java/src/gen/cc/op_generator.cc +++ b/tensorflow/java/src/gen/cc/op_generator.cc @@ -376,6 +376,9 @@ void GenerateOp(const OpSpec& op, const EndpointSpec& endpoint, } } // op annotations + op_class.add_annotation( + Annotation::Create("Generated", "javax.annotation") + .attributes("value = \"TensorFlow Java Op Generator\"")); if (endpoint.deprecated()) { op_class.add_annotation(Annotation::Create("Deprecated")); string explanation; @@ -412,12 +415,8 @@ void GenerateOp(const OpSpec& op, const EndpointSpec& endpoint, SourceFileWriter writer(op_file.get()); std::list dependencies; CollectOpDependencies(op, mode, &dependencies); - writer.Write(kLicense) - .EndLine() - .Write("// This class has been generated, DO NOT EDIT!") - .EndLine() - .EndLine() - .BeginType(op_class, PUBLIC | FINAL, &dependencies, &op_javadoc); + writer.Write(kLicense).EndLine().BeginType(op_class, PUBLIC | FINAL, + &dependencies, &op_javadoc); if (!op.optional_attributes().empty()) { RenderOptionsClass(op, op_class, &writer); } diff --git a/tensorflow/java/src/gen/cc/op_specs.cc b/tensorflow/java/src/gen/cc/op_specs.cc index 941ab2699c..181fd4c5e3 100644 --- a/tensorflow/java/src/gen/cc/op_specs.cc +++ b/tensorflow/java/src/gen/cc/op_specs.cc @@ -96,7 +96,6 @@ Type TypeResolver::TypeOf(const OpDef_ArgDef& arg_def, bool* iterable_out) { *iterable_out = true; visited_attrs_.insert(std::make_pair(arg_def.number_attr(), Type::Int())); } - Type type = Type::Wildcard(); if (arg_def.type() != DataType::DT_INVALID) { // resolve type from DataType diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index bd97b181ff..b2e6c60021 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -196,11 +196,11 @@ def implicit_val_and_grad(f): # TODO(cais): Remove calls to tf.constant() once the gradients functions # accept lists and np.ndarrays. 
- def grad_fn(*args, **kwds): + def grad_fn(*args): """Computes the gradient of the wrapped function.""" this_tape = tape.push_new_tape() try: - end_node = f(*args, **kwds) + end_node = f(*args) if end_node is None: raise ValueError("Cannot differentiate a function that returns None; " "did you forget to return a value from {}?".format( diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 20522098b0..9cd17e0407 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -978,10 +978,7 @@ py_test( size = "large", srcs = ["keras_test.py"], srcs_version = "PY2AND3", - tags = [ - "no_windows", - "notsan", - ], + tags = ["notsan"], deps = [ ":keras", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/python/estimator/exporter.py b/tensorflow/python/estimator/exporter.py index b18212cfcd..7cdf840c97 100644 --- a/tensorflow/python/estimator/exporter.py +++ b/tensorflow/python/estimator/exporter.py @@ -156,7 +156,7 @@ def _loss_smaller(best_eval_result, current_eval_result): return best_eval_result[default_key] > current_eval_result[default_key] -def _verify_compare_fn_args(compare_fn): +def _verify_compre_fn_args(compare_fn): """Verifies compare_fn arguments.""" args = set(util.fn_args(compare_fn)) if 'best_eval_result' not in args: @@ -265,7 +265,7 @@ class BestExporter(Exporter): self._compare_fn = compare_fn if self._compare_fn is None: raise ValueError('`compare_fn` must not be None.') - _verify_compare_fn_args(self._compare_fn) + _verify_compre_fn_args(self._compare_fn) self._saved_model_exporter = _SavedModelExporter( name, serving_input_receiver_fn, assets_extra, as_text) diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py index a6cefdece2..035c7c148c 100644 --- a/tensorflow/python/estimator/inputs/numpy_io.py +++ b/tensorflow/python/estimator/inputs/numpy_io.py @@ -136,13 +136,11 @@ def numpy_input_fn(x, values in `x` have same shape). ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict. ValueError: if x or y is an empty dict. - TypeError: `x` is not a dict or array. - ValueError: if 'shuffle' is not provided or a bool. + TypeError: `x` is not a dict or array, or if `shuffle` is not bool. """ if not isinstance(shuffle, bool): - raise ValueError('shuffle must be provided and explicitly set as boolean ' - '(it is recommended to set it as True for training); ' - 'got {}'.format(shuffle)) + raise TypeError('shuffle must be explicitly set as boolean; ' + 'got {}'.format(shuffle)) def input_fn(): """Numpy input function.""" diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py index 81b201cc5c..92d057e25d 100644 --- a/tensorflow/python/estimator/inputs/numpy_io_test.py +++ b/tensorflow/python/estimator/inputs/numpy_io_test.py @@ -286,9 +286,8 @@ class NumpyIoTest(test.TestCase): x = np.arange(32, 36) y = np.arange(4) with self.test_session(): - with self.assertRaisesRegexp(ValueError, - 'shuffle must be provided and explicitly ' - 'set as boolean'): + with self.assertRaisesRegexp(TypeError, + 'shuffle must be explicitly set as boolean'): # Default shuffle is None. 
numpy_io.numpy_input_fn(x, y) diff --git a/tensorflow/python/estimator/inputs/pandas_io.py b/tensorflow/python/estimator/inputs/pandas_io.py index 57f8e5fd6a..938e244fb3 100644 --- a/tensorflow/python/estimator/inputs/pandas_io.py +++ b/tensorflow/python/estimator/inputs/pandas_io.py @@ -68,16 +68,15 @@ def pandas_input_fn(x, Raises: ValueError: if `x` already contains a column with the same name as `y`, or if the indexes of `x` and `y` don't match. - ValueError: if 'shuffle' is not provided or a bool. + TypeError: `shuffle` is not bool. """ if not HAS_PANDAS: raise TypeError( 'pandas_input_fn should not be called without pandas installed') if not isinstance(shuffle, bool): - raise ValueError('shuffle must be provided and explicitly set as boolean ' - '(it is recommended to set it as True for training); ' - 'got {}'.format(shuffle)) + raise TypeError('shuffle must be explicitly set as boolean; ' + 'got {}'.format(shuffle)) x = x.copy() if y is not None: diff --git a/tensorflow/python/estimator/inputs/pandas_io_test.py b/tensorflow/python/estimator/inputs/pandas_io_test.py index dcecf6dd61..e5912a3b28 100644 --- a/tensorflow/python/estimator/inputs/pandas_io_test.py +++ b/tensorflow/python/estimator/inputs/pandas_io_test.py @@ -70,9 +70,8 @@ class PandasIoTest(test.TestCase): return x, _ = self.makeTestDataFrame() y_noindex = pd.Series(np.arange(-32, -28)) - with self.assertRaisesRegexp(ValueError, - 'shuffle must be provided and explicitly ' - 'set as boolean'): + with self.assertRaisesRegexp(TypeError, + 'shuffle must be explicitly set as boolean'): # Default shuffle is None pandas_io.pandas_input_fn(x, y_noindex) diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions.py b/tensorflow/python/estimator/inputs/queues/feeding_functions.py index 51a61adb21..8e2ec83020 100644 --- a/tensorflow/python/estimator/inputs/queues/feeding_functions.py +++ b/tensorflow/python/estimator/inputs/queues/feeding_functions.py @@ -250,7 +250,7 @@ class _PandasFeedFn(object): num_epochs=None): if len(placeholders) != len(dataframe.columns) + 1: raise ValueError("Expected {} placeholders; got {}.".format( - len(dataframe.columns) + 1, len(placeholders))) + len(dataframe.columns), len(placeholders))) self._index_placeholder = placeholders[0] self._col_placeholders = placeholders[1:] self._dataframe = dataframe diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py index 2f439f765e..c80af08fba 100644 --- a/tensorflow/python/estimator/keras.py +++ b/tensorflow/python/estimator/keras.py @@ -70,7 +70,7 @@ def _convert_tensor(x): return x -def _any_variable_initialized(): +def _any_variable_initalized(): """Check if any variable has been initialized in the Keras model. Returns: @@ -511,7 +511,7 @@ def model_to_estimator(keras_model=None, keras_model_fn, model_dir=model_dir, config=config) # Check if we need to call get_weights: - if _any_variable_initialized(): + if _any_variable_initalized(): keras_weights = keras_model.get_weights() # Warn if config passed to estimator tries to update GPUOptions. 
If a # session has already been created, the GPUOptions passed to the first diff --git a/tensorflow/python/estimator/keras_test.py b/tensorflow/python/estimator/keras_test.py index 5e094ae92b..6688a84130 100644 --- a/tensorflow/python/estimator/keras_test.py +++ b/tensorflow/python/estimator/keras_test.py @@ -31,10 +31,10 @@ from tensorflow.python.estimator import run_config as run_config_lib from tensorflow.python.estimator.inputs import numpy_io from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.keras import backend as K from tensorflow.python.keras import testing_utils from tensorflow.python.keras.applications import mobilenet from tensorflow.python.keras.optimizers import SGD -from tensorflow.python.ops.parsing_ops import gen_parsing_ops from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.summary.writer import writer_cache @@ -146,13 +146,13 @@ def randomize_io_type(array, name): def multi_inputs_multi_outputs_model(): a = keras.layers.Input(shape=(16,), name='input_a') b = keras.layers.Input(shape=(16,), name='input_b') - m = keras.layers.Input(shape=(8,), dtype='string', name='input_m') + m = keras.layers.Input(shape=(8,), dtype='bool', name='input_m') dense = keras.layers.Dense(8, name='dense_1') a_2 = dense(a) - # Read m - m_2 = keras.layers.Lambda(gen_parsing_ops.string_to_number)(m) - s_2 = keras.layers.Lambda(lambda k: k[0] * k[1])([m_2, a_2]) + # Apply a mask + s_2 = keras.layers.Lambda(lambda k: + K.switch(k[0], k[1], K.zeros_like(k[1])))([m, a_2]) b_2 = dense(b) merged = keras.layers.concatenate([s_2, b_2], name='merge') c = keras.layers.Dense(3, activation='softmax', name='dense_2')(merged) @@ -372,13 +372,13 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): def train_input_fn(): input_dict = {'input_a': a_train, 'input_b': b_train, - 'input_m': input_m_train.astype(np.str)} + 'input_m': input_m_train > 0} output_dict = {'dense_2': c_train, 'dense_3': d_train} return input_dict, output_dict def eval_input_fn(): input_dict = {'input_a': a_test, 'input_b': b_test, - 'input_m': input_m_test.astype(np.str)} + 'input_m': input_m_test > 0} output_dict = {'dense_2': c_test, 'dense_3': d_test} return input_dict, output_dict diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index af5d709f7e..2d6925d1a8 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -1389,7 +1389,7 @@ class LayoutOptimizerTest(test.TestCase): expected_num_transposes = 3 self.assertEqual(expected_num_transposes, num_transposes) self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes) - self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes) + self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes) self.assertAllClose(output_val_ref, output_val, atol=1e-3) def testLoopWithVecAnd4D(self): @@ -1413,7 +1413,7 @@ class LayoutOptimizerTest(test.TestCase): expected_num_transposes = 2 self.assertEqual(expected_num_transposes, num_transposes) self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes) - self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes) + self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes) self.assertAllClose(output_val_ref, output_val, atol=1e-3) def testBinaryOpSecondPort(self): diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py index 
f608dea430..e487f583be 100644 --- a/tensorflow/python/keras/activations.py +++ b/tensorflow/python/keras/activations.py @@ -93,8 +93,6 @@ def selu(x): - To be used together with the initialization "lecun_normal". - To be used together with the dropout variant "AlphaDropout". - References: - - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) """ alpha = 1.6732632423543772848170429916717 scale = 1.0507009873554804934193349852946 diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 9f91368e5b..70b6a8431a 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -724,6 +724,15 @@ class TensorBoard(Callback): for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf_summary.histogram(mapped_weight_name, weight) + if self.write_grads: + grads = model.optimizer.get_gradients(model.total_loss, weight) + + def is_indexed_slices(grad): + return type(grad).__name__ == 'IndexedSlices' + + grads = [grad.values if is_indexed_slices(grad) else grad + for grad in grads] + tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) @@ -750,18 +759,6 @@ class TensorBoard(Callback): assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(mapped_weight_name, w_img) - if self.write_grads: - for weight in layer.trainable_weights: - mapped_weight_name = weight.name.replace(':', '_') - grads = model.optimizer.get_gradients(model.total_loss, weight) - - def is_indexed_slices(grad): - return type(grad).__name__ == 'IndexedSlices' - - grads = [grad.values if is_indexed_slices(grad) else grad - for grad in grads] - tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads) - if hasattr(layer, 'output'): tf_summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf_summary.merge_all() diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index 5062a26580..b355f4a269 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -653,8 +653,6 @@ class KerasCallbacksTest(test.TestCase): model.add( keras.layers.Dense( NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu')) - # non_trainable_weights: moving_variance, moving_mean - model.add(keras.layers.BatchNormalization()) model.add(keras.layers.Dense(NUM_CLASSES, activation='softmax')) model.compile( loss='categorical_crossentropy', diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py index 1c9135982e..a4cd017d60 100644 --- a/tensorflow/python/keras/engine/network.py +++ b/tensorflow/python/keras/engine/network.py @@ -123,7 +123,7 @@ class Network(base_layer.Layer): # Entries are unique. Includes input and output layers. 
self._layers = [] - # Used in symbolic mode only, only in conjunction with graph-networks + # Used in symbolic mode only, only in conjonction with graph-networks self._outbound_nodes = [] self._inbound_nodes = [] diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py index 7e82db028b..6a94986b9c 100644 --- a/tensorflow/python/keras/engine/saving_test.py +++ b/tensorflow/python/keras/engine/saving_test.py @@ -482,7 +482,7 @@ class TestWholeModelSaving(test.TestCase): with h5py.File(fname, 'r') as h5file: num_names_arrays = len([attr for attr in h5file['model_weights'].attrs if attr.startswith('layer_names')]) - # The chunking of layer names array should have happened. + # The chunking of layer names array should have happend. self.assertGreater(num_names_arrays, 0) out2 = model.predict(x) self.assertAllClose(out, out2, atol=1e-05) @@ -527,7 +527,7 @@ class TestWholeModelSaving(test.TestCase): num_weight_arrays = len( [attr for attr in h5file['model_weights']['nested_model'].attrs if attr.startswith('weight_names')]) - # The chunking of layer names array should have happened. + # The chunking of layer names array should have happend. self.assertGreater(num_weight_arrays, 0) out2 = model.predict(x) self.assertAllClose(out, out2, atol=1e-05) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index fce6cbdb7a..89c1f1a40f 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -24,7 +24,6 @@ import numpy as np from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.eager import context -from tensorflow.python.framework import constant_op from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util @@ -410,13 +409,11 @@ class Model(Network): else: if sample_weight_mode == 'temporal': sample_weights.append(array_ops.placeholder_with_default( - constant_op.constant([[1.]], dtype=K.floatx()), - shape=[None, None], name=name + '_sample_weights')) + [[1.]], shape=[None, None], name=name + '_sample_weights')) sample_weight_modes.append('temporal') else: sample_weights.append(array_ops.placeholder_with_default( - constant_op.constant([1.], dtype=K.floatx()), - shape=[None], name=name + '_sample_weights')) + [1.], shape=[None], name=name + '_sample_weights')) sample_weight_modes.append(None) self.sample_weight_modes = sample_weight_modes self._feed_sample_weight_modes = [] diff --git a/tensorflow/python/keras/engine/training_eager.py b/tensorflow/python/keras/engine/training_eager.py index e8838cd3bc..2ecbff3a1c 100644 --- a/tensorflow/python/keras/engine/training_eager.py +++ b/tensorflow/python/keras/engine/training_eager.py @@ -732,7 +732,7 @@ def slice_arrays(arrays, indices, contiguous=True): """Slices batches out of provided arrays (workaround for eager tensors). Unfortunately eager tensors don't have the same slicing behavior as - Numpy arrays (they follow the same slicing behavior as symbolic TF tensors), + Numpy arrays (they folow the same slicing behavior as symbolic TF tensors), hence we cannot use `generic_utils.slice_arrays` directly and we have to implement this workaround based on `concat`. This has a performance cost. 
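The slice_arrays docstring above notes that eager tensors cannot be fancy-indexed like numpy arrays, so batches are sliced via a concat/gather-style workaround. A minimal standalone sketch of that idea, using public tf.* names as an illustration rather than the library's own helper:

import tensorflow as tf

def slice_batch(arrays, indices):
  # Gather the requested rows from each tensor; eager tensors do not support
  # numpy-style fancy indexing, but tf.gather achieves the same selection.
  idx = tf.constant(indices, dtype=tf.int32)
  return [tf.gather(a, idx) for a in arrays]

x = tf.reshape(tf.range(12, dtype=tf.float32), [6, 2])
batch = slice_batch([x], [0, 2, 4])  # rows 0, 2 and 4 of x
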
diff --git a/tensorflow/python/keras/initializers_test.py b/tensorflow/python/keras/initializers_test.py index c519e194bd..a54d6da839 100644 --- a/tensorflow/python/keras/initializers_test.py +++ b/tensorflow/python/keras/initializers_test.py @@ -71,7 +71,7 @@ class KerasInitializersTest(test.TestCase): stddev=1, seed=126), tensor_shape, - target_mean=0., target_max=2, target_min=-2) + target_mean=0., target_std=None, target_max=2) def test_constant(self): tensor_shape = (5, 6, 4) @@ -83,49 +83,49 @@ class KerasInitializersTest(test.TestCase): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - std = np.sqrt(1. / fan_in) + scale = np.sqrt(3. / fan_in) self._runner(keras.initializers.lecun_uniform(seed=123), tensor_shape, - target_mean=0., target_std=std) + target_mean=0., target_max=scale, target_min=-scale) def test_glorot_uniform(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, fan_out = init_ops._compute_fans(tensor_shape) - std = np.sqrt(2. / (fan_in + fan_out)) + scale = np.sqrt(6. / (fan_in + fan_out)) self._runner(keras.initializers.glorot_uniform(seed=123), tensor_shape, - target_mean=0., target_std=std) + target_mean=0., target_max=scale, target_min=-scale) def test_he_uniform(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - std = np.sqrt(2. / fan_in) + scale = np.sqrt(6. / fan_in) self._runner(keras.initializers.he_uniform(seed=123), tensor_shape, - target_mean=0., target_std=std) + target_mean=0., target_max=scale, target_min=-scale) def test_lecun_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - std = np.sqrt(1. / fan_in) + scale = np.sqrt(1. / fan_in) self._runner(keras.initializers.lecun_normal(seed=123), tensor_shape, - target_mean=0., target_std=std) + target_mean=0., target_std=None, target_max=2 * scale) def test_glorot_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, fan_out = init_ops._compute_fans(tensor_shape) - std = np.sqrt(2. / (fan_in + fan_out)) + scale = np.sqrt(2. / (fan_in + fan_out)) self._runner(keras.initializers.glorot_normal(seed=123), tensor_shape, - target_mean=0., target_std=std) + target_mean=0., target_std=None, target_max=2 * scale) def test_he_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - std = np.sqrt(2. / fan_in) + scale = np.sqrt(2. 
/ fan_in) self._runner(keras.initializers.he_normal(seed=123), tensor_shape, - target_mean=0., target_std=std) + target_mean=0., target_std=None, target_max=2 * scale) def test_orthogonal(self): tensor_shape = (20, 20) diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py index f60064ed63..5061825d38 100644 --- a/tensorflow/python/keras/layers/core.py +++ b/tensorflow/python/keras/layers/core.py @@ -19,9 +19,7 @@ from __future__ import division from __future__ import print_function import copy -import sys import types as python_types -import warnings import numpy as np @@ -716,7 +714,6 @@ class Lambda(Layer): return self.mask def get_config(self): - module = self.function.__module__ if isinstance(self.function, python_types.LambdaType): function = generic_utils.func_dump(self.function) function_type = 'lambda' @@ -724,26 +721,21 @@ class Lambda(Layer): function = self.function.__name__ function_type = 'function' - output_shape_module = None if isinstance(self._output_shape, python_types.LambdaType): output_shape = generic_utils.func_dump(self._output_shape) output_shape_type = 'lambda' - output_shape_module = self._output_shape.__module__ elif callable(self._output_shape): output_shape = self._output_shape.__name__ output_shape_type = 'function' - output_shape_module = self._output_shape.__module__ else: output_shape = self._output_shape output_shape_type = 'raw' config = { 'function': function, - 'module': module, 'function_type': function_type, 'output_shape': output_shape, 'output_shape_type': output_shape_type, - 'output_shape_module': output_shape_module, 'arguments': self.arguments } base_config = super(Lambda, self).get_config() @@ -753,16 +745,8 @@ class Lambda(Layer): def from_config(cls, config, custom_objects=None): config = config.copy() globs = globals() - module = config.pop('module', None) - if module in sys.modules: - globs.update(sys.modules[module].__dict__) - elif module is not None: - # Note: we don't know the name of the function if it's a lambda. - warnings.warn('{} is not loaded, but a Lambda layer uses it. ' - 'It may cause errors.'.format(module) - , UserWarning) if custom_objects: - globs.update(custom_objects) + globs = dict(list(globs.items()) + list(custom_objects.items())) function_type = config.pop('function_type') if function_type == 'function': # Simple lookup in custom objects @@ -776,14 +760,6 @@ class Lambda(Layer): else: raise TypeError('Unknown function type:', function_type) - output_shape_module = config.pop('output_shape_module', None) - if output_shape_module in sys.modules: - globs.update(sys.modules[output_shape_module].__dict__) - elif output_shape_module is not None: - # Note: we don't know the name of the function if it's a lambda. - warnings.warn('{} is not loaded, but a Lambda layer uses it. 
' - 'It may cause errors.'.format(output_shape_module) - , UserWarning) output_shape_type = config.pop('output_shape_type') if output_shape_type == 'function': # Simple lookup in custom objects diff --git a/tensorflow/python/keras/models_test.py b/tensorflow/python/keras/models_test.py index e6e45902a8..c616d8f24f 100644 --- a/tensorflow/python/keras/models_test.py +++ b/tensorflow/python/keras/models_test.py @@ -144,19 +144,5 @@ class CheckpointingTests(test.TestCase): model.load_weights(save_prefix) self.assertEqual(12., self.evaluate(beta1_power)) -class TestModelBackend(test.TestCase): - - def test_model_backend_float64_use_cases(self): - # Test case for GitHub issue 19318 - floatx = keras.backend.floatx() - keras.backend.set_floatx('float64') - - x = keras.Input((5,)) - y = keras.layers.Dense(1)(x) - model = keras.models.Model(x, y) - model.compile('rmsprop', 'mse') - - keras.backend.set_floatx(floatx) - if __name__ == '__main__': test.main() diff --git a/tensorflow/python/kernel_tests/as_string_op_test.py b/tensorflow/python/kernel_tests/as_string_op_test.py index 94ed8ebd31..9d54add264 100644 --- a/tensorflow/python/kernel_tests/as_string_op_test.py +++ b/tensorflow/python/kernel_tests/as_string_op_test.py @@ -130,16 +130,6 @@ class AsStringOpTest(test.TestCase): result = output.eval(feed_dict={input_: int_inputs_}) self.assertAllEqual(s(result), ["%d" % x for x in int_inputs_]) - def testHalfInt(self): - s = lambda strs: [x.decode("ascii") for x in strs] - - with self.test_session(): - input_ = array_ops.placeholder(dtypes.int16) - int_inputs_ = [np.iinfo(np.int16).min, np.iinfo(np.int16).max] - output = string_ops.as_string(input_) - result = output.eval(feed_dict={input_: int_inputs_}) - self.assertAllEqual(s(result), ["%d" % x for x in int_inputs_]) - def testBool(self): bool_inputs_ = [False, True] s = lambda strs: [x.decode("ascii") for x in strs] diff --git a/tensorflow/python/kernel_tests/betainc_op_test.py b/tensorflow/python/kernel_tests/betainc_op_test.py index 16fdedac41..08b03f8518 100644 --- a/tensorflow/python/kernel_tests/betainc_op_test.py +++ b/tensorflow/python/kernel_tests/betainc_op_test.py @@ -172,7 +172,7 @@ class BetaincTest(test.TestCase): tf_gout_t = math_ops.betainc(tf_ga_s, tf_gb_s, tf_gx_s) err = gradient_checker.compute_gradient_error( [tf_gx_s], [gx_s.shape], tf_gout_t, gx_s.shape) - tf_logging.info("betainc gradient err = %g " % err) + print("betainc gradient err = %g " % err) self.assertLess(err, err_tolerance) # Test broadcast gradient @@ -181,7 +181,7 @@ class BetaincTest(test.TestCase): tf_gout_t = math_ops.betainc(tf_ga_s, tf_gb_s, tf_gx_s) err = gradient_checker.compute_gradient_error( [tf_gx_s], [()], tf_gout_t, ga_s.shape) - tf_logging.info("betainc gradient err = %g " % err) + print("betainc gradient err = %g " % err) self.assertLess(err, err_tolerance) diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py index fb52d10475..e08123b041 100644 --- a/tensorflow/python/kernel_tests/clip_ops_test.py +++ b/tensorflow/python/kernel_tests/clip_ops_test.py @@ -18,12 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np - from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import gradient_checker from 
tensorflow.python.platform import test @@ -417,16 +414,6 @@ class ClipTest(test.TestCase): self.assertAllClose(np_ans, tf_ans) - def testClipByValueEmptyTensor(self): - # Test case for GitHub issue 19337 - zero = array_ops.placeholder(dtype=dtypes.float32, shape=None) - x = clip_ops.clip_by_value(zero, zero, zero) - y = clip_ops.clip_by_value(zero, 1.0, 1.0) - z = clip_ops.clip_by_value(zero, zero, 1.0) - w = clip_ops.clip_by_value(zero, 1.0, zero) - with self.test_session(use_gpu=True) as sess: - sess.run([x, y, z, w], feed_dict={zero: np.zeros((7, 0))}) - if __name__ == '__main__': test.main() diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py index 80ba7dafc9..8699fd5b25 100644 --- a/tensorflow/python/kernel_tests/conv_ops_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_test.py @@ -312,8 +312,8 @@ class Conv2DTest(test.TestCase): expected_values = self.evaluate(expected_results) computed_values = self.evaluate(computed_results) for e_value, c_value in zip(expected_values, computed_values): - tf_logging.info("expected = ", e_value) - tf_logging.info("actual = ", c_value) + print("expected = ", e_value) + print("actual = ", c_value) self.assertAllClose( e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-4) @@ -337,8 +337,8 @@ class Conv2DTest(test.TestCase): for i in range(len(tensors)): conv = tensors[i] value = values[i] - tf_logging.info("expected = ", expected) - tf_logging.info("actual = ", value) + print("expected = ", expected) + print("actual = ", value) tol = 1e-5 if value.dtype == np.float16: tol = 1e-3 @@ -547,8 +547,8 @@ class Conv2DTest(test.TestCase): # "values" consists of two tensors for two backprops value = self.evaluate(conv) self.assertShapeEqual(value, conv) - tf_logging.info("expected = ", expected) - tf_logging.info("actual = ", value) + print("expected = ", expected) + print("actual = ", value) self.assertArrayNear(expected, value.flatten(), err) def _CompareBackpropInput(self, input_sizes, filter_sizes, output_sizes, @@ -723,8 +723,8 @@ class Conv2DTest(test.TestCase): data_format=data_format) value = self.evaluate(conv) self.assertShapeEqual(value, conv) - tf_logging.info("expected = ", expected) - tf_logging.info("actual = ", value) + print("expected = ", expected) + print("actual = ", value) self.assertArrayNear(expected, value.flatten(), 1e-5) def _CompareBackFilter(self, input_sizes, filter_sizes, output_sizes, @@ -912,8 +912,8 @@ class Conv2DTest(test.TestCase): value_2 = sess.run(conv_2) self.assertShapeEqual(value, conv) self.assertShapeEqual(value_2, conv_2) - tf_logging.info("expected = ", value_2) - tf_logging.info("actual = ", value) + print("expected = ", value_2) + print("actual = ", value) self.assertArrayNear(value_2.flatten(), value.flatten(), err) # Testing for backprops @@ -965,8 +965,8 @@ class Conv2DTest(test.TestCase): value_2 = sess.run(conv_2) self.assertShapeEqual(value, conv) self.assertShapeEqual(value_2, conv_2) - tf_logging.info("expected = ", value_2) - tf_logging.info("actual = ", value) + print("expected = ", value_2) + print("actual = ", value) self.assertArrayNear(value_2.flatten(), value.flatten(), err) def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self): @@ -1178,7 +1178,7 @@ class Conv2DTest(test.TestCase): # since fp16 numerical gradients are too imprecise. 
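The removed testClipByValueEmptyTensor above exercised clip_by_value on empty inputs (GitHub issue 19337). A hedged, eager-mode sketch of the same check, assuming current tf.clip_by_value behaviour:

import numpy as np
import tensorflow as tf

empty = tf.constant(np.zeros((7, 0)), dtype=tf.float32)
clipped = tf.clip_by_value(empty, 0.0, 1.0)
print(clipped.shape)  # (7, 0): clipping an empty tensor is effectively a no-op
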
err = np.fabs(jacob_t - reference_jacob_t).max() - tf_logging.info("conv_2d gradient error = ", err) + print("conv_2d gradient error = ", err) self.assertLess(err, 0.002) def testInputGradientValidPaddingStrideOne(self): @@ -1546,7 +1546,7 @@ class DepthwiseConv2DTest(test.TestCase): conv = nn_impl.depthwise_conv2d( t1, t2, strides=[1, stride, stride, 1], padding=padding) value = sess.run(conv) - tf_logging.info("value = ", value) + print("value = ", value) self.assertArrayNear(expected, np.ravel(value), 1e-5) self.assertShapeEqual(value, conv) @@ -1668,7 +1668,7 @@ class SeparableConv2DTest(test.TestCase): conv = array_ops.transpose(conv, [0, 2, 3, 1]) value = sess.run(conv) - tf_logging.info("value = ", value) + print("value = ", value) self.assertArrayNear(expected, np.ravel(value), 1e-5) self.assertShapeEqual(value, conv) @@ -1826,7 +1826,7 @@ class Conv2DBenchmark(test.Benchmark): wall_time = time.time() - start self.report_benchmark( name="conv_stack_iter_%d" % iter_index, wall_time=wall_time) - tf_logging.info("conv_stack_iter_%d: %.4f" % (iter_index, wall_time)) + print("conv_stack_iter_%d: %.4f" % (iter_index, wall_time)) def GetInceptionFwdTest(input_size, filter_size, stride, padding, diff --git a/tensorflow/python/kernel_tests/gather_nd_op_test.py b/tensorflow/python/kernel_tests/gather_nd_op_test.py index 58e2a8ac2a..91ebe8de99 100644 --- a/tensorflow/python/kernel_tests/gather_nd_op_test.py +++ b/tensorflow/python/kernel_tests/gather_nd_op_test.py @@ -197,21 +197,7 @@ class GatherNdTest(test.TestCase): self.assertEqual(None, shape.ndims) self.assertEqual(None, shape[0].value) - def testBadIndicesCPU(self): - with self.test_session(use_gpu=False): - params = [0, 1, 2] - indices = [[[0], [7]]] # Make this one higher rank - gather_nd = array_ops.gather_nd(params, indices) - with self.assertRaisesOpError( - r"flat indices\[1, :\] = \[7\] does not index into param " - r"\(shape: \[3\]\)"): - gather_nd.eval() - - def _disabledTestBadIndicesGPU(self): - # TODO disabled due to different behavior on GPU and CPU - # On GPU the bad indices do not raise error but fetch 0 values - if not test.is_gpu_available(): - return + def testBadIndices(self): with self.test_session(use_gpu=True): params = [0, 1, 2] indices = [[[0], [7]]] # Make this one higher rank @@ -221,21 +207,7 @@ class GatherNdTest(test.TestCase): r"\(shape: \[3\]\)"): gather_nd.eval() - def testBadIndicesWithSlicesCPU(self): - with self.test_session(use_gpu=False): - params = [[0, 1, 2]] - indices = [[[0], [0], [1]]] # Make this one higher rank - gather_nd = array_ops.gather_nd(params, indices) - with self.assertRaisesOpError( - r"flat indices\[2, :\] = \[1\] does not index into param " - r"\(shape: \[1,3\]\)"): - gather_nd.eval() - - def _disabledTestBadIndicesWithSlicesGPU(self): - # TODO disabled due to different behavior on GPU and CPU - # On GPU the bad indices do not raise error but fetch 0 values - if not test.is_gpu_available(): - return + def testBadIndicesWithSlices(self): with self.test_session(use_gpu=True): params = [[0, 1, 2]] indices = [[[0], [0], [1]]] # Make this one higher rank diff --git a/tensorflow/python/kernel_tests/gather_op_test.py b/tensorflow/python/kernel_tests/gather_op_test.py index 033fa95935..a2fcd751df 100644 --- a/tensorflow/python/kernel_tests/gather_op_test.py +++ b/tensorflow/python/kernel_tests/gather_op_test.py @@ -27,8 +27,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.platform import test -_TEST_TYPES = 
(dtypes.int64, dtypes.float32, - dtypes.complex64, dtypes.complex128) +_TEST_TYPES = (dtypes.float32, dtypes.complex64, dtypes.complex128) class GatherTest(test.TestCase): @@ -123,9 +122,6 @@ class GatherTest(test.TestCase): gather, [tf_params, tf_indices, tf_axis], gather_grad) self.assertEqual(indices_grad, None) self.assertEqual(axis_grad, None) - if dtype.is_integer: - self.assertEqual(params_grad, None) - continue # For axis 0, we are able to create an efficient IndexedSlices for # the gradient. if axis == 0: @@ -181,19 +177,7 @@ class GatherTest(test.TestCase): gather_t = array_ops.gather(params, indices, axis=axis) self.assertEqual(None, gather_t.shape) - def testBadIndicesCPU(self): - with self.test_session(use_gpu=False): - params = [[0, 1, 2], [3, 4, 5]] - with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 2\)"): - array_ops.gather(params, [[7]], axis=0).eval() - with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 3\)"): - array_ops.gather(params, [[7]], axis=1).eval() - - def _disabledTestBadIndicesGPU(self): - # TODO disabled due to different behavior on GPU and CPU - # On GPU the bad indices do not raise error but fetch 0 values - if not test.is_gpu_available(): - return + def testBadIndices(self): with self.test_session(use_gpu=True): params = [[0, 1, 2], [3, 4, 5]] with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 2\)"): diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index 795aa67248..a9b55854f1 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -362,33 +362,6 @@ class UniformUnitScalingInitializationTest(test.TestCase): dtype=dtypes.string) -class VarianceScalingInitializationTest(test.TestCase): - - def testNormalDistribution(self): - shape = [100, 100] - expect_mean = 0. - expect_var = 1. / shape[0] - init = init_ops.variance_scaling_initializer(distribution='normal') - - with self.test_session(use_gpu=True): - x = init(shape).eval() - - self.assertNear(np.mean(x), expect_mean, err=1e-2) - self.assertNear(np.var(x), expect_var, err=1e-2) - - def testUniformDistribution(self): - shape = [100, 100] - expect_mean = 0. - expect_var = 1. / shape[0] - init = init_ops.variance_scaling_initializer(distribution='uniform') - - with self.test_session(use_gpu=True): - x = init(shape).eval() - - self.assertNear(np.mean(x), expect_mean, err=1e-2) - self.assertNear(np.var(x), expect_var, err=1e-2) - - # TODO(vrv): move to sequence_ops_test? 
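The removed VarianceScalingInitializationTest above checked that samples have mean near 0 and variance near scale / fan_in. A rough eager-mode sketch of the same property, with tf.keras.initializers.VarianceScaling assumed as a stand-in for init_ops.variance_scaling_initializer:

import numpy as np
import tensorflow as tf

shape = (100, 100)  # fan_in = 100 for a 2-D shape
init = tf.keras.initializers.VarianceScaling(scale=1.0, distribution='uniform')
x = init(shape).numpy()
print(np.mean(x), np.var(x))  # mean close to 0, variance close to 1 / 100
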
class RangeTest(test.TestCase): diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index e95c729715..a0c372db7d 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -947,7 +947,7 @@ class PoolingTest(test.TestCase): output_sizes, x_init_value=x_init_value, delta=1e-2) - tf_logging.info("%s gradient error = " % func_name, err) + print("%s gradient error = " % func_name, err) self.assertLess(err, err_tolerance) def _ConstructAndTestSecondGradient(self, @@ -1024,7 +1024,7 @@ class PoolingTest(test.TestCase): input_sizes, x_init_value=x_init_value, delta=1e-2) - tf_logging.info("%s second-order gradient error = " % func_name, err) + print("%s second-order gradient error = " % func_name, err) self.assertLess(err, err_tolerance) def _testMaxPoolGradValidPadding1_1(self, data_format, use_gpu): diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py index 253e43920b..677253946e 100644 --- a/tensorflow/python/kernel_tests/py_func_test.py +++ b/tensorflow/python/kernel_tests/py_func_test.py @@ -19,7 +19,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import gc import re import numpy as np @@ -435,29 +434,13 @@ class PyFuncTest(test.TestCase): # ----- Tests shared by py_func and eager_py_func ----- def testCleanup(self): - # Delete everything created by previous tests to avoid side effects. - ops.reset_default_graph() - gc.collect() - initial_size = script_ops._py_funcs.size() - # Encapsulate the graph generation, so locals can be deleted. - def make_graphs(): - for _ in xrange(1000): - g = ops.Graph() - with g.as_default(): - c = constant_op.constant([1.], dtypes.float32) - _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32]) - _ = script_ops.eager_py_func(lambda x: x + 1, [c], [dtypes.float32]) - # These ops have a reference to 'c' which has a reference to the graph. - # Checks if the functions are being deleted though the graph is referenced from them. - # (see #18292) - _ = script_ops.py_func(lambda x: x + c.shape[0], [c], [dtypes.float32]) - _ = script_ops.eager_py_func(lambda x: x + c.shape[0], [c], [dtypes.float32]) - - # Call garbage collector to enforce deletion. 
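The removed testCleanup above relied on registered py_func callables being garbage-collected once the graph holding the only strong reference goes away. A toy, pure-Python sketch of that weak-reference registry pattern (illustrative names only, not the TensorFlow registry):

import gc
import weakref

class Registry(object):
  def __init__(self):
    # Only weak references are stored; the caller keeps the strong reference.
    self._funcs = weakref.WeakValueDictionary()
    self._next = 0

  def insert(self, func):
    token = 'pyfunc_%d' % self._next
    self._next += 1
    self._funcs[token] = func
    return token

  def size(self):
    return len(self._funcs)

reg = Registry()

def make_and_drop():
  def f(x):          # local function: its only strong reference is local
    return x + 1
  reg.insert(f)

make_and_drop()
gc.collect()
assert reg.size() == 0  # the entry disappeared together with its function
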
- make_graphs() - ops.reset_default_graph() - gc.collect() - self.assertEqual(initial_size, script_ops._py_funcs.size()) + for _ in xrange(1000): + g = ops.Graph() + with g.as_default(): + c = constant_op.constant([1.], dtypes.float32) + _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32]) + _ = script_ops.eager_py_func(lambda x: x + 1, [c], [dtypes.float32]) + self.assertLess(script_ops._py_funcs.size(), 100) # ----- Tests for eager_py_func ----- @test_util.run_in_graph_and_eager_modes() diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index faa4b49a8d..79fe927b8a 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -144,9 +144,7 @@ class StatefulScatterNdTest(test.TestCase): self.assertAllClose(new, ref_var.eval()) def _VariableRankTests(self, np_scatter, tf_scatter): - for vtype in (np.int32, - np.float32, np.float64, - np.complex64, np.complex128): + for vtype in (np.float32, np.float64, np.complex64, np.complex128): for itype in (np.int32, np.int64): self._VariableRankTest(np_scatter, tf_scatter, vtype, itype) @@ -223,7 +221,7 @@ class StatefulScatterNdTest(test.TestCase): # self._VariableRankTests(_NumpyDiv, state_ops.scatter_nd_div) def _ScatterRepeatIndicesTest(self, np_scatter, tf_scatter): - for vtype in (np.int32, np.float32, np.float64): + for vtype in (np.float32, np.float64): for itype in (np.int32, np.int64): self._VariableRankTest( np_scatter, tf_scatter, vtype, itype, repeat_indices=True) diff --git a/tensorflow/python/kernel_tests/scatter_ops_test.py b/tensorflow/python/kernel_tests/scatter_ops_test.py index 1a0fa744ae..c70a4ffce7 100644 --- a/tensorflow/python/kernel_tests/scatter_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_ops_test.py @@ -159,13 +159,7 @@ class ScatterTest(test.TestCase): # Clips small values to avoid division by zero. 
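The scatter test hunk just below clips small update values away from zero so the division-based scatter variants stay finite. A standalone rendering of the integer-aware variant from the removed lines, for context (illustrative only):

import numpy as np

def clip_small_values(x):
  # Keep update values away from zero so scatter_div's reference computation
  # never divides by a near-zero value.
  threshold = 1e-4
  sign = np.sign(x)
  if isinstance(x, np.int32):
    threshold = 1                       # integers: push at least one unit away
    sign = np.random.choice([-1, 1])
  return threshold * sign if np.abs(x) < threshold else x

updates = np.vectorize(clip_small_values)(np.random.randn(8).astype(np.float32))
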
def clip_small_values(x): - threshold = 1e-4 - sign = np.sign(x) - - if isinstance(x, np.int32): - threshold = 1 - sign = np.random.choice([-1, 1]) - return threshold * sign if np.abs(x) < threshold else x + return 1e-4 * np.sign(x) if np.abs(x) < 1e-4 else x updates = np.vectorize(clip_small_values)(updates) old = _AsType(np.random.randn(*((first_dim,) + extra_shape)), vtype) @@ -187,11 +181,7 @@ class ScatterTest(test.TestCase): tf_scatter, repeat_indices=False, updates_are_scalar=False): - vtypes = [np.float32, np.float64] - if tf_scatter != state_ops.scatter_div: - vtypes.append(np.int32) - - for vtype in vtypes: + for vtype in (np.float32, np.float64): for itype in (np.int32, np.int64): self._VariableRankTest(tf_scatter, vtype, itype, repeat_indices, updates_are_scalar) diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index a82855dfeb..794be096b7 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -264,9 +264,7 @@ class UnsortedSegmentTest(SegmentReductionHelper): # A subset of ops has been enabled for complex numbers self.complex_ops_list = [(np.add, None, - math_ops.unsorted_segment_sum, lambda t: 0), - (np.ndarray.__mul__, None, - math_ops.unsorted_segment_prod, lambda t: 1)] + math_ops.unsorted_segment_sum, lambda t: 0)] self.differentiable_dtypes = [dtypes_lib.float16, dtypes_lib.float32, dtypes_lib.float64] self.all_dtypes = (self.differentiable_dtypes + diff --git a/tensorflow/python/kernel_tests/string_split_op_test.py b/tensorflow/python/kernel_tests/string_split_op_test.py index e20daccb28..a5bd1b6ee0 100644 --- a/tensorflow/python/kernel_tests/string_split_op_test.py +++ b/tensorflow/python/kernel_tests/string_split_op_test.py @@ -146,101 +146,5 @@ class StringSplitOpTest(test.TestCase): self.assertAllEqual(shape, [3, 1]) -class StringSplitV2OpTest(test.TestCase): - - def testSplitV2(self): - strings = ["pigs on the wing", "animals"] - - with self.test_session() as sess: - tokens = string_ops.string_split_v2(strings) - indices, values, shape = sess.run(tokens) - self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [1, 0]]) - self.assertAllEqual(values, [b"pigs", b"on", b"the", b"wing", b"animals"]) - self.assertAllEqual(shape, [2, 4]) - - def testSplitV2MultiCharSeparator(self): - # Match Python behavior: - # >>> '1<>2<>3'.split('<>') - # ['1', '2', '3'] - # >>> "<><>4<>5<><>6<>".split("<>") - # ['', '', '4', '5', '', '6', ''] - strings = ["1<>2<>3", "<><>4<>5<><>6<>"] - - with self.test_session() as sess: - tokens = string_ops.string_split_v2(strings, sep="<>") - indices, values, shape = sess.run(tokens) - self.assertAllEqual( - indices, [[0, 0], [0, 1], [0, 2], - [1, 0], [1, 1], [1, 2], [1, 3], [1, 4], [1, 5], [1, 6]]) - self.assertAllEqual(values, [b"1", b"2", b"3", - b"", b"", b"4", b"5", b"", b"6", b""]) - self.assertAllEqual(shape, [2, 7]) - - def testSplitV2SimpleSeparator(self): - # Match Python behavior: - # >>> '1,2,3'.split(',') - # ['1', '2', '3'] - # >>> '1,2,,3,'.split(',') - # ['1', '2', '', '3', ''] - strings = ["1,2,3", "4,5,,6,"] - - with self.test_session() as sess: - tokens = string_ops.string_split_v2(strings, sep=',') - indices, values, shape = sess.run(tokens) - self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], - [1, 0], [1, 1], [1, 2], [1, 3], [1, 4]]) - self.assertAllEqual(values, [b"1", b"2", b"3", - b"4", b"5", b"", b"6", b""]) - self.assertAllEqual(shape, 
[2, 5]) - - def testSplitV2EmptySeparator(self): - # Match Python behavior: - # >>> '1 2 3'.split() - # ['1', '2', '3'] - #>>> ' 1 2 3 '.split() - #['1', '2', '3'] - strings = ["1 2 3", " 4 5 6 "] - - with self.test_session() as sess: - tokens = string_ops.string_split_v2(strings) - indices, values, shape = sess.run(tokens) - self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], - [1, 0], [1, 1], [1, 2]]) - self.assertAllEqual(values, [b"1", b"2", b"3", b"4", b"5", b"6"]) - self.assertAllEqual(shape, [2, 3]) - - def testSplitV2SimpleSeparatorMaxSplit(self): - # Match Python behavior: - # >>> '1,2,3'.split(',', maxsplit=1) - # ['1', '2,3'] - # >>> '4,5,,6,'.split(',', maxsplit=1) - # ['4', '5,,6,'] - strings = ["1,2,3", "4,5,,6,"] - - with self.test_session() as sess: - tokens = string_ops.string_split_v2(strings, sep=',', maxsplit=1) - indices, values, shape = sess.run(tokens) - self.assertAllEqual(indices, [[0, 0], [0, 1], - [1, 0], [1, 1]]) - self.assertAllEqual(values, [b"1", b"2,3", b"4", b"5,,6,"]) - self.assertAllEqual(shape, [2, 2]) - - def testSplitV2EmptySeparatorMaxSplit(self): - # Match Python behavior: - # '1 2 3'.split(maxsplit=1) - # ['1', '2 3'] - # >>> " 4 5 6 ".split(maxsplit=1) - # ['4', '5 6 '] - strings = ["1 2 3", " 4 5 6 "] - - with self.test_session() as sess: - tokens = string_ops.string_split_v2(strings, maxsplit=1) - indices, values, shape = sess.run(tokens) - self.assertAllEqual(indices, [[0, 0], [0, 1], - [1, 0], [1, 1]]) - self.assertAllEqual(values, [b"1", b"2 3", b"4", b"5 6 "]) - self.assertAllEqual(shape, [2, 2]) - - if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index fae63b1132..8129334703 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -2619,10 +2619,6 @@ reverse.__doc__ = gen_array_ops.reverse_v2.__doc__ # pylint: disable=redefined-builtin @tf_export("reverse_sequence") -@deprecation.deprecated_args( - None, "seq_dim is deprecated, use seq_axis instead", "seq_dim") -@deprecation.deprecated_args( - None, "batch_dim is deprecated, use batch_axis instead", "batch_dim") def reverse_sequence(input, seq_lengths, seq_axis=None, diff --git a/tensorflow/python/ops/gradient_checker.py b/tensorflow/python/ops/gradient_checker.py index 94c8d79335..12afcd0b51 100644 --- a/tensorflow/python/ops/gradient_checker.py +++ b/tensorflow/python/ops/gradient_checker.py @@ -283,10 +283,10 @@ def compute_gradient(x, numbers. 
For example, if `x` is complex with shape `[m]` and `y` is complex with shape `[n]`, each Jacobian `J` will have shape `[m * 2, n * 2]` with - J[::2, ::2] = d(Re y)/d(Re x) - J[::2, 1::2] = d(Im y)/d(Re x) - J[1::2, ::2] = d(Re y)/d(Im x) - J[1::2, 1::2] = d(Im y)/d(Im x) + J[:m, :n] = d(Re y)/d(Re x) + J[:m, n:] = d(Im y)/d(Re x) + J[m:, :n] = d(Re y)/d(Im x) + J[m:, n:] = d(Im y)/d(Im x) Args: x: a tensor or list of tensors diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index f27d9224c1..bdcf420980 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -28,7 +28,6 @@ from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import functional_ops from tensorflow.python.ops import gen_image_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops @@ -259,14 +258,14 @@ def random_flip_up_down(image, seed=None): dimension, which is `height`. Otherwise output the image as-is. Args: - image: 4-D Tensor of shape `[batch, height, width, channels]` or - 3-D Tensor of shape `[height, width, channels]`. + image: A 3-D tensor of shape `[height, width, channels].` seed: A Python integer. Used to create a random seed. See @{tf.set_random_seed} for behavior. Returns: - A tensor of the same type and shape as `image`. + A 3-D tensor of the same type and shape as `image`. + Raises: ValueError: if the shape of `image` not supported. """ @@ -281,14 +280,13 @@ def random_flip_left_right(image, seed=None): second dimension, which is `width`. Otherwise output the image as-is. Args: - image: 4-D Tensor of shape `[batch, height, width, channels]` or - 3-D Tensor of shape `[height, width, channels]`. + image: A 3-D tensor of shape `[height, width, channels].` seed: A Python integer. Used to create a random seed. See @{tf.set_random_seed} for behavior. Returns: - A tensor of the same type and shape as `image`. + A 3-D tensor of the same type and shape as `image`. Raises: ValueError: if the shape of `image` not supported. @@ -299,8 +297,7 @@ def random_flip_left_right(image, seed=None): def _random_flip(image, flip_index, seed, scope_name): """Randomly (50% chance) flip an image along axis `flip_index`. Args: - image: 4-D Tensor of shape `[batch, height, width, channels]` or - 3-D Tensor of shape `[height, width, channels]`. + image: A 3-D tensor of shape `[height, width, channels].` flip_index: The dimension along which to flip the image. Vertical: 0, Horizontal: 1 seed: A Python integer. Used to create a random seed. See @@ -309,37 +306,22 @@ def _random_flip(image, flip_index, seed, scope_name): scope_name: Name of the scope in which the ops are added. Returns: - A tensor of the same type and shape as `image`. + A 3-D tensor of the same type and shape as `image`. Raises: ValueError: if the shape of `image` not supported. 
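The revised compute_gradient docstring above describes a block layout for complex Jacobians. A small numpy illustration of how the four real-valued blocks assemble into the `[m * 2, n * 2]` matrix (toy values, not the gradient checker itself):

import numpy as np

m, n = 2, 3
d_re_re = np.zeros((m, n))      # d(Re y)/d(Re x)
d_im_re = np.ones((m, n))       # d(Im y)/d(Re x)
d_re_im = np.full((m, n), 2.0)  # d(Re y)/d(Im x)
d_im_im = np.full((m, n), 3.0)  # d(Im y)/d(Im x)

J = np.block([[d_re_re, d_im_re],
              [d_re_im, d_im_im]])
assert J.shape == (2 * m, 2 * n)
assert np.all(J[:m, n:] == 1.0)  # the d(Im y)/d(Re x) block
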
""" with ops.name_scope(None, scope_name, [image]) as scope: image = ops.convert_to_tensor(image, name='image') - image = _AssertAtLeast3DImage(image) - shape = image.get_shape() - if shape.ndims == 3 or shape.ndims is None: - uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) - mirror_cond = math_ops.less(uniform_random, .5) - result = control_flow_ops.cond( - mirror_cond, - lambda: array_ops.reverse(image, [flip_index]), - lambda: image, - name=scope - ) - return fix_image_flip_shape(image, result) - elif shape.ndims == 4: - uniform_random = random_ops.random_uniform( - [array_ops.shape(image)[0]], 0, 1.0, seed=seed - ) - mirror_cond = math_ops.less(uniform_random, .5) - return array_ops.where( - mirror_cond, - image, - functional_ops.map_fn(lambda x: array_ops.reverse(x, [flip_index]), image, dtype=image.dtype) - ) - else: - raise ValueError('\'image\' must have either 3 or 4 dimensions.') + image = _Assert3DImage(image) + uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) + mirror_cond = math_ops.less(uniform_random, .5) + result = control_flow_ops.cond( + mirror_cond, + lambda: array_ops.reverse(image, [flip_index]), + lambda: image, + name=scope) + return fix_image_flip_shape(image, result) @tf_export('image.flip_left_right') @@ -1652,13 +1634,13 @@ def is_jpeg(contents, name=None): @tf_export('image.decode_image') -def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None): +def decode_image(contents, channels=None, name=None): """Convenience function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`. Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the - appropriate operation to convert the input bytes `string` into a `Tensor` - of type `dtype`. + appropriate operation to convert the input bytes `string` into a `Tensor` of + type `uint8`. Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D @@ -1670,11 +1652,10 @@ def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None): contents: 0-D `string`. The encoded image bytes. channels: An optional `int`. Defaults to `0`. Number of color channels for the decoded image. - dtype: The desired DType of the returned `Tensor`. name: A name for the operation (optional) Returns: - `Tensor` with type `dtype` and shape `[height, width, num_channels]` for + `Tensor` with type `uint8` with shape `[height, width, num_channels]` for BMP, JPEG, and PNG images and shape `[num_frames, height, width, 3]` for GIF images. 
@@ -1698,7 +1679,7 @@ def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None): channels_msg = 'Channels must be in (None, 0, 3) when decoding BMP images' assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) with ops.control_dependencies([assert_decode, assert_channels]): - return convert_image_dtype(gen_image_ops.decode_bmp(contents), dtype) + return gen_image_ops.decode_bmp(contents) def _gif(): # Create assert to make sure that channels is not set to 1 @@ -1711,7 +1692,7 @@ def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None): channels_msg = 'Channels must be in (None, 0, 3) when decoding GIF images' assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) with ops.control_dependencies([assert_channels]): - return convert_image_dtype(gen_image_ops.decode_gif(contents), dtype) + return gen_image_ops.decode_gif(contents) def check_gif(): # Create assert op to check that bytes are GIF decodable @@ -1720,11 +1701,7 @@ def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None): def _png(): """Decodes a PNG image.""" - return convert_image_dtype( - gen_image_ops.decode_png(contents, channels, - dtype=dtypes.uint8 - if dtype == dtypes.uint8 - else dtypes.uint16), dtype) + return gen_image_ops.decode_png(contents, channels) def check_png(): """Checks if an image is PNG.""" @@ -1740,8 +1717,7 @@ def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None): 'images') assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) with ops.control_dependencies([assert_channels]): - return convert_image_dtype( - gen_image_ops.decode_jpeg(contents, channels), dtype) + return gen_image_ops.decode_jpeg(contents, channels) # Decode normal JPEG images (start with \xff\xd8\xff\xe0) # as well as JPEG images with EXIF data (start with \xff\xd8\xff\xe1). @@ -1902,7 +1878,7 @@ def sample_distorted_bounding_box(image_size, width / height within this range. area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The cropped area of the image must contain a fraction of the - supplied image within this range. + supplied image within in this range. max_attempts: An optional `int`. Defaults to `100`. Number of attempts at generating a cropped region of the image of the specified constraints. 
After `max_attempts` failures, return the diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 2a6ab26e96..45499dcce0 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -533,37 +533,6 @@ class FlipImageBenchmark(test.Benchmark): iters=benchmark_rounds, wall_time=step_time) - def _benchmarkBatchedRandomFlipLeftRight(self, device, cpu_count): - image_shape = [16, 299, 299, 3] - warmup_rounds = 100 - benchmark_rounds = 1000 - config = config_pb2.ConfigProto() - if cpu_count is not None: - config.inter_op_parallelism_threads = 1 - config.intra_op_parallelism_threads = cpu_count - with session.Session("", graph=ops.Graph(), config=config) as sess: - with ops.device(device): - inputs = variables.Variable( - random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255, - trainable=False, - dtype=dtypes.float32) - run_op = image_ops.random_flip_left_right(inputs) - sess.run(variables.global_variables_initializer()) - for i in xrange(warmup_rounds + benchmark_rounds): - if i == warmup_rounds: - start = time.time() - sess.run(run_op) - end = time.time() - step_time = (end - start) / benchmark_rounds - tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all") - print("benchmarkBatchedRandomFlipLeftRight_16_299_299_3_%s step_time: " - "%.2f us" % - (tag, step_time * 1e6)) - self.report_benchmark( - name="benchmarkBatchedRandomFlipLeftRight_16_299_299_3_%s" % (tag), - iters=benchmark_rounds, - wall_time=step_time) - def benchmarkFlipLeftRightCpu1(self): self._benchmarkFlipLeftRight("/cpu:0", 1) @@ -582,15 +551,6 @@ class FlipImageBenchmark(test.Benchmark): def benchmarkRandomFlipLeftRightGpu(self): self._benchmarkRandomFlipLeftRight(test.gpu_device_name(), None) - def benchmarkBatchedRandomFlipLeftRightCpu1(self): - self._benchmarkBatchedRandomFlipLeftRight("/cpu:0", 1) - - def benchmarkBatchedRandomFlipLeftRightCpuAll(self): - self._benchmarkBatchedRandomFlipLeftRight("/cpu:0", None) - - def benchmarkBatchedRandomFlipLeftRightGpu(self): - self._benchmarkBatchedRandomFlipLeftRight(test.gpu_device_name(), None) - class AdjustHueBenchmark(test.Benchmark): @@ -1027,7 +987,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): with self.test_session(use_gpu=True): x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_left_right(x_tf, seed=seed) + y = image_ops.random_flip_left_right(x_tf) self.assertTrue(y.op.name.startswith("random_flip_left_right")) count_flipped = 0 @@ -1048,50 +1008,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): self.assertGreaterEqual(count_flipped, 20) self.assertGreaterEqual(count_unflipped, 20) - def testRandomFlipLeftRightWithBatch(self): - batch_size = 16 - seed = 42 - - # create single item of test data - x_np_raw = np.array( - [[1, 2, 3], [1, 2, 3]], dtype=np.uint8 - ).reshape([1, 2, 3, 1]) - y_np_raw = np.array( - [[3, 2, 1], [3, 2, 1]], dtype=np.uint8 - ).reshape([1, 2, 3, 1]) - - # create batched test data - x_np = np.vstack([x_np_raw for _ in range(batch_size)]) - y_np = np.vstack([y_np_raw for _ in range(batch_size)]) - - with self.test_session(use_gpu=True): - x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_left_right(x_tf, seed=seed) - self.assertTrue(y.op.name.startswith("random_flip_left_right")) - - count_flipped = 0 - count_unflipped = 0 - for _ in range(100): - y_tf = y.eval() - - # check every element of the batch - for i in range(batch_size): - if y_tf[i][0][0] 
== 1: - self.assertAllEqual(y_tf[i], x_np[i]) - count_unflipped += 1 - else: - self.assertAllEqual(y_tf[i], y_np[i]) - count_flipped += 1 - - # 100 trials, each containing batch_size elements - # Mean: 50 * batch_size - # Std Dev: ~5 * sqrt(batch_size) - # Six Sigma: 50 * batch_size - (5 * 6 * sqrt(batch_size)) - # = 50 * batch_size - 30 * sqrt(batch_size) = 800 - 30 * 4 = 680 - six_sigma = 50 * batch_size - 30 * np.sqrt(batch_size) - self.assertGreaterEqual(count_flipped, six_sigma) - self.assertGreaterEqual(count_unflipped, six_sigma) - def testInvolutionUpDown(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) @@ -1141,11 +1057,9 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) y_np = np.array([[4, 5, 6], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1]) - seed = 42 - with self.test_session(use_gpu=True): x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_up_down(x_tf, seed=seed) + y = image_ops.random_flip_up_down(x_tf, seed=42) self.assertTrue(y.op.name.startswith("random_flip_up_down")) count_flipped = 0 count_unflipped = 0 @@ -1165,50 +1079,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): self.assertGreaterEqual(count_flipped, 20) self.assertGreaterEqual(count_unflipped, 20) - def testRandomFlipUpDownWithBatch(self): - batch_size = 16 - seed = 42 - - # create single item of test data - x_np_raw = np.array( - [[1, 2, 3], [4, 5, 6]], dtype=np.uint8 - ).reshape([1, 2, 3, 1]) - y_np_raw = np.array( - [[4, 5, 6], [1, 2, 3]], dtype=np.uint8 - ).reshape([1, 2, 3, 1]) - - # create batched test data - x_np = np.vstack([x_np_raw for _ in range(batch_size)]) - y_np = np.vstack([y_np_raw for _ in range(batch_size)]) - - with self.test_session(use_gpu=True): - x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_up_down(x_tf, seed=seed) - self.assertTrue(y.op.name.startswith("random_flip_up_down")) - - count_flipped = 0 - count_unflipped = 0 - for _ in range(100): - y_tf = y.eval() - - # check every element of the batch - for i in range(batch_size): - if y_tf[i][0][0] == 1: - self.assertAllEqual(y_tf[i], x_np[i]) - count_unflipped += 1 - else: - self.assertAllEqual(y_tf[i], y_np[i]) - count_flipped += 1 - - # 100 trials, each containing batch_size elements - # Mean: 50 * batch_size - # Std Dev: ~5 * sqrt(batch_size) - # Six Sigma: 50 * batch_size - (5 * 6 * sqrt(batch_size)) - # = 50 * batch_size - 30 * sqrt(batch_size) = 800 - 30 * 4 = 680 - six_sigma = 50 * batch_size - 30 * np.sqrt(batch_size) - self.assertGreaterEqual(count_flipped, six_sigma) - self.assertGreaterEqual(count_unflipped, six_sigma) - def testInvolutionTranspose(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) @@ -1286,7 +1156,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): #Ops that support 4D input for op in [ image_ops.flip_left_right, image_ops.flip_up_down, - image_ops.random_flip_left_right, image_ops.random_flip_up_down, image_ops.transpose_image, image_ops.rot90 ]: transformed_unknown_dims_4 = op(p_unknown_dims_4) @@ -1297,6 +1166,14 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): "must be at least three-dimensional"): op(p_wrong_rank) + for op in [ + image_ops.random_flip_left_right, + image_ops.random_flip_up_down, + ]: + with self.assertRaisesRegexp(ValueError, "must be three-dimensional"): + op(p_wrong_rank) + + def testRot90GroupOrder(self): image = 
np.arange(24, dtype=np.uint8).reshape([2, 4, 3]) with self.test_session(use_gpu=True): @@ -1331,6 +1208,41 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): y_np = np.rot90(image, k=k, axes=(1, 2)) self.assertAllEqual(y_np, y_tf.eval({k_placeholder: k})) +class RandomFlipTest(test_util.TensorFlowTestCase): + + def testRandomLeftRight(self): + x_np = np.array([0, 1], dtype=np.uint8).reshape([1, 2, 1]) + num_iterations = 500 + + hist = [0, 0] + with self.test_session(use_gpu=True): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.random_flip_left_right(x_tf) + for _ in xrange(num_iterations): + y_np = y.eval().flatten()[0] + hist[y_np] += 1 + + # Ensure that each entry is observed within 4 standard deviations. + four_stddev = 4.0 * np.sqrt(num_iterations / 2.0) + self.assertAllClose(hist, [num_iterations / 2.0] * 2, atol=four_stddev) + + def testRandomUpDown(self): + x_np = np.array([0, 1], dtype=np.uint8).reshape([2, 1, 1]) + num_iterations = 500 + + hist = [0, 0] + with self.test_session(use_gpu=True): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.random_flip_up_down(x_tf) + for _ in xrange(num_iterations): + y_np = y.eval().flatten()[0] + hist[y_np] += 1 + + # Ensure that each entry is observed within 4 standard deviations. + four_stddev = 4.0 * np.sqrt(num_iterations / 2.0) + self.assertAllClose(hist, [num_iterations / 2.0] * 2, atol=four_stddev) + + class AdjustContrastTest(test_util.TensorFlowTestCase): def _testContrast(self, x_np, y_np, contrast_factor): @@ -3968,88 +3880,5 @@ class SobelEdgesTest(test_util.TensorFlowTestCase): self.assertAllClose(expected_batch, actual_sobel) -class DecodeImageTest(test_util.TensorFlowTestCase): - - def testJpegUint16(self): - with self.test_session(use_gpu=True) as sess: - base = "tensorflow/core/lib/jpeg/testdata" - jpeg0 = io_ops.read_file(os.path.join(base, "jpeg_merge_test1.jpg")) - image0 = image_ops.decode_image(jpeg0, dtype=dtypes.uint16) - image1 = image_ops.convert_image_dtype(image_ops.decode_jpeg(jpeg0), - dtypes.uint16) - image0, image1 = sess.run([image0, image1]) - self.assertAllEqual(image0, image1) - - def testPngUint16(self): - with self.test_session(use_gpu=True) as sess: - base = "tensorflow/core/lib/png/testdata" - png0 = io_ops.read_file(os.path.join(base, "lena_rgba.png")) - image0 = image_ops.decode_image(png0, dtype=dtypes.uint16) - image1 = image_ops.convert_image_dtype( - image_ops.decode_png(png0, dtype=dtypes.uint16), dtypes.uint16) - image0, image1 = sess.run([image0, image1]) - self.assertAllEqual(image0, image1) - - def testGifUint16(self): - with self.test_session(use_gpu=True) as sess: - base = "tensorflow/core/lib/gif/testdata" - gif0 = io_ops.read_file(os.path.join(base, "scan.gif")) - image0 = image_ops.decode_image(gif0, dtype=dtypes.uint16) - image1 = image_ops.convert_image_dtype(image_ops.decode_gif(gif0), - dtypes.uint16) - image0, image1 = sess.run([image0, image1]) - self.assertAllEqual(image0, image1) - - def testBmpUint16(self): - with self.test_session(use_gpu=True) as sess: - base = "tensorflow/core/lib/bmp/testdata" - bmp0 = io_ops.read_file(os.path.join(base, "lena.bmp")) - image0 = image_ops.decode_image(bmp0, dtype=dtypes.uint16) - image1 = image_ops.convert_image_dtype(image_ops.decode_bmp(bmp0), - dtypes.uint16) - image0, image1 = sess.run([image0, image1]) - self.assertAllEqual(image0, image1) - - def testJpegFloat32(self): - with self.test_session(use_gpu=True) as sess: - base = "tensorflow/core/lib/jpeg/testdata" - jpeg0 = 
io_ops.read_file(os.path.join(base, "jpeg_merge_test1.jpg")) - image0 = image_ops.decode_image(jpeg0, dtype=dtypes.float32) - image1 = image_ops.convert_image_dtype(image_ops.decode_jpeg(jpeg0), - dtypes.float32) - image0, image1 = sess.run([image0, image1]) - self.assertAllEqual(image0, image1) - - def testPngFloat32(self): - with self.test_session(use_gpu=True) as sess: - base = "tensorflow/core/lib/png/testdata" - png0 = io_ops.read_file(os.path.join(base, "lena_rgba.png")) - image0 = image_ops.decode_image(png0, dtype=dtypes.float32) - image1 = image_ops.convert_image_dtype( - image_ops.decode_png(png0, dtype=dtypes.uint16), dtypes.float32) - image0, image1 = sess.run([image0, image1]) - self.assertAllEqual(image0, image1) - - def testGifFloat32(self): - with self.test_session(use_gpu=True) as sess: - base = "tensorflow/core/lib/gif/testdata" - gif0 = io_ops.read_file(os.path.join(base, "scan.gif")) - image0 = image_ops.decode_image(gif0, dtype=dtypes.float32) - image1 = image_ops.convert_image_dtype(image_ops.decode_gif(gif0), - dtypes.float32) - image0, image1 = sess.run([image0, image1]) - self.assertAllEqual(image0, image1) - - def testBmpFloat32(self): - with self.test_session(use_gpu=True) as sess: - base = "tensorflow/core/lib/bmp/testdata" - bmp0 = io_ops.read_file(os.path.join(base, "lena.bmp")) - image0 = image_ops.decode_image(bmp0, dtype=dtypes.float32) - image1 = image_ops.convert_image_dtype(image_ops.decode_bmp(bmp0), - dtypes.float32) - image0, image1 = sess.run([image0, image1]) - self.assertAllEqual(image0, image1) - - if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index 724fcc39cd..2df230d470 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -467,8 +467,7 @@ class VarianceScaling(Initializer): else: scale /= max(1., (fan_in + fan_out) / 2.) if self.distribution == "normal": - # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) - stddev = math.sqrt(scale) / .87962566103423978 + stddev = math.sqrt(scale) return random_ops.truncated_normal( shape, 0.0, stddev, dtype, seed=self.seed) else: diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py index 8276047cb6..222b8ebc9d 100644 --- a/tensorflow/python/ops/logging_ops.py +++ b/tensorflow/python/ops/logging_ops.py @@ -35,9 +35,8 @@ from tensorflow.python.util.tf_export import tf_export # Assert and Print are special symbols in python, so we must -# have an upper-case version of them. For users with Python 3 or Python 2.7 -# with `from __future__ import print_function`, we also allow lowercase. -@tf_export("Print", "print") +# use an upper-case version of them. +@tf_export("Print") def Print(input_, data, message=None, first_n=None, summarize=None, name=None): """Prints a list of tensors. diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 466d0dadc8..e40481f3a7 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -125,8 +125,8 @@ def abs(x, name=None): # pylint: disable=redefined-builtin ``` Args: - x: A `Tensor` or `SparseTensor` of type `float16`, `float32`, `float64`, - `int32`, `int64`, `complex64` or `complex128`. + x: A `Tensor` or `SparseTensor` of type `float32`, `float64`, `int32`, + `int64`, `complex64` or `complex128`. name: A name for the operation (optional). 
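The removed comment above attributes the 0.87962566103423978 divisor to scipy.stats.truncnorm. A quick check of that constant (requires scipy; shown only to document where the number comes from):

from scipy.stats import truncnorm

# Standard deviation of a standard normal truncated to [-2, 2]; dividing by it
# keeps truncated-normal samples at the requested stddev.
print(truncnorm.std(a=-2, b=2, loc=0., scale=1.))  # ~= 0.87962566103423978
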
Returns: @@ -430,10 +430,10 @@ def pow(x, y, name=None): # pylint: disable=redefined-builtin ``` Args: - x: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, `int64`, - `complex64`, or `complex128`. - y: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, `int64`, - `complex64`, or `complex128`. + x: A `Tensor` of type `float32`, `float64`, `int32`, `int64`, `complex64`, + or `complex128`. + y: A `Tensor` of type `float32`, `float64`, `int32`, `int64`, `complex64`, + or `complex128`. name: A name for the operation (optional). Returns: @@ -600,7 +600,7 @@ def round(x, name=None): # pylint: disable=redefined-builtin ``` Args: - x: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, or `int64`. + x: A `Tensor` of type `float32` or `float64`. name: A name for the operation (optional). Returns: @@ -1257,7 +1257,7 @@ def reduce_sum(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` is None, all dimensions are reduced, and a + If `axis` has no entries, all dimensions are reduced, and a tensor with a single element is returned. For example: @@ -1397,7 +1397,7 @@ def reduce_mean(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` is None, all dimensions are reduced, and a + If `axis` has no entries, all dimensions are reduced, and a tensor with a single element is returned. For example: @@ -1469,7 +1469,7 @@ def reduce_prod(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` is None, all dimensions are reduced, and a + If `axis` has no entries, all dimensions are reduced, and a tensor with a single element is returned. Args: @@ -1519,7 +1519,7 @@ def reduce_min(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` is None, all dimensions are reduced, and a + If `axis` has no entries, all dimensions are reduced, and a tensor with a single element is returned. Args: @@ -1568,7 +1568,7 @@ def reduce_max(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` is None, all dimensions are reduced, and a + If `axis` has no entries, all dimensions are reduced, and a tensor with a single element is returned. Args: @@ -1617,7 +1617,7 @@ def reduce_all(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` is None, all dimensions are reduced, and a + If `axis` has no entries, all dimensions are reduced, and a tensor with a single element is returned. For example: @@ -1675,7 +1675,7 @@ def reduce_any(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` is None, all dimensions are reduced, and a + If `axis` has no entries, all dimensions are reduced, and a tensor with a single element is returned. For example: diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index f47f38e29e..783d485892 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -621,7 +621,7 @@ def normalize_moments(counts, mean_ss, variance_ss, shift, name=None): """Calculate the mean and variance of based on the sufficient statistics. Args: - counts: A `Tensor` containing the total count of the data (one value). + counts: A `Tensor` containing a the total count of the data (one value). 
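The reduce_* docstring edits above hinge on what happens when `axis` is left unset. A short eager-mode illustration mirroring the reduce_sum docstring example:

import tensorflow as tf

x = tf.constant([[1, 1, 1], [1, 1, 1]])
tf.reduce_sum(x)                         # 6: with axis unset, all dimensions are reduced
tf.reduce_sum(x, axis=0)                 # [2, 2, 2]
tf.reduce_sum(x, axis=1)                 # [3, 3]
tf.reduce_sum(x, axis=1, keepdims=True)  # [[3], [3]]
tf.reduce_sum(x, axis=[0, 1])            # 6
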
mean_ss: A `Tensor` containing the mean sufficient statistics: the (possibly shifted) sum of the elements to average over. variance_ss: A `Tensor` containing the variance sufficient statistics: the @@ -689,9 +689,6 @@ def moments( # Compute true mean while keeping the dims for proper broadcasting. mean = math_ops.reduce_mean(y, axes, keepdims=True, name="mean") # sample variance, not unbiased variance - # Note: stop_gradient does not change the gradient that gets - # backpropagated to the mean from the variance calculation, - # because that gradient is zero variance = math_ops.reduce_mean( math_ops.squared_difference(y, array_ops.stop_gradient(mean)), axes, diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 0c2f5b06c4..a0b55eb077 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1596,12 +1596,12 @@ def leaky_relu(features, alpha=0.2, name=None): Returns: The activation value. """ - with ops.name_scope(name, "LeakyRelu", [features, alpha]) as name: + with ops.name_scope(name, "LeakyRelu", [features, alpha]): features = ops.convert_to_tensor(features, name="features") if features.dtype.is_integer: features = math_ops.to_float(features) alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha") - return math_ops.maximum(alpha * features, features, name=name) + return math_ops.maximum(alpha * features, features) def _flatten_outer_dims(logits): diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py index 035b4735af..46a5f4fae6 100644 --- a/tensorflow/python/ops/nn_test.py +++ b/tensorflow/python/ops/nn_test.py @@ -962,16 +962,6 @@ class LeakyReluTest(test_lib.TestCase): self.assertAllClose( outputs, [-0.4, -0.2, 0.0, 1.0, 2.0], rtol=tol, atol=tol) - def testName(self): - np_values = np.array([-2, -1, 0, 1, 2], dtype=np.float64) - outputs_with_name_set = nn_ops.leaky_relu( - constant_op.constant(np_values), - name='test_relu_op') - self.assertEqual(outputs_with_name_set.name, 'test_relu_op:0') - outputs_without_name_set = nn_ops.leaky_relu( - constant_op.constant(np_values)) - self.assertEqual(outputs_without_name_set.name, 'LeakyRelu:0') - class SwishTest(test_lib.TestCase): diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py index 219562de5d..f8676ccb5f 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -23,7 +23,6 @@ import threading # Used by py_util.cc to get tracebacks. import traceback # pylint: disable=unused-import -import weakref import numpy as np import six @@ -130,14 +129,11 @@ class FuncRegistry(object): def __init__(self): self._lock = threading.Lock() self._unique_id = 0 # GUARDED_BY(self._lock) - # Only store weakrefs to the funtions. The strong reference is stored in - # the graph. - self._funcs = weakref.WeakValueDictionary() + self._funcs = {} def insert(self, func): """Registers `func` and returns a unique token for this entry.""" token = self._next_unique_token() - # Store a weakref to the function self._funcs[token] = func return token @@ -190,7 +186,7 @@ class FuncRegistry(object): Raises: ValueError: if no function is registered for `token`. 
""" - func = self._funcs.get(token, None) + func = self._funcs[token] if func is None: raise ValueError("callback %s is not found" % token) if isinstance(func, EagerFunc): @@ -232,6 +228,19 @@ _py_funcs = FuncRegistry() pywrap_tensorflow.InitializePyTrampoline(_py_funcs) +class CleanupFunc(object): + """A helper class to remove a registered function from _py_funcs.""" + + def __init__(self, token): + self._token = token + + def __del__(self): + if _py_funcs is not None: + # If _py_funcs is None, the program is most likely in shutdown, and the + # _py_funcs object has been destroyed already. + _py_funcs.remove(self._token) + + def _internal_py_func(func, inp, Tout, @@ -261,15 +270,17 @@ def _internal_py_func(func, # bound to that of the outer graph instead. graph = graph._outer_graph + cleanup = CleanupFunc(token) + # TODO(zhifengc): Consider adding a Graph method to collect # `cleanup` objects in one of its member. - if not hasattr(graph, "_py_funcs_used_in_graph"): - graph._py_funcs_used_in_graph = [] + if not hasattr(graph, "_cleanup_py_funcs_used_in_graph"): + graph._cleanup_py_funcs_used_in_graph = [] - # Store a reference to the function in the graph to ensure it stays alive - # as long as the graph lives. When the graph is destroyed, the function - # is left to the garbage collector for destruction as well. - graph._py_funcs_used_in_graph.append(func) + # When `graph` is destroyed, elements in _cleanup_py_funcs_used_in_graph + # will be destroyed and their __del__ will remove the 'token' from + # the funcs registry. + graph._cleanup_py_funcs_used_in_graph.append(cleanup) # pylint: enable=protected-access if eager: diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index c3b16a7bd5..0130233746 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -84,8 +84,6 @@ def _convert_to_sparse_tensors(sp_inputs): # pylint: disable=protected-access @tf_export("sparse_concat") -@deprecation.deprecated_args( - None, "concat_dim is deprecated, use axis instead", "concat_dim") def sparse_concat(axis, sp_inputs, name=None, @@ -599,8 +597,6 @@ class KeywordRequired(object): @tf_export("sparse_split") -@deprecation.deprecated_args( - None, "split_dim is deprecated, use axis instead", "split_dim") def sparse_split(keyword_required=KeywordRequired(), sp_input=None, num_split=None, diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py index 0280c89c10..ae79c01949 100644 --- a/tensorflow/python/ops/string_ops.py +++ b/tensorflow/python/ops/string_ops.py @@ -91,59 +91,6 @@ def string_split(source, delimiter=" ", skip_empty=True): # pylint: disable=inv shape.set_shape([2]) return sparse_tensor.SparseTensor(indices, values, shape) -@tf_export("strings.split") -def string_split_v2(source, sep=None, maxsplit=-1): - """Split elements of `source` based on `sep` into a `SparseTensor`. - - Let N be the size of source (typically N will be the batch size). Split each - element of `source` based on `sep` and return a `SparseTensor` - containing the split tokens. Empty tokens are ignored. - - For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c', - then the output will be - - st.indices = [0, 0; - 0, 1; - 1, 0; - 1, 1; - 1, 2] - st.shape = [2, 3] - st.values = ['hello', 'world', 'a', 'b', 'c'] - - If `sep` is given, consecutive delimiters are not grouped together and are - deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and - sep of `"<>"` returns `["1", "2", "", "3"]`. 
If `sep` is None or an empty - string, consecutive whitespace are regarded as a single separator, and the - result will contain no empty strings at the startor end if the string has - leading or trailing whitespace. - - Note that the above mentioned behavior matches python's str.split. - - Args: - source: `1-D` string `Tensor`, the strings to split. - sep: `0-D` string `Tensor`, the delimiter character. - maxsplit: An `int`. If `maxsplit > 0`, limit of the split of the result. - - Raises: - ValueError: If sep is not a string. - - Returns: - A `SparseTensor` of rank `2`, the strings split according to the delimiter. - The first column of the indices corresponds to the row in `source` and the - second column corresponds to the index of the split component in this row. - """ - if sep is None: - sep = '' - sep = ops.convert_to_tensor(sep, dtype=dtypes.string) - source = ops.convert_to_tensor(source, dtype=dtypes.string) - - indices, values, shape = gen_string_ops.string_split_v2( - source, sep=sep, maxsplit=maxsplit) - indices.set_shape([None, 2]) - values.set_shape([None]) - shape.set_shape([2]) - return sparse_tensor.SparseTensor(indices, values, shape) - def _reduce_join_reduction_dims(x, axis, reduction_indices): """Returns range(rank(x) - 1, 0, -1) if reduction_indices is None.""" diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 47414c28af..f49e2d314d 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1786,23 +1786,6 @@ class variable_scope(object): assert v.name == "foo/bar/v:0" ``` - Simple example of how to reenter a premade variable scope safely: - - ```python - with tf.variable_scope("foo") as vs: - pass - - # Re-enter the variable scope. - with tf.variable_scope(vs, - auxiliary_name_scope=False) as vs1: - # Restore the original name_scope. - with tf.name_scope(vs1.original_name_scope): - v = tf.get_variable("v", [1]) - assert v.name == "foo/v:0" - c = tf.constant([1], name="c") - assert c.name == "foo/c:0" - ``` - Basic example of sharing a variable AUTO_REUSE: ```python @@ -1941,9 +1924,7 @@ class variable_scope(object): (which must have the same shape). Constraints are not safe to use when doing asynchronous distributed training. auxiliary_name_scope: If `True`, we create an auxiliary name scope with - the scope. If `False`, we don't create it. Note that the argument is - not inherited, and it only takes effect for once when creating. You - should only use it for re-entering a premade variable scope. + the scope. If `False`, we don't touch name scope. Returns: A scope that can be captured and reused. 
diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py old mode 100644 new mode 100755 diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index b59f8e1f98..522965990b 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1719,7 +1719,7 @@ def tf_py_build_info_genrule(): name="py_build_info_gen", outs=["platform/build_info.py"], cmd= - "$(location //tensorflow/tools/build_info:gen_build_info.py) --raw_generate \"$@\" --build_config " + if_cuda("cuda", "cpu"), + "$(location //tensorflow/tools/build_info:gen_build_info.py) --raw_generate \"$@\" --build_config " + if_cuda("cuda", "cpu"), local=1, tools=[clean_dep("//tensorflow/tools/build_info:gen_build_info.py")],) diff --git a/tensorflow/tools/api/generator/create_python_api.py b/tensorflow/tools/api/generator/create_python_api.py index 671b7e387e..bca9fa49eb 100644 --- a/tensorflow/tools/api/generator/create_python_api.py +++ b/tensorflow/tools/api/generator/create_python_api.py @@ -41,11 +41,7 @@ _GENERATED_FILE_HEADER = """# This file is MACHINE GENERATED! Do not edit. # Generated by: tensorflow/tools/api/generator/create_python_api.py script. \"\"\"%s \"\"\" - -from __future__ import print_function - """ -_GENERATED_FILE_FOOTER = "\n\ndel print_function\n" class SymbolExposedTwiceError(Exception): @@ -153,7 +149,6 @@ class _ModuleInitCodeBuilder(object): _names_with_underscore = [%s] __all__ = [_s for _s in dir() if not _s.startswith('_')] __all__.extend([_s for _s in _names_with_underscore]) -__all__.remove('print_function') ''' % underscore_names_str return module_text_map @@ -338,8 +333,7 @@ def create_api_files( if module or not root_init_template: contents = ( _GENERATED_FILE_HEADER % - get_module_docstring(module, package, api_name) + - text + _GENERATED_FILE_FOOTER) + get_module_docstring(module, package, api_name) + text) else: # Read base init file with open(root_init_template, 'r') as root_init_template_file: diff --git a/tensorflow/tools/api/golden/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/tensorflow.image.pbtxt index 10171b3d60..5bb3b3c444 100644 --- a/tensorflow/tools/api/golden/tensorflow.image.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.image.pbtxt @@ -58,7 +58,7 @@ tf_module { } member_method { name: "decode_image" - argspec: "args=[\'contents\', \'channels\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"\", \'None\'], " + argspec: "args=[\'contents\', \'channels\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "decode_jpeg" diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 3051c4437e..dc2bd40096 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -1532,10 +1532,6 @@ tf_module { name: "pow" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "print" - argspec: "args=[\'input_\', \'data\', \'message\', \'first_n\', \'summarize\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " - } member_method { name: "py_func" argspec: "args=[\'func\', \'inp\', \'Tout\', \'stateful\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " diff --git a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt index b641c39feb..a3fbe95bba 100644 
--- a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt @@ -4,8 +4,4 @@ tf_module { name: "regex_full_match" argspec: "args=[\'input\', \'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "split" - argspec: "args=[\'source\', \'sep\', \'maxsplit\'], varargs=None, keywords=None, defaults=[\'None\', \'-1\'], " - } } diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh index 883bb93647..5fa75e1d61 100755 --- a/tensorflow/tools/ci_build/builds/pip.sh +++ b/tensorflow/tools/ci_build/builds/pip.sh @@ -322,10 +322,6 @@ create_activate_virtualenv_and_install_tensorflow() { pip install -v ${PIP_FLAGS} ${WHL_PATH} || \ die "pip install (forcing to reinstall tensorflow) FAILED" echo "Successfully installed pip package ${TF_WHEEL_PATH}" - - # Force downgrade setuptools. - pip install --upgrade setuptools==39.1.0 - } ################################################################################ diff --git a/tensorflow/tools/ci_build/builds/with_the_same_user b/tensorflow/tools/ci_build/builds/with_the_same_user index b216e3549f..d4bf546d40 100755 --- a/tensorflow/tools/ci_build/builds/with_the_same_user +++ b/tensorflow/tools/ci_build/builds/with_the_same_user @@ -40,7 +40,7 @@ if [ -n "${CI_BUILD_USER_FORCE_BADNAME}" ]; then ADDUSER_OPTS="--force-badname" fi -getent group "${CI_BUILD_GID}" || addgroup ${ADDUSER_OPTS} --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" +getent group "${CI_BUILD_GID}" || addgroup --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \ --gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" \ --gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" \ diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh index 1f0fd0387a..072dd6ab99 100755 --- a/tensorflow/tools/ci_build/ci_build.sh +++ b/tensorflow/tools/ci_build/ci_build.sh @@ -134,12 +134,6 @@ if [[ $? != "0" ]]; then die "ERROR: docker build failed. Dockerfile is at ${DOCKERFILE_PATH}" fi -# If caller wants the with_the_same_user script to allow bad usernames, -# pass the var to the docker environment -if [ -n "${CI_BUILD_USER_FORCE_BADNAME}" ]; then - CI_BUILD_USER_FORCE_BADNAME_ENV="-e CI_BUILD_USER_FORCE_BADNAME=yes" -fi - # Run the command inside the container. echo "Running '${COMMAND[*]}' inside ${DOCKER_IMG_NAME}..." 
mkdir -p ${WORKSPACE}/bazel-ci_build-cache @@ -154,7 +148,6 @@ ${DOCKER_BINARY} run --rm --pid=host \ -e "CI_BUILD_GROUP=$(id -g -n)" \ -e "CI_BUILD_GID=$(id -g)" \ -e "CI_TENSORFLOW_SUBMODULE_PATH=${CI_TENSORFLOW_SUBMODULE_PATH}" \ - ${CI_BUILD_USER_FORCE_BADNAME_ENV} \ -v ${WORKSPACE}:/workspace \ -w /workspace \ ${GPU_EXTRA_PARAMS} \ diff --git a/tensorflow/tools/ci_build/copy_binary.py b/tensorflow/tools/ci_build/copy_binary.py index 148526492d..420d390d2b 100755 --- a/tensorflow/tools/ci_build/copy_binary.py +++ b/tensorflow/tools/ci_build/copy_binary.py @@ -32,8 +32,7 @@ import shutil import tempfile import zipfile -TF_NIGHTLY_REGEX = (r"(.+)tf_nightly(|_gpu)-(\d\.[\d]{1,2}" - "\.\d.dev[\d]{0,8})-(.+)\.whl") +TF_NIGHTLY_REGEX = r"(.+)tf_nightly(|_gpu)-(\d\.\d\.\d.dev[\d]{0,8})-(.+)\.whl" BINARY_STRING_TEMPLATE = "%s-%s-%s.whl" diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh index 88f1d04193..60290df833 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh @@ -115,7 +115,3 @@ pip2 install keras_applications==1.0.2 pip3 install keras_applications==1.0.2 pip2 install keras_preprocessing==1.0.1 pip3 install keras_preprocessing==1.0.1 - -# Install last working version of setuptools. -pip2 install --upgrade setuptools==39.1.0 -pip3 install --upgrade setuptools==39.1.0 diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh index acd69ef346..edb9d4b929 100755 --- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh @@ -39,6 +39,7 @@ if [[ -z $pip35_version ]]; then fi set -e +pip3.5 install --upgrade setuptools pip3.5 install --upgrade pip pip3.5 install --upgrade virtualenv @@ -85,7 +86,4 @@ pip3.5 install --upgrade termcolor pip3.5 install keras_applications==1.0.2 pip3.5 install keras_preprocessing==1.0.1 -# Install last working version of setuptools. -pip3.5 install --upgrade setuptools==39.1.0 - # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh) diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh index 323b30f48e..5635977731 100755 --- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh @@ -49,6 +49,7 @@ cd Python-3.6.1 make altinstall ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 +pip3 install --upgrade setuptools pip3 install --upgrade pip pip3 install --upgrade virtualenv @@ -100,8 +101,4 @@ pip3 install --upgrade termcolor # Keras pip3.5 install keras_applications==1.0.2 pip3.5 install keras_preprocessing==1.0.1 - -# Install last working version of setuptools. -pip3 install --upgrade setuptools==39.1.0 - # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh) diff --git a/tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh b/tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh deleted file mode 100755 index 10a09a415a..0000000000 --- a/tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# Usage: basic_mkl_test.sh - -# Helper function to traverse directories up until given file is found. -function upsearch () { - test / == "$PWD" && return || \ - test -e "$1" && echo "$PWD" && return || \ - cd .. && upsearch "$1" -} - -# Set up WORKSPACE. -WORKSPACE="${WORKSPACE:-$(upsearch WORKSPACE)}" - -BUILD_TAG=mkl-ci-test CI_BUILD_USER_FORCE_BADNAME=yes ${WORKSPACE}/tensorflow/tools/ci_build/ci_build.sh cpu tensorflow/tools/ci_build/linux/cpu/run_mkl.sh diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh index b8bce57c87..1bd1852ffc 100755 --- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh +++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh @@ -79,7 +79,6 @@ if [[ $1 == "PI_ONE" ]]; then --linkopt=-L${OPENBLAS_INSTALL_PATH}/lib/ --linkopt=-l:libopenblas.a" echo "Building for the Pi One/Zero, with no NEON support" - WHEEL_ARCH=linux_armv6l else PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4 --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR @@ -87,7 +86,6 @@ else --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8' - WHEEL_ARCH=linux_armv7l echo "Building for the Pi Two/Three, with NEON acceleration" fi @@ -102,8 +100,6 @@ bazel build -c opt ${PI_COPTS} \ --copt=-fomit-frame-pointer --cpu=armeabi \ --crosstool_top=@local_config_arm_compiler//:toolchain \ --verbose_failures \ - //tensorflow:libtensorflow.so \ - //tensorflow:libtensorflow_framework.so \ //tensorflow/tools/benchmark:benchmark_model \ //tensorflow/tools/pip_package:build_pip_package @@ -116,12 +112,10 @@ BDIST_OPTS="--universal" \ bazel-bin/tensorflow/tools/pip_package/build_pip_package "${OUTDIR}" OLD_FN=$(ls "${OUTDIR}" | grep -m 1 \.whl) -SUB='s/tensorflow-([^-]+)-([^-]+)-.*/tensorflow-\1-\2-none-'${WHEEL_ARCH}'.whl/; print' +SUB='s/tensorflow-([^-]+)-([^-]+)-.*/tensorflow-\1-\2-none-any.whl/; print' NEW_FN=$(echo "${OLD_FN}" | perl -ne "${SUB}") mv "${OUTDIR}/${OLD_FN}" "${OUTDIR}/${NEW_FN}" cp bazel-bin/tensorflow/tools/benchmark/benchmark_model "${OUTDIR}" -cp bazel-bin/tensorflow/libtensorflow.so "${OUTDIR}" -cp bazel-bin/tensorflow/libtensorflow_framework.so "${OUTDIR}" echo "Output can be found here:" find "${OUTDIR}" diff --git a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl index f8f63e276c..47539b2423 100644 --- a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl +++ b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl @@ -31,11 +31,7 @@ def _def_file_filter_configure_impl(repository_ctx): vc_path = find_vc_path(repository_ctx) if vc_path == "visual-studio-not-found": auto_configure_fail("Visual C++ build tools not found on your machine") - - undname = find_msvc_tool(repository_ctx, vc_path, 
"undname.exe") - if undname == None: - auto_configure_fail("Couldn't find undname.exe under %s, please check your VC installation and set BAZEL_VC environment variable correctly." % vc_path) - undname_bin_path = undname.replace("\\", "\\\\") + undname_bin_path = find_msvc_tool(repository_ctx, vc_path, "undname.exe").replace("\\", "\\\\") repository_ctx.template( "def_file_filter.py", diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh index b0114721bd..06c2b997cb 100755 --- a/tensorflow/tools/dist_test/local_test.sh +++ b/tensorflow/tools/dist_test/local_test.sh @@ -64,6 +64,9 @@ die() { # Configurations DOCKER_IMG_NAME="tensorflow/tf-dist-test-local-cluster" +# Use TensorFlow v1.5.0 for Python 2.7 and CPU only as we set num_gpus to 0 in the below +DEFAULT_WHL_FILE_LOCATION="https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl" + # Parse input arguments LEAVE_CONTAINER_RUNNING=0 MODEL_NAME="" @@ -74,7 +77,8 @@ SYNC_REPLICAS_FLAG="" WHL_FILE_LOCATION=${1} if [[ -z "${WHL_FILE_LOCATION}" ]]; then - echo "WARNING: No wheel url passed. Will use latest tf-nightly cpu p2 wheel." + WHL_FILE_LOCATION=${DEFAULT_WHL_FILE_LOCATION} + echo "use default whl file location" fi while true; do @@ -127,11 +131,7 @@ echo "Building in temporary directory: ${BUILD_DIR}" cp -r ${DIR}/* "${BUILD_DIR}"/ || \ die "Failed to copy files to ${BUILD_DIR}" -# Download whl file into the build context directory. -if [[ -z "${WHL_FILE_LOCATION}" ]]; then - pip2 download --no-deps tf-nightly - cp tf-nightly-*.whl "${BUILD_DIR}"/tensorflow-none-any.whl -elif [[ $WHL_FILE_LOCATION =~ 'http://' || $WHL_FILE_LOCATION =~ 'https://' ]]; then +if [[ $WHL_FILE_LOCATION =~ 'http://' || $WHL_FILE_LOCATION =~ 'https://' ]]; then # Download whl file into the build context directory. wget -P "${BUILD_DIR}" "${WHL_FILE_LOCATION}" || \ die "Failed to download tensorflow whl file from URL: ${WHL_FILE_LOCATION}" diff --git a/tensorflow/tools/dist_test/remote_test.sh b/tensorflow/tools/dist_test/remote_test.sh index e188c88c8f..935535312d 100755 --- a/tensorflow/tools/dist_test/remote_test.sh +++ b/tensorflow/tools/dist_test/remote_test.sh @@ -108,7 +108,7 @@ fi # Parse command-line arguments. WHL_URL=${1} if [[ -z "${WHL_URL}" ]]; then - echo "WARNING: No wheel url passed. Will use latest tf-nightly cpu p2 wheel." + die "whl URL is not specified" fi # Create docker build context directory. @@ -121,13 +121,8 @@ cp -r ${DIR}/* ${BUILD_DIR}/ || \ die "Failed to copy files to ${BUILD_DIR}" # Download whl file into the build context directory. -if [[ -z "${WHL_URL}" ]]; then - pip2 download --no-deps tf-nightly - cp tf-nightly-*.whl "${BUILD_DIR}"/tensorflow-none-any.whl -else - wget -P "${BUILD_DIR}" ${WHL_URL} || \ - die "Failed to download tensorflow whl file from URL: ${WHL_URL}" -fi +wget -P "${BUILD_DIR}" ${WHL_URL} || \ + die "Failed to download tensorflow whl file from URL: ${WHL_URL}" # Build docker image for test. docker build ${NO_CACHE_FLAG} \ diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 57a491255e..406d134699 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -76,7 +76,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.9 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.git . 
# TODO(craigcitro): Don't install the pip package, since it makes it # more difficult to experiment with local changes. Instead, just add diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl index 6796ad70e5..a6cd44ced1 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -3,7 +3,7 @@ FROM tensorflow/tensorflow:latest-devel LABEL maintainer="Clayne Robison" # These arguments are parameterized. Use --build-args to override. -ARG TF_BRANCH=r1.9 +ARG TF_BRANCH=r1.8 ARG WHL_DIR=/whl RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 204b5b4dba..2fe47f3356 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -13,8 +13,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ cuda-cusparse-dev-9-0 \ curl \ git \ - libcudnn7=7.1.4.18-1+cuda9.0 \ - libcudnn7-dev=7.1.4.18-1+cuda9.0 \ + libcudnn7=7.0.5.15-1+cuda9.0 \ + libcudnn7-dev=7.0.5.15-1+cuda9.0 \ libcurl3-dev \ libfreetype6-dev \ libhdf5-serial-dev \ @@ -85,7 +85,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.9 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.git . # Configure the build for our CUDA configuration. ENV CI_BUILD_PYTHON python diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu index 9197651ff4..bff4a20392 100644 --- a/tensorflow/tools/docker/Dockerfile.gpu +++ b/tensorflow/tools/docker/Dockerfile.gpu @@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ cuda-cusolver-9-0 \ cuda-cusparse-9-0 \ curl \ - libcudnn7=7.1.4.18-1+cuda9.0 \ + libcudnn7=7.0.5.15-1+cuda9.0 \ libfreetype6-dev \ libhdf5-serial-dev \ libpng12-dev \ diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 620fef9363..5910f0625e 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -61,7 +61,6 @@ COMMON_PIP_DEPS = [ "//tensorflow/contrib/autograph/core:core", "//tensorflow/contrib/autograph/impl:impl", "//tensorflow/contrib/autograph/lang:lang", - "//tensorflow/contrib/autograph/operators:operators", "//tensorflow/contrib/autograph/pyct:pyct", "//tensorflow/contrib/autograph/pyct/static_analysis:static_analysis", "//tensorflow/contrib/boosted_trees:boosted_trees_pip", diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index f7e42ce536..0c4065bc77 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -41,15 +41,51 @@ function is_windows() { fi } -function prepare_src() { +function main() { if [ $# -lt 1 ] ; then echo "No destination dir provided" exit 1 fi - TMPDIR="$1" - mkdir -p "$TMPDIR" - echo $(date) : "=== Preparing sources in dir: ${TMPDIR}" + DEST=$(real_path $1) + TMPDIR=$(mktemp -d -t tmp.XXXXXXXXXX) + + PKG_NAME_FLAG="" + GPU_BUILD=0 + NIGHTLY_BUILD=0 + PROJECT_NAME="" + while true; do + if [[ "$1" == "--nightly_flag" ]]; then + NIGHTLY_BUILD=1 + elif [[ "$1" == "--gpu" ]]; then + GPU_BUILD=1 + elif [[ "$1" == "--gpudirect" ]]; then + PKG_NAME_FLAG="--project_name tensorflow_gpudirect" + elif [[ "$1" == 
"--project_name" ]]; then + shift + if [[ -z "$1" ]]; then + break + fi + PROJECT_NAME="$1" + fi + shift + + if [[ -z "$1" ]]; then + break + fi + done + + if [[ -n ${PROJECT_NAME} ]]; then + PKG_NAME_FLAG="--project_name ${PROJECT_NAME}" + elif [[ ${NIGHTLY_BUILD} == "1" && ${GPU_BUILD} == "1" ]]; then + PKG_NAME_FLAG="--project_name tf_nightly_gpu" + elif [[ ${NIGHTLY_BUILD} == "1" ]]; then + PKG_NAME_FLAG="--project_name tf_nightly" + elif [[ ${GPU_BUILD} == "1" ]]; then + PKG_NAME_FLAG="--project_name tensorflow_gpu" + fi + + echo $(date) : "=== Using tmpdir: ${TMPDIR}" if [ ! -d bazel-bin/tensorflow ]; then echo "Could not find bazel-bin. Did you run from the root of the build tree?" @@ -119,28 +155,17 @@ function prepare_src() { # over so user defined ops can be compiled. mkdir -p ${TMPDIR}/google mkdir -p ${TMPDIR}/third_party - pushd ${RUNFILES%org_tensorflow} > /dev/null + pushd ${RUNFILES%org_tensorflow} for header in $(find protobuf_archive -name \*.h); do mkdir -p "${TMPDIR}/google/$(dirname ${header})" cp "$header" "${TMPDIR}/google/$(dirname ${header})/" done - popd > /dev/null + popd cp -R $RUNFILES/third_party/eigen3 ${TMPDIR}/third_party cp tensorflow/tools/pip_package/MANIFEST.in ${TMPDIR} cp tensorflow/tools/pip_package/README ${TMPDIR} cp tensorflow/tools/pip_package/setup.py ${TMPDIR} -} - -function build_wheel() { - if [ $# -lt 2 ] ; then - echo "No src and dest dir provided" - exit 1 - fi - - TMPDIR="$1" - DEST="$2" - PKG_NAME_FLAG="$3" # Before we leave the top-level directory, make sure we know how to # call python. @@ -148,110 +173,15 @@ function build_wheel() { source tools/python_bin_path.sh fi - pushd ${TMPDIR} > /dev/null + pushd ${TMPDIR} rm -f MANIFEST echo $(date) : "=== Building wheel" "${PYTHON_BIN_PATH:-python}" setup.py bdist_wheel ${PKG_NAME_FLAG} >/dev/null mkdir -p ${DEST} cp dist/* ${DEST} - popd > /dev/null + popd + rm -rf ${TMPDIR} echo $(date) : "=== Output wheel file is in: ${DEST}" } -function usage() { - echo "Usage:" - echo "$0 [--src srcdir] [--dst dstdir] [options]" - echo "$0 dstdir [options]" - echo "" - echo " --src prepare sources in srcdir" - echo " will use temporary dir if not specified" - echo "" - echo " --dst build wheel in dstdir" - echo " if dstdir is not set do not build, only prepare sources" - echo "" - echo " Options:" - echo " --project_name set project name to name" - echo " --gpu build tensorflow_gpu" - echo " --gpudirect build tensorflow_gpudirect" - echo " --nightly_flag build tensorflow nightly" - echo "" - exit 1 -} - -function main() { - PKG_NAME_FLAG="" - PROJECT_NAME="" - GPU_BUILD=0 - NIGHTLY_BUILD=0 - SRCDIR="" - DSTDIR="" - CLEANSRC=1 - while true; do - if [[ "$1" == "--help" ]]; then - usage - exit 1 - elif [[ "$1" == "--nightly_flag" ]]; then - NIGHTLY_BUILD=1 - elif [[ "$1" == "--gpu" ]]; then - GPU_BUILD=1 - elif [[ "$1" == "--gpudirect" ]]; then - PKG_NAME_FLAG="--project_name tensorflow_gpudirect" - elif [[ "$1" == "--project_name" ]]; then - shift - if [[ -z "$1" ]]; then - break - fi - PROJECT_NAME="$1" - elif [[ "$1" == "--src" ]]; then - shift - SRCDIR="$(real_path $1)" - CLEANSRC=0 - elif [[ "$1" == "--dst" ]]; then - shift - DSTDIR="$(real_path $1)" - else - DSTDIR="$(real_path $1)" - fi - shift - - if [[ -z "$1" ]]; then - break - fi - done - - if [[ -z "$DSTDIR" ]] && [[ -z "$SRCDIR" ]]; then - echo "No destination dir provided" - usage - exit 1 - fi - - if [[ -z "$SRCDIR" ]]; then - # make temp srcdir if none set - SRCDIR="$(mktemp -d -t tmp.XXXXXXXXXX)" - fi - - prepare_src "$SRCDIR" - - if [[ -z 
"$DSTDIR" ]]; then - # only want to prepare sources - exit - fi - - if [[ -n ${PROJECT_NAME} ]]; then - PKG_NAME_FLAG="--project_name ${PROJECT_NAME}" - elif [[ ${NIGHTLY_BUILD} == "1" && ${GPU_BUILD} == "1" ]]; then - PKG_NAME_FLAG="--project_name tf_nightly_gpu" - elif [[ ${NIGHTLY_BUILD} == "1" ]]; then - PKG_NAME_FLAG="--project_name tf_nightly" - elif [[ ${GPU_BUILD} == "1" ]]; then - PKG_NAME_FLAG="--project_name tensorflow_gpu" - fi - - build_wheel "$SRCDIR" "$DSTDIR" "$PKG_NAME_FLAG" - - if [[ $CLEANSRC -ne 0 ]]; then - rm -rf "${TMPDIR}" - fi -} - main "$@" diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 97f625e7e9..d25a9e77b1 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n') # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.9.0-rc0' +_VERSION = '1.8.0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', @@ -54,7 +54,6 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', - 'setuptools <= 39.1.0', 'tensorboard >= 1.8.0, < 1.9.0', 'termcolor >= 1.1.0', ] diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc index 15d7c70281..29add6d5ea 100644 --- a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc +++ b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc @@ -814,9 +814,6 @@ void Generator::Generate(const FileDescriptor& fd) { // Add header to cc file. SetOutput(&cc_); Print("// GENERATED FILE - DO NOT MODIFY"); - Print(); - Print("#include "); // for `std::stable_sort()` - Print(); headers = {GetProtoTextHeaderName(fd, true /* impl */)}; AddHeadersToCurrentSection(headers); Print(); diff --git a/tensorflow/tools/quantization/quantize_graph_test.py b/tensorflow/tools/quantization/quantize_graph_test.py index 92bb5127da..df71840b64 100644 --- a/tensorflow/tools/quantization/quantize_graph_test.py +++ b/tensorflow/tools/quantization/quantize_graph_test.py @@ -119,8 +119,8 @@ def are_tensors_near(a, b, tolerance): flat_a = a.flatten() flat_b = b.flatten() if len(flat_a) != len(flat_b): - tf_logging.info("Tensors are different sizes: " + str(len(flat_a)) + " vs " - + str(len(flat_b))) + print("Tensors are different sizes: " + str(len(flat_a)) + " vs " + str( + len(flat_b))) return False value_count = len(flat_a) how_many_different = 0 @@ -140,10 +140,10 @@ def are_tensors_near(a, b, tolerance): if how_many_different == 0: return True else: - tf_logging.info("Tensors have {0} different values ({1}%), with mean" - " difference {2} and mean absolute difference {3}".format( - how_many_different, proportion_different * 100, - mean_difference, mean_abs_difference)) + print("Tensors have {0} different values ({1}%), with mean difference" + " {2} and mean absolute difference {3}".format( + how_many_different, proportion_different * 100, mean_difference, + mean_abs_difference)) return False diff --git a/tensorflow/tools/test/upload_test_benchmarks.py b/tensorflow/tools/test/upload_test_benchmarks.py index c030575109..9c45359ee1 100644 --- a/tensorflow/tools/test/upload_test_benchmarks.py +++ b/tensorflow/tools/test/upload_test_benchmarks.py @@ -89,6 +89,7 @@ import shutil from six import text_type from google.cloud import datastore +from six import text_type def is_real_file(dirpath, fname): diff --git 
a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 4f3df570a5..dbec66216a 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -50,31 +50,31 @@ def tf_workspace(path_prefix="", tf_repo_name=""): mkl_repository( name = "mkl_linux", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.14/mklml_lnx_2018.0.3.20180406.tgz", - "https://github.com/intel/mkl-dnn/releases/download/v0.14/mklml_lnx_2018.0.3.20180406.tgz" + "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_lnx_2018.0.2.20180127.tgz", + "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_lnx_2018.0.2.20180127.tgz", ], - sha256 = "d2305244fdc9b87db7426ed4496e87a4b3977ad3374d73b8000e8b7a5b7aa725", - strip_prefix = "mklml_lnx_2018.0.3.20180406", + sha256 = "74844bd77294742bf2396ff040369d1aa4cdd9e826fcd38cf8398ae83564d146", + strip_prefix = "mklml_lnx_2018.0.2.20180127", build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) mkl_repository( name = "mkl_windows", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.14/mklml_win_2018.0.3.20180406.zip", - "https://github.com/intel/mkl-dnn/releases/download/v0.14/mklml_win_2018.0.3.20180406.zip" + "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_win_2018.0.2.20180127.zip", + "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_win_2018.0.2.20180127.zip" ], - sha256 = "a584a5bf1c8d2ad70b90d12b52652030e9a338217719064fdb84b7ad0d693694", - strip_prefix = "mklml_win_2018.0.3.20180406", + sha256 = "d8fbf0faa0684bffa3548005d05fe5cfe56ff9dbc0e15e7612d7ac01055a6ded", + strip_prefix = "mklml_win_2018.0.2.20180127", build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) mkl_repository( name = "mkl_darwin", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.14/mklml_mac_2018.0.3.20180406.tgz", - "https://github.com/intel/mkl-dnn/releases/download/v0.14/mklml_mac_2018.0.3.20180406.tgz" + "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_mac_2018.0.2.20180127.tgz", + "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_mac_2018.0.2.20180127.tgz" ], - sha256 = "094e3dfd61c816136dc8d12a45cc611ce26c5f4828176a3644cd0b0efa15a25b", - strip_prefix = "mklml_mac_2018.0.3.20180406", + sha256 = "aa740d71e14562bfea56e6829e6dc186e7487cbcf6748a88dec73826b7ec1943", + strip_prefix = "mklml_mac_2018.0.2.20180127", build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) @@ -85,11 +85,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "mkl_dnn", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/v0.14.tar.gz", - "https://github.com/intel/mkl-dnn/archive/v0.14.tar.gz", + "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/v0.13.tar.gz", + "https://github.com/intel/mkl-dnn/archive/v0.13.tar.gz", ], - sha256 = "efebc53882856afec86457a2da644693f5d59c68772d41d640d6b60a8efc4eb0", - strip_prefix = "mkl-dnn-0.14", + sha256 = "d2cfd93a70cfe86ebe054477c530c9b5c1218b70f75856eb6d1956c68ee89e8f", + strip_prefix = "mkl-dnn-0.13", build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"), ) @@ -187,11 +187,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "highwayhash", urls = [ - "http://mirror.bazel.build/github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz", - "https://github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz", 
+ "https://mirror.bazel.build/github.com/google/highwayhash/archive/dfcb97ca4fe9277bf9dc1802dd979b071896453b.tar.gz", + "https://github.com/google/highwayhash/archive/dfcb97ca4fe9277bf9dc1802dd979b071896453b.tar.gz", ], - sha256 = "9c3e0e87d581feeb0c18d814d98f170ff23e62967a2bd6855847f0b2fe598a37", - strip_prefix = "highwayhash-fd3d9af80465e4383162e4a7c5e2f406e82dd968", + sha256 = "0f30a15b1566d93f146c8d149878a06e91d9bb7ec2cfd76906df62a82be4aac9", + strip_prefix = "highwayhash-dfcb97ca4fe9277bf9dc1802dd979b071896453b", build_file = clean_dep("//third_party:highwayhash.BUILD"), ) diff --git a/third_party/eigen.BUILD b/third_party/eigen.BUILD index e54c1a4501..07bb6645eb 100644 --- a/third_party/eigen.BUILD +++ b/third_party/eigen.BUILD @@ -64,7 +64,6 @@ cc_library( # This define (mostly) guarantees we don't link any problematic # code. We use it, but we do not rely on it, as evidenced above. "EIGEN_MPL2_ONLY", - "EIGEN_MAX_ALIGN_BYTES=64", ], includes = ["."], visibility = ["//visibility:public"], diff --git a/third_party/highwayhash.BUILD b/third_party/highwayhash.BUILD index 08cb84ea2c..1b8e40765e 100644 --- a/third_party/highwayhash.BUILD +++ b/third_party/highwayhash.BUILD @@ -10,7 +10,6 @@ cc_library( srcs = ["highwayhash/sip_hash.cc"], hdrs = [ "highwayhash/sip_hash.h", - "highwayhash/endianess.h", "highwayhash/state_helpers.h", ], visibility = ["//visibility:public"], diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/jpeg.BUILD index 663a218733..4418ac32fc 100644 --- a/third_party/jpeg/jpeg.BUILD +++ b/third_party/jpeg/jpeg.BUILD @@ -291,10 +291,8 @@ cc_library( "jchuff.h", "jconfig.h", "jdct.h", - "jerror.h", "jinclude.h", "jmorecfg.h", - "jpegint.h", "jpeglib.h", "jsimd.h", "jsimddct.h", diff --git a/third_party/png.BUILD b/third_party/png.BUILD index 17c5449cc0..76ab32d69c 100644 --- a/third_party/png.BUILD +++ b/third_party/png.BUILD @@ -28,14 +28,7 @@ cc_library( "pngwrite.c", "pngwtran.c", "pngwutil.c", - ] + select({ - "@org_tensorflow//tensorflow:linux_ppc64le": [ - "powerpc/powerpc_init.c", - "powerpc/filter_vsx_intrinsics.c", - ], - "//conditions:default": [ - ], - }), + ], hdrs = [ "png.h", "pngconf.h", diff --git a/third_party/py/python_configure.bzl b/third_party/py/python_configure.bzl index 3c7e5c8469..954f21f5f8 100644 --- a/third_party/py/python_configure.bzl +++ b/third_party/py/python_configure.bzl @@ -6,7 +6,6 @@ * `PYTHON_LIB_PATH`: Location of python libraries. 
""" -_BAZEL_SH = "BAZEL_SH" _PYTHON_BIN_PATH = "PYTHON_BIN_PATH" _PYTHON_LIB_PATH = "PYTHON_LIB_PATH" _TF_PYTHON_CONFIG_REPO = "TF_PYTHON_CONFIG_REPO" @@ -153,22 +152,6 @@ def _get_python_bin(repository_ctx): _PYTHON_BIN_PATH, repository_ctx.os.environ.get("PATH", ""))) -def _get_bash_bin(repository_ctx): - """Gets the bash bin path.""" - bash_bin = repository_ctx.os.environ.get(_BAZEL_SH) - if bash_bin != None: - return bash_bin - else: - bash_bin_path = repository_ctx.which("bash") - if bash_bin_path != None: - return str(bash_bin_path) - else: - _fail("Cannot find bash in PATH, please make sure " + - "bash is installed and add its directory in PATH, or --define " + - "%s='/path/to/bash'.\nPATH=%s" % ( - _BAZEL_SH, repository_ctx.os.environ.get("PATH", ""))) - - def _get_python_lib(repository_ctx, python_bin): """Gets the python lib path.""" python_lib = repository_ctx.os.environ.get(_PYTHON_LIB_PATH) @@ -201,14 +184,14 @@ def _get_python_lib(repository_ctx, python_bin): " print(paths[0])\n" + "END") cmd = '%s - %s' % (python_bin, print_lib) - result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd]) + result = repository_ctx.execute(["bash", "-c", cmd]) return result.stdout.strip('\n') def _check_python_lib(repository_ctx, python_lib): """Checks the python lib path.""" cmd = 'test -d "%s" -a -x "%s"' % (python_lib, python_lib) - result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd]) + result = repository_ctx.execute(["bash", "-c", cmd]) if result.return_code == 1: _fail("Invalid python library path: %s" % python_lib) @@ -216,7 +199,7 @@ def _check_python_lib(repository_ctx, python_lib): def _check_python_bin(repository_ctx, python_bin): """Checks the python bin path.""" cmd = '[[ -x "%s" ]] && [[ ! -d "%s" ]]' % (python_bin, python_bin) - result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd]) + result = repository_ctx.execute(["bash", "-c", cmd]) if result.return_code == 1: _fail("--define %s='%s' is not executable. Is it the python binary?" % ( _PYTHON_BIN_PATH, python_bin)) @@ -311,7 +294,6 @@ def _python_autoconf_impl(repository_ctx): python_configure = repository_rule( implementation = _python_autoconf_impl, environ = [ - _BAZEL_SH, _PYTHON_BIN_PATH, _PYTHON_LIB_PATH, _TF_PYTHON_CONFIG_REPO, diff --git a/third_party/repo.bzl b/third_party/repo.bzl index cb67d3e961..36f5aa5bde 100644 --- a/third_party/repo.bzl +++ b/third_party/repo.bzl @@ -17,6 +17,7 @@ _SINGLE_URL_WHITELIST = depset([ "arm_compiler", "ortools_archive", + "gemmlowp", ]) def _is_windows(ctx): @@ -87,9 +88,7 @@ def _tf_http_archive(ctx): if ctx.attr.patch_file != None: _apply_patch(ctx, ctx.attr.patch_file) if ctx.attr.build_file != None: - # Use BUILD.bazel to avoid conflict with third party projects with - # BUILD or build (directory) underneath. - ctx.template("BUILD.bazel", ctx.attr.build_file, { + ctx.template("BUILD", ctx.attr.build_file, { "%prefix%": ".." if _repos_are_siblings() else "external", }, False) -- cgit v1.2.3 From 6070ae0e148f50dbc8f36e1654f0a3f53b8b067e Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Mon, 18 Jun 2018 21:00:34 -0700 Subject: Merge changes from github. 
PiperOrigin-RevId: 201110240 --- CONTRIBUTING.md | 2 +- README.md | 1 + RELEASE.md | 67 +++- configure.py | 5 + tensorflow/BUILD | 4 +- tensorflow/c/generate-pc.sh | 11 +- tensorflow/cc/gradients/math_grad.cc | 1 + tensorflow/cc/gradients/nn_grad.cc | 47 +++ tensorflow/cc/gradients/nn_grad_test.cc | 84 ++++- tensorflow/compiler/aot/codegen_test_h.golden | 4 +- .../compiler/aot/embedded_protocol_buffers.h | 2 +- tensorflow/compiler/aot/runtime.h | 4 +- tensorflow/compiler/aot/runtime_test.cc | 16 +- tensorflow/compiler/xla/service/cpu/BUILD | 18 +- tensorflow/compiler/xla/service/cpu/cpu_runtime.cc | 2 + tensorflow/compiler/xla/service/cpu/cpu_runtime.h | 1 + tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 8 +- .../compiler/xla/service/cpu/runtime_fft_impl.h | 20 +- .../xla/service/cpu/runtime_single_threaded_fft.cc | 32 ++ .../xla/service/cpu/runtime_single_threaded_fft.h | 31 ++ .../compiler/xla/service/cpu/simple_orc_jit.cc | 2 + tensorflow/compiler/xla/service/pattern_matcher.h | 2 +- .../compiler/xla/service/tuple_simplifier.cc | 7 + tensorflow/compiler/xla/service/tuple_simplifier.h | 9 +- .../compiler/xla/service/tuple_simplifier_test.cc | 77 ++++ tensorflow/contrib/autograph/__init__.py | 3 + tensorflow/contrib/cmake/tf_c.cmake | 22 +- tensorflow/contrib/cmake/tf_cc_ops.cmake | 2 +- tensorflow/contrib/cmake/tf_python.cmake | 3 +- tensorflow/contrib/cmake/tools/create_def_file.py | 9 +- .../bijectors/sinh_arcsinh_bijector_test.py | 28 +- tensorflow/contrib/eager/python/datasets.py | 3 +- .../python/examples/notebooks/4_high_level.ipynb | 4 +- .../feature_column/sequence_feature_column.py | 22 +- .../feature_column/sequence_feature_column_test.py | 41 ++ tensorflow/contrib/ffmpeg/__init__.py | 1 - tensorflow/contrib/ffmpeg/ffmpeg_ops.py | 1 - tensorflow/contrib/framework/__init__.py | 3 +- .../ops/fused_conv2d_bias_activation_op_test.py | 11 +- .../src_impl/hexagon_controller.c | 2 +- tensorflow/contrib/lite/download_dependencies.sh | 4 +- .../contrib/lite/examples/minimal/minimal.cc | 2 +- .../contrib/lite/g3doc/tf_ops_compatibility.md | 14 +- tensorflow/contrib/lite/java/ovic/README.md | 4 +- .../kernels/internal/reference/reference_ops.h | 4 +- tensorflow/contrib/lite/python/interpreter.py | 2 +- .../interpreter_wrapper/interpreter_wrapper.cc | 9 +- .../interpreter_wrapper/interpreter_wrapper.h | 3 +- tensorflow/contrib/lite/python/lite.py | 11 + tensorflow/contrib/lite/toco/import_tensorflow.cc | 2 +- tensorflow/contrib/lite/toco/toco_port.cc | 6 + tensorflow/contrib/lite/toco/toco_port.h | 18 + tensorflow/contrib/makefile/compile_nsync.sh | 2 +- .../contrib/makefile/download_dependencies.sh | 4 +- .../contrib/metrics/python/ops/metric_ops.py | 2 +- tensorflow/contrib/mpi_collectives/kernels/ring.h | 2 +- .../contrib/opt/python/training/adamax_test.py | 6 +- .../opt/python/training/model_average_optimizer.py | 2 +- tensorflow/contrib/periodic_resample/BUILD | 19 +- .../kernels/periodic_resample_op.cc | 5 + .../kernels/periodic_resample_op.h | 415 +++++++++++++++------ .../contrib/periodic_resample/ops/array_ops.cc | 53 ++- .../periodic_resample/ops/array_ops_test.cc | 41 ++ .../kernel_tests/periodic_resample_op_test.py | 27 +- .../python/ops/periodic_resample_op.py | 8 +- .../predictor/contrib_estimator_predictor.py | 5 +- .../contrib/predictor/core_estimator_predictor.py | 5 +- .../contrib/predictor/predictor_factories.py | 24 +- .../contrib/predictor/predictor_factories_test.py | 19 + .../contrib/predictor/saved_model_predictor.py | 6 +- tensorflow/contrib/quantize/README.md 
| 2 +- .../contrib/slim/python/slim/evaluation_test.py | 25 +- tensorflow/contrib/summary/summary.py | 5 +- .../contrib/tensor_forest/client/eval_metrics.py | 45 +-- .../contrib/tensor_forest/python/tensor_forest.py | 34 +- .../tensor_forest/python/tensor_forest_test.py | 45 +++ .../contrib/tensorrt/convert/convert_graph.cc | 66 ++-- .../contrib/tensorrt/convert/convert_nodes.cc | 97 +++-- tensorflow/contrib/tpu/python/tpu/datasets.py | 16 +- tensorflow/contrib/tpu/python/tpu/datasets_test.py | 26 ++ tensorflow/core/BUILD | 9 +- .../core/api_def/base_api/api_def_Selu.pbtxt | 4 + .../api_def/base_api/api_def_StringSplitV2.pbtxt | 48 +++ .../api_def/python_api/api_def_StringSplitV2.pbtxt | 4 + tensorflow/core/common_runtime/bfc_allocator.cc | 8 +- tensorflow/core/common_runtime/bfc_allocator.h | 3 +- .../direct_session_with_tracking_alloc_test.cc | 16 + .../common_runtime/mkl_threadpool_device_test.cc | 53 +++ tensorflow/core/common_runtime/process_util.cc | 11 +- .../core/common_runtime/threadpool_device.cc | 25 +- .../rpc/grpc_master_service_impl.cc | 4 +- .../core/distributed_runtime/rpc/grpc_testlib.cc | 10 +- tensorflow/core/framework/allocator.h | 5 - tensorflow/core/framework/op_gen_lib.cc | 1 + .../remote_fused_graph_execute_info.proto | 2 +- tensorflow/core/framework/tensor_test.cc | 24 +- tensorflow/core/graph/mkl_layout_pass.cc | 148 +++++++- tensorflow/core/graph/mkl_layout_pass_test.cc | 31 ++ .../core/grappler/clusters/single_machine_test.cc | 8 +- tensorflow/core/grappler/costs/graph_properties.cc | 1 - tensorflow/core/grappler/optimizers/BUILD | 2 +- tensorflow/core/grappler/optimizers/remapper.cc | 4 +- tensorflow/core/kernels/as_string_op.cc | 2 + tensorflow/core/kernels/cwise_op_clip.cc | 43 +-- .../core/kernels/dense_update_functor_gpu.cu.cc | 1 + tensorflow/core/kernels/gather_functor.cc | 1 + tensorflow/core/kernels/gather_functor_gpu.cu.cc | 1 + tensorflow/core/kernels/gather_nd_op.cc | 4 + tensorflow/core/kernels/gather_nd_op_gpu.cu.cc | 2 + tensorflow/core/kernels/gather_op.cc | 1 + tensorflow/core/kernels/mkl_concat_op.cc | 213 ++++++++--- tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc | 2 + tensorflow/core/kernels/mkl_pooling_ops_common.h | 6 +- tensorflow/core/kernels/scatter_nd_op.cc | 4 + tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc | 1 + .../core/kernels/scoped_allocator_ops_test.cc | 9 +- tensorflow/core/kernels/segment_reduction_ops.h | 10 +- tensorflow/core/kernels/sparse_matmul_op.cc | 2 +- tensorflow/core/kernels/string_split_op.cc | 130 +++++++ tensorflow/core/ops/candidate_sampling_ops.cc | 5 +- tensorflow/core/ops/dataset_ops.cc | 24 +- tensorflow/core/ops/image_ops.cc | 4 +- tensorflow/core/ops/math_ops.cc | 2 +- tensorflow/core/ops/nn_ops.cc | 1 + tensorflow/core/ops/string_ops.cc | 20 +- tensorflow/core/platform/cpu_info.cc | 23 ++ tensorflow/core/platform/cpu_info.h | 7 + tensorflow/core/platform/default/build_config.bzl | 2 + .../core/platform/hadoop/hadoop_file_system.cc | 21 +- tensorflow/core/platform/posix/port.cc | 5 + tensorflow/core/public/version.h | 4 +- tensorflow/core/util/mkl_util.h | 50 ++- tensorflow/docs_src/community/groups.md | 29 +- tensorflow/docs_src/get_started/eager.md | 2 +- tensorflow/docs_src/get_started/index.md | 4 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 24 +- tensorflow/docs_src/install/install_linux.md | 24 +- tensorflow/docs_src/install/install_mac.md | 10 +- tensorflow/docs_src/install/install_sources.md | 
17 +- tensorflow/docs_src/mobile/linking_libs.md | 2 +- tensorflow/docs_src/mobile/prepare_models.md | 4 +- tensorflow/docs_src/performance/quantization.md | 2 +- .../docs_src/programmers_guide/estimators.md | 19 +- .../docs_src/programmers_guide/feature_columns.md | 4 +- tensorflow/examples/learn/iris.py | 7 +- tensorflow/java/src/gen/cc/op_generator.cc | 11 +- tensorflow/java/src/gen/cc/op_specs.cc | 1 + tensorflow/python/eager/backprop.py | 4 +- tensorflow/python/estimator/BUILD | 5 +- tensorflow/python/estimator/exporter.py | 4 +- tensorflow/python/estimator/inputs/numpy_io.py | 8 +- .../python/estimator/inputs/numpy_io_test.py | 5 +- tensorflow/python/estimator/inputs/pandas_io.py | 7 +- .../python/estimator/inputs/pandas_io_test.py | 5 +- .../estimator/inputs/queues/feeding_functions.py | 2 +- tensorflow/python/estimator/keras.py | 4 +- tensorflow/python/estimator/keras_test.py | 14 +- tensorflow/python/keras/activations.py | 2 + tensorflow/python/keras/callbacks.py | 21 +- tensorflow/python/keras/callbacks_test.py | 2 + tensorflow/python/keras/engine/network.py | 2 +- tensorflow/python/keras/engine/saving_test.py | 4 +- tensorflow/python/keras/engine/training.py | 7 +- tensorflow/python/keras/engine/training_eager.py | 2 +- tensorflow/python/keras/initializers_test.py | 26 +- tensorflow/python/keras/layers/core.py | 26 +- tensorflow/python/keras/models_test.py | 14 + .../python/kernel_tests/as_string_op_test.py | 10 + tensorflow/python/kernel_tests/betainc_op_test.py | 4 +- tensorflow/python/kernel_tests/clip_ops_test.py | 13 + tensorflow/python/kernel_tests/conv_ops_test.py | 32 +- .../python/kernel_tests/gather_nd_op_test.py | 32 +- tensorflow/python/kernel_tests/gather_op_test.py | 20 +- tensorflow/python/kernel_tests/init_ops_test.py | 27 ++ tensorflow/python/kernel_tests/pooling_ops_test.py | 4 +- tensorflow/python/kernel_tests/py_func_test.py | 31 +- .../python/kernel_tests/scatter_nd_ops_test.py | 6 +- tensorflow/python/kernel_tests/scatter_ops_test.py | 14 +- .../kernel_tests/segment_reduction_ops_test.py | 4 +- .../python/kernel_tests/string_split_op_test.py | 96 +++++ tensorflow/python/ops/array_ops.py | 4 + tensorflow/python/ops/gradient_checker.py | 8 +- tensorflow/python/ops/image_ops_impl.py | 74 ++-- tensorflow/python/ops/image_ops_test.py | 261 ++++++++++--- tensorflow/python/ops/init_ops.py | 3 +- tensorflow/python/ops/logging_ops.py | 5 +- tensorflow/python/ops/math_ops.py | 28 +- tensorflow/python/ops/nn_impl.py | 5 +- tensorflow/python/ops/nn_ops.py | 4 +- tensorflow/python/ops/nn_test.py | 10 + tensorflow/python/ops/script_ops.py | 35 +- tensorflow/python/ops/sparse_ops.py | 4 + tensorflow/python/ops/string_ops.py | 53 +++ tensorflow/python/ops/variable_scope.py | 21 +- .../python/tools/import_pb_to_tensorboard.py | 0 tensorflow/tensorflow.bzl | 2 +- .../tools/api/generator/create_python_api.py | 8 +- tensorflow/tools/api/golden/tensorflow.image.pbtxt | 2 +- tensorflow/tools/api/golden/tensorflow.pbtxt | 4 + .../tools/api/golden/tensorflow.strings.pbtxt | 4 + tensorflow/tools/ci_build/builds/pip.sh | 4 + .../tools/ci_build/builds/with_the_same_user | 2 +- tensorflow/tools/ci_build/ci_build.sh | 7 + tensorflow/tools/ci_build/copy_binary.py | 3 +- .../tools/ci_build/install/install_pip_packages.sh | 4 + .../install/install_python3.5_pip_packages.sh | 4 +- .../install/install_python3.6_pip_packages.sh | 5 +- .../tools/ci_build/linux/mkl/basic-mkl-test.sh | 29 ++ tensorflow/tools/ci_build/pi/build_raspberry_pi.sh | 8 +- 
.../def_file_filter/def_file_filter_configure.bzl | 6 +- tensorflow/tools/dist_test/local_test.sh | 12 +- tensorflow/tools/dist_test/remote_test.sh | 11 +- tensorflow/tools/docker/Dockerfile.devel | 2 +- tensorflow/tools/docker/Dockerfile.devel-cpu-mkl | 2 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 6 +- tensorflow/tools/docker/Dockerfile.gpu | 2 +- tensorflow/tools/pip_package/BUILD | 1 + tensorflow/tools/pip_package/build_pip_package.sh | 160 +++++--- tensorflow/tools/pip_package/setup.py | 3 +- .../proto_text/gen_proto_text_functions_lib.cc | 3 + .../tools/quantization/quantize_graph_test.py | 12 +- tensorflow/tools/test/upload_test_benchmarks.py | 1 - tensorflow/workspace.bzl | 40 +- third_party/eigen.BUILD | 1 + third_party/highwayhash.BUILD | 1 + third_party/jpeg/jpeg.BUILD | 2 + third_party/png.BUILD | 9 +- third_party/py/python_configure.bzl | 24 +- third_party/repo.bzl | 5 +- 231 files changed, 3338 insertions(+), 905 deletions(-) create mode 100644 tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc create mode 100644 tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h create mode 100644 tensorflow/contrib/periodic_resample/ops/array_ops_test.cc create mode 100644 tensorflow/core/api_def/base_api/api_def_StringSplitV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt create mode 100644 tensorflow/core/common_runtime/mkl_threadpool_device_test.cc mode change 100755 => 100644 tensorflow/python/tools/import_pb_to_tensorboard.py create mode 100755 tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh (limited to 'configure.py') diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8669c25c45..db4b1581ae 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -90,7 +90,7 @@ Bazel BUILD files also need to include a license section, e.g., Changes to TensorFlow C++ code should conform to [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). -Use `clang-tidy` to check your C/C++ changes. To install clang-tidy on ubuntu:16.04, do: +Use `clang-tidy` to check your C/C++ changes. To install `clang-tidy` on ubuntu:16.04, do: ```bash apt-get install -y clang-tidy diff --git a/README.md b/README.md index 6fb4486d0d..63853137cf 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ $ python 42 >>> sess.close() ``` +Learn more about how to do specific tasks in TensorFlow at the [tutorials page of tensorflow.org](https://www.tensorflow.org/tutorials/). ## Contribution guidelines diff --git a/RELEASE.md b/RELEASE.md index 84d9d52868..e09e9c6190 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,62 @@ +# Release 1.9.0 + +## Major Features And Improvements +* Update tf.keras to the Keras 2.1.6 API. +* `tfe.Network` is deprecated. Please inherit from `tf.keras.Model`. +* Adding support for core feature columns and losses to gradient boosted trees estimators. +* The distributions.Bijector API supports broadcasting for Bijectors with new API changes. See [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/distributions/bijectors/Bijector) for more details. +* Layered variable names have changed in the following conditions: + * Using `tf.keras.layers` with custom variable scopes. + * Using `tf.layers` in a subclassed `tf.keras.Model` class. See [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/layers) for more details. + +## Breaking Changes + * If you're opening empty variable scopes, replace `variable_scope`('', ...) by `variable_scope`(`tf.get_variable_scope()`, ...).
+ +## Bug Fixes and Other Changes +* `tf.data`: + * The `DatasetBase::DebugString()` method is now `const`. + * Added the `tf.contrib.data.sample_from_datasets()` API for randomly sampling from multiple datasets. +* Eager Execution: +* `tf.keras`: + * Move Keras code out of _impl folder and remove API files. + * `tf.keras.Model.save_weights` now saves in TensorFlow format by default. + * Enable dataset iterators to be passed to `tf.keras.Model` training/eval methods. +* Accelerated Linear Algebra (XLA): +* TensorFlow Debugger (tfdbg): fix an issue in which the TensorBoard Debugger Plugin could not handle total source file size exceeding gRPC message size limit (4 MB). +* `tf.contrib`: + * Add `tf.contrib.data.choose_from_datasets()`. + * `tf.contrib.data.make_csv_dataset()` now supports line breaks in quoted strings. Two arguments were removed from `make_csv_dataset`. + * `tf.contrib.framework.zero_initializer` supports ResourceVariable. + * Adding "constrained_optimization" to tensorflow/contrib. +* Other: + * Add GCS Configuration Ops. + * Changing signature of `MakeIterator` to enable propagating error status. + * KL divergence for two Dirichlet distributions. + * More consistent GcsFileSystem behavior for certain reads past EOF. + * Update benchmark for tf.scan to match ranges across eager and graph modes. + * Fixed bug in `tf.reduce_prod gradient` for complex dtypes. + * Add optional `args` argument to `Dataset.from_generator()`. + * Allow the use of '.' in variables (e.g. "hparams.parse('a.b=1.0')"), which would previously raise an error. This will correspond to an attribute name with an embedded '.' symbol (e.g. 'a.b'), which can only be accessed indirectly (e.g. through getattr and setattr). To set this up the user will first need to explicitly add the variable to the hparam object (e.g. "hparams.add_hparam(name='a.b', value=0.0)"). + * Benchmark for tf.scan in graph and eager modes. + * Added complex128 support to FFT, FFT2D, FFT3D, IFFT, IFFT2D, and IFFT3D. + * Making ids unique in `nn.embedding_lookup_sparse`. This helps to reduce RPC calls for looking up the embeddings when there are repeated ids in the batch. + * Support indicator column in boosted trees. + * Prevent `tf.gradients()` from backpropagating through integer tensors. + * LinearOperator[1D,2D,3D]Circulant added to `tensorflow.linalg`. + * Conv3D, Conv3DBackpropInput, Conv3DBackpropFilter now supports arbitrary. + * Added `tf.train.Checkpoint` for reading/writing object-based checkpoints. + * `Dataset.list_files()` now produces determinstic results when `shuffle=False` or a `seed` is passed. + * Added LinearOperatorKronecker, a dense-free implementation of the Kronecker Product. + * Allow LinearOperator to broadcast. + * SavedModelBuilder will now deduplicate asset names that point to files with the same basename and the same contents. Note that this may result in new asset files included in SavedModels in cases where assets with the same name but different contents were previously overwriting each other. + + +## Thanks to our Contributors + +This release contains contributions from many people at Google, as well as: + +Abdullah Alrasheed, Achal Shah, Ad-530, ADiegoCAlonso, Aditya Yogi, Ag Ramesh, akindyakov, Andy Kernahan, Anya Petrova, Aurelien Geron, Ben, Ben Barsdell, Bhavani-Subramanian, braincodercn, Brett Koonce, Brian Nemsick, Brian Zier, Bryan Heden, candy.dc, cclauss, Clayne Robison, ctiijima, Dalmo Cirne, David Norman, David T.H. 
Kao, DosLin, ekelsen, Elson Rodriguez, Erik Smistad, Felix Abecassis, Fergal Cotter, fo40225, foo0x29a, Freedom" Koan-Sin Tan, FréDéRic Branchaud-Charron, gdh1995, Geoffrey Irving, Giuseppe, gracehoney, Guido Zuidhof, Guillaume Klein, Guozhong Zhuang, Haggai, Harald Husum, imsheridan, Ivan Zhang, Jan Zikes, Jayaram Bobba, Jesse Benson, Jesse Gumz, Jiajia Li, Jie, jinghuangintel, Jingwen, jjsjann123, Joe Yearsley, Joel Hestness, Joel Shor, josephyearsley, Junpeng Lao, Karol M. Langner, Kb Sriram, krantideep95, Krish Ravindranath, Letian Feng, Loo Rong Jie, Lukas Geiger, Maciej, Mahmoud Abuzaina, ManHyuk, Mark Ryan, mbhuiyan, Michal Turek, Mostafa Alaa, Myungsung Kwak, Nand Dalal, Nehal J Wani, Neil Tenenholtz, ngc92, Nicholas Nadeau, P.Eng., Avs, Niranjan Hasabnis, P-Hidringer, Paul Van Eck, Peng Yu, Qing Zhao, Qingying Chen, Quanlong, Rajendra Arora, Rholais Lii, rmanyari, Robin Richtsfeld, Russell Klopfer, Sagi, Sam Sendelbach, Sandeep N Gupta, Sandip Giri, Sarah Edkins, Scott Tseng, Sdalbsoo, Sergii Khomenko, Seungwoo Choi (Biggie), Seyed Majid Azimi, Shaoning Zeng, shengfuintel, Siu Kei, Muk, Smit Shilu, soonson, Stefan Schweter, Sukhwan Kim, Sunitha Kambhampati, Taehoon Lee, tamimaddari82, Tang, Wenyi, Ted Chang, u2takey, Utkarsh Upadhyay, Vadim Markovtsev, voegtlel, Wai Hon Law, wangsiyu, Wenhao Hu, wenhao.hu, William D. Irons, Yan Facai (颜发才), Yanbo Liang, Yihong Wang, Yilei (Dolee) Yang, Yong Tang, Yuan (Terry) Tang + # Release 1.8.0 ## Major Features And Improvements @@ -404,14 +463,6 @@ answered questions, and were part of inspiring discussions. # Release 1.4.0 -## Major Features And Improvements -* `tf.keras` is now part of the core TensorFlow API. -* [`tf.data`](http://tensorflow.org/programmers_guide/datasets) is now part of - the core TensorFlow API. - * The API is now subject to backwards compatibility guarantees. - -# Release 1.4.0 - ## Major Features And Improvements * `tf.keras` is now part of the core TensorFlow API. * [`tf.data`](http://tensorflow.org/programmers_guide/datasets) is now part of diff --git a/configure.py b/configure.py index bde7af8c0e..ada342a50a 100644 --- a/configure.py +++ b/configure.py @@ -1397,6 +1397,10 @@ def set_grpc_build_flags(): write_to_bazelrc('build --define grpc_no_ares=true') +def set_build_strip_flag(): + write_to_bazelrc('build --strip=always') + + def set_windows_build_flags(): if is_windows(): # The non-monolithic build is not supported yet @@ -1519,6 +1523,7 @@ def main(): set_grpc_build_flags() set_cc_opt_flags(environ_cp) + set_build_strip_flag() set_windows_build_flags() if get_var( diff --git a/tensorflow/BUILD b/tensorflow/BUILD index a73c4ca3aa..6d134dbb80 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -475,7 +475,7 @@ tf_cc_shared_object( # excludes all but a subset of function names. # On MacOS, the linker does not support version_script, but has an # an "-exported_symbols_list" command. -z defs disallows undefined -# symbols in object files and -s strips the output. +# symbols in object files. 
tf_cc_shared_object( name = "libtensorflow.so", @@ -489,7 +489,6 @@ tf_cc_shared_object( "//tensorflow:windows_msvc": [], "//conditions:default": [ "-z defs", - "-s", "-Wl,--version-script", # This line must be directly followed by the version_script.lds file "$(location //tensorflow/c:version_script.lds)", ], @@ -515,7 +514,6 @@ tf_cc_shared_object( "//tensorflow:windows_msvc": [], "//conditions:default": [ "-z defs", - "-s", "-Wl,--version-script", # This line must be directly followed by the version_script.lds file "$(location //tensorflow:tf_version_script.lds)", ], diff --git a/tensorflow/c/generate-pc.sh b/tensorflow/c/generate-pc.sh index 02a6a58b61..7184ad68fb 100755 --- a/tensorflow/c/generate-pc.sh +++ b/tensorflow/c/generate-pc.sh @@ -15,10 +15,12 @@ # ============================================================================== TF_PREFIX='/usr/local' +LIBDIR='lib' usage() { echo "Usage: $0 OPTIONS" echo -e "-p, --prefix\tset installation prefix (default: /usr/local)" + echo -e "-l, --libdir\tset lib directory (default: lib)" echo -e "-v, --version\tset TensorFlow version" echo -e "-h, --help\tdisplay this message" } @@ -26,7 +28,7 @@ usage() { [ $# == 0 ] && usage && exit 0 # read the options -ARGS=$(getopt -o p:v:h --long prefix:,version:,help -n $0 -- "$@") +ARGS=$(getopt -o p:l:v:h --long prefix:,libdir:,version:,help -n $0 -- "$@") eval set -- "$ARGS" # extract options and their arguments into variables. @@ -38,6 +40,11 @@ while true ; do "") shift 2 ;; *) TF_PREFIX=$2 ; shift 2 ;; esac ;; + -l|--libdir) + case "$2" in + "") shift 2 ;; + *) LIBDIR=$2 ; shift 2 ;; + esac ;; -v|--version) case "$2" in "") shift 2 ;; @@ -55,7 +62,7 @@ echo "Generating pkgconfig file for TensorFlow $TF_VERSION in $TF_PREFIX" cat << EOF > tensorflow.pc prefix=${TF_PREFIX} exec_prefix=\${prefix} -libdir=\${exec_prefix}/lib +libdir=\${exec_prefix}/${LIBDIR} includedir=\${prefix}/include Name: TensorFlow diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc index 52c177212a..35a01e0341 100644 --- a/tensorflow/cc/gradients/math_grad.cc +++ b/tensorflow/cc/gradients/math_grad.cc @@ -38,6 +38,7 @@ REGISTER_NO_GRADIENT_OP("NotEqual"); REGISTER_NO_GRADIENT_OP("LogicalAnd"); REGISTER_NO_GRADIENT_OP("LogicalOr"); REGISTER_NO_GRADIENT_OP("LogicalNot"); +REGISTER_NO_GRADIENT_OP("Floor"); // Conjugate helper function returns the conjugate of an Output if it // is complex valued. 
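The `math_grad.cc` hunk above registers `Floor` as an op with no gradient in the C++ gradient registry, consistent with the Python side, where backpropagation through `tf.floor` yields no gradient. A minimal sketch of that effect, assuming TensorFlow 1.x graph mode (illustrative only, not part of this patch):

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[3])
y = tf.reduce_sum(tf.floor(x))

# Floor contributes no gradient, so nothing flows back to x.
print(tf.gradients(y, x))  # [None]
```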
diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 0cb3132e94..c73482d5f4 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -255,6 +255,53 @@ Status LRNGradHelper(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("LRN", LRNGradHelper); +Status SoftplusGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + auto dx = internal::SoftplusGrad(scope, grad_inputs[0], op.input(0)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("Softplus", SoftplusGradHelper); + +Status SoftsignGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + auto dx = internal::SoftsignGrad(scope, grad_inputs[0], op.input(0)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("Softsign", SoftsignGradHelper); + +Status FractionalAvgPoolGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + bool overlapping; + TF_RETURN_IF_ERROR( + GetNodeAttr(op.output(0).node()->attrs(), "overlapping", &overlapping)); + auto dx = internal::FractionalAvgPoolGrad( + scope, Shape(scope, op.input(0), Shape::OutType(DT_INT64)), + grad_inputs[0], op.output(1), op.output(2), + internal::FractionalAvgPoolGrad::Overlapping(overlapping)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("FractionalAvgPool", FractionalAvgPoolGradHelper); + +Status FractionalMaxPoolGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + bool overlapping; + TF_RETURN_IF_ERROR( + GetNodeAttr(op.output(0).node()->attrs(), "overlapping", &overlapping)); + auto dx = internal::FractionalMaxPoolGrad( + scope, op.input(0), op.output(0), grad_inputs[0], op.output(1), + op.output(2), internal::FractionalMaxPoolGrad::Overlapping(overlapping)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("FractionalMaxPool", FractionalMaxPoolGradHelper); + } // anonymous namespace } // namespace ops } // namespace tensorflow diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index c4eba7ecb0..b4d457a9d1 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -28,6 +28,8 @@ namespace { using ops::BiasAdd; using ops::Conv2D; using ops::Elu; +using ops::FractionalAvgPool; +using ops::FractionalMaxPool; using ops::L2Loss; using ops::LogSoftmax; using ops::LRN; @@ -41,6 +43,8 @@ using ops::Relu; using ops::Relu6; using ops::Selu; using ops::Softmax; +using ops::Softplus; +using ops::Softsign; class NNGradTest : public ::testing::Test { protected: @@ -71,22 +75,30 @@ class NNGradTest : public ::testing::Test { EXPECT_LT(max_error, 1e-3); } - // Sets tensor with random values, ensuring that the max value is largest by - // a reasonable amount. - // This is an issue for MaxPool, MaxPoolV2 and MaxPool3D, in which - // perturbations by the numeric gradient computation in the gradient checker - // can change the max value if values are too close together. + // Sets tensor with random values, ensuring that every pair of elements are at + // least a reasonable amount apart. 
+ // This is an issue for max pooling operations, in which perturbations by the + // numeric gradient computation in the gradient checker can change the max + // value if a pool has values that are too close together. template - void SetRandomValuesWithBumpedMax(Tensor* tensor) { + void SetRandomValuesForMaxPooling(Tensor* tensor) { auto tensor_flat = tensor->flat(); - tensor_flat.setRandom(); - int32 max_index = 0; - for (size_t i = 1; i < tensor->NumElements(); i++) { - if (tensor_flat(i) > tensor_flat(max_index)) { - max_index = i; - } + // First set the array to an increasing sequence of values spaced + // a reasonable amount apart + T cur = 0; + for (size_t i = 0; i < tensor->NumElements(); i++) { + tensor_flat(i) = cur; + cur += 5e-2; + } + // Fischer-Yates shuffle the array + for (size_t i = tensor->NumElements() - 1; i >= 1; i--) { + // j <- random integer 0 <= j <= i + size_t j = random::New64() % (i + 1); + // swap values at i, j + T tmp = tensor_flat(i); + tensor_flat(i) = tensor_flat(j); + tensor_flat(j) = tmp; } - tensor_flat(max_index) += 1e-2; } Scope scope_; @@ -189,7 +201,7 @@ TEST_F(NNGradTest, MaxPoolGradHelper) { const std::vector strides{1, 2, 2, 1}; auto y = MaxPool(scope_, x, ksize, strides, "VALID"); Tensor x_init_value = Tensor(DT_FLOAT, x_shape); - SetRandomValuesWithBumpedMax(&x_init_value); + SetRandomValuesForMaxPooling(&x_init_value); RunTest(x, x_init_value, y, y_shape); } @@ -202,7 +214,7 @@ TEST_F(NNGradTest, MaxPoolGradV2Helper) { Tensor strides = test::AsTensor({1, 2, 2, 1}, {4}); auto y = MaxPoolV2(scope_, x, ksize, strides, "VALID"); Tensor x_init_value = Tensor(DT_FLOAT, x_shape); - SetRandomValuesWithBumpedMax(&x_init_value); + SetRandomValuesForMaxPooling(&x_init_value); RunTest(x, x_init_value, y, y_shape); } @@ -215,7 +227,7 @@ TEST_F(NNGradTest, MaxPool3DGradHelper) { const std::vector strides{1, 3, 3, 3, 1}; auto y = MaxPool3D(scope_, x, ksize, strides, "VALID"); Tensor x_init_value = Tensor(DT_FLOAT, x_shape); - SetRandomValuesWithBumpedMax(&x_init_value); + SetRandomValuesForMaxPooling(&x_init_value); RunTest(x, x_init_value, y, y_shape); } @@ -248,5 +260,45 @@ TEST_F(NNGradTest, LRN){ RunTest(x, x_shape, y, x_shape); } +TEST_F(NNGradTest, SoftplusGrad) { + TensorShape shape({3, 7}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + auto y = Softplus(scope_, x); + RunTest(x, shape, y, shape); +} + +TEST_F(NNGradTest, SoftsignGrad) { + TensorShape shape({3, 7}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + auto y = Softsign(scope_, x); + RunTest(x, shape, y, shape); +} + +TEST_F(NNGradTest, FractionalAvgPoolGradHelper) { + TensorShape x_shape({1, 3, 7, 1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + // Force consistent pooling regions for unit testing. + auto y = FractionalAvgPool( + scope_, x, {1, 1.2, 1.9, 1}, + FractionalAvgPool::Deterministic(true).Overlapping(true).Seed(1).Seed2( + 2)); + TensorShape y_shape({1, 2, 3, 1}); + RunTest(x, x_shape, y.output, y_shape); +} + +TEST_F(NNGradTest, FractionalMaxPoolGradHelper) { + TensorShape x_shape({1, 3, 7, 1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + // Force consistent pooling regions for unit testing. 
+ auto y = FractionalMaxPool( + scope_, x, {1, 1.2, 1.9, 1}, + FractionalMaxPool::Deterministic(true).Overlapping(true).Seed(1).Seed2( + 2)); + Tensor x_init_value = Tensor(DT_FLOAT, x_shape); + SetRandomValuesForMaxPooling(&x_init_value); + TensorShape y_shape({1, 2, 3, 1}); + RunTest(x, x_init_value, y.output, y_shape); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden index 6e050cf564..6641d45e83 100644 --- a/tensorflow/compiler/aot/codegen_test_h.golden +++ b/tensorflow/compiler/aot/codegen_test_h.golden @@ -56,9 +56,9 @@ namespace bar { // // Memory stats: // arg bytes total: 104 -// arg bytes aligned: 128 +// arg bytes aligned: 192 // temp bytes total: 126 -// temp bytes aligned: 224 +// temp bytes aligned: 320 class MyClass : public tensorflow::XlaCompiledCpuFunction { public: // Number of input arguments for the compiled computation. diff --git a/tensorflow/compiler/aot/embedded_protocol_buffers.h b/tensorflow/compiler/aot/embedded_protocol_buffers.h index ebfe4806c2..4e194a6aba 100644 --- a/tensorflow/compiler/aot/embedded_protocol_buffers.h +++ b/tensorflow/compiler/aot/embedded_protocol_buffers.h @@ -71,7 +71,7 @@ struct ProtobufToEmbed { const ::tensorflow::protobuf::MessageLite* message; }; -// Embeds a a sequence of protocol buffers into an object file. +// Embeds a sequence of protocol buffers into an object file. // // `target_triple` is the target triple for the target architecture for the // generated object file. diff --git a/tensorflow/compiler/aot/runtime.h b/tensorflow/compiler/aot/runtime.h index d085864f00..d1a669ceb1 100644 --- a/tensorflow/compiler/aot/runtime.h +++ b/tensorflow/compiler/aot/runtime.h @@ -25,8 +25,8 @@ namespace tensorflow { namespace tfcompile { namespace runtime { -// Align to 32-bytes, to mimic tensorflow::Allocator::kAllocatorAlignment. -static constexpr size_t kAlign = 32; +// Align to 64-bytes, to mimic tensorflow::Allocator::kAllocatorAlignment. +static constexpr size_t kAlign = 64; // aligned_buffer_bytes returns the sum of each size in `sizes`, skipping -1 // values. There are `n` entries in `sizes`. Each buffer is aligned to kAlign diff --git a/tensorflow/compiler/aot/runtime_test.cc b/tensorflow/compiler/aot/runtime_test.cc index 6d603a02eb..06ec623eb2 100644 --- a/tensorflow/compiler/aot/runtime_test.cc +++ b/tensorflow/compiler/aot/runtime_test.cc @@ -24,7 +24,7 @@ namespace runtime { namespace { TEST(Runtime, AlignmentValue) { - // We've chosen 32 byte alignment for the tfcompile runtime to mimic the + // We've chosen 64 byte alignment for the tfcompile runtime to mimic the // regular tensorflow allocator, which was chosen to play nicely with Eigen. // The tfcompile runtime also has a requirement that comes from the xla // generated code, on the relation: buffer_size >= 16 ? 
2 * sizeof(void*) : 8 @@ -39,13 +39,13 @@ TEST(Runtime, AlignedBufferBytes) { EXPECT_EQ(aligned_buffer_bytes(sizesA, 1), 0); static constexpr intptr_t sizesB[1] = {3}; - EXPECT_EQ(aligned_buffer_bytes(sizesB, 1), 32); + EXPECT_EQ(aligned_buffer_bytes(sizesB, 1), 64); static constexpr intptr_t sizesC[1] = {32}; - EXPECT_EQ(aligned_buffer_bytes(sizesC, 1), 32); + EXPECT_EQ(aligned_buffer_bytes(sizesC, 1), 64); static constexpr intptr_t sizesD[7] = {1, -1, 32, -1, 64, 2, 3}; - EXPECT_EQ(aligned_buffer_bytes(sizesD, 7), 192); + EXPECT_EQ(aligned_buffer_bytes(sizesD, 7), 320); } void* add_ptr(void* base, uintptr_t delta) { @@ -101,11 +101,11 @@ TEST(Runtime, MallocFreeContiguousBuffers) { EXPECT_NE(base, nullptr); EXPECT_EQ(bufD[0], add_ptr(base, 0)); EXPECT_EQ(bufD[1], nullptr); - EXPECT_EQ(bufD[2], add_ptr(base, 32)); + EXPECT_EQ(bufD[2], add_ptr(base, 64)); EXPECT_EQ(bufD[3], nullptr); - EXPECT_EQ(bufD[4], add_ptr(base, 64)); - EXPECT_EQ(bufD[5], add_ptr(base, 128)); - EXPECT_EQ(bufD[6], add_ptr(base, 160)); + EXPECT_EQ(bufD[4], add_ptr(base, 128)); + EXPECT_EQ(bufD[5], add_ptr(base, 192)); + EXPECT_EQ(bufD[6], add_ptr(base, 256)); for (int i = 0; i < 7; ++i) { const intptr_t size = sizesD[i]; if (size != -1) { diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index d82922a359..1067b38f93 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -178,6 +178,7 @@ cc_library( ":runtime_matmul", ":runtime_matmul_mkl", ":runtime_single_threaded_conv2d", + ":runtime_single_threaded_fft", ":runtime_single_threaded_matmul", "@llvm//:execution_engine", "@llvm//:core", @@ -516,7 +517,6 @@ cc_library( deps = [ "//tensorflow/compiler/xla:executable_run_options", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/core:framework", "//tensorflow/core:framework_lite", "//third_party/eigen3", ], @@ -578,6 +578,22 @@ cc_library( ], ) +cc_library( + name = "runtime_single_threaded_fft", + srcs = [ + "runtime_fft_impl.h", + "runtime_single_threaded_fft.cc", + ], + hdrs = ["runtime_single_threaded_fft.h"], + copts = runtime_copts(), + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/core:framework_lite", + "//third_party/eigen3", + ], +) + cc_library( name = "runtime_single_threaded_matmul", srcs = ["runtime_single_threaded_matmul.cc"], diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc index 215405f680..54c52bc08f 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc @@ -51,6 +51,8 @@ extern const char* const kEigenConvF16SymbolName = extern const char* const kEigenConvF32SymbolName = "__xla_cpu_runtime_EigenConvF32"; extern const char* const kEigenFftSymbolName = "__xla_cpu_runtime_EigenFft"; +extern const char* const kEigenSingleThreadedFftSymbolName = + "__xla_cpu_runtime_EigenSingleThreadedFft"; extern const char* const kEigenSingleThreadedMatMulF16SymbolName = "__xla_cpu_runtime_EigenSingleThreadedMatMulF16"; extern const char* const kEigenSingleThreadedMatMulF32SymbolName = diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h index 1dce6efa5c..aa0e967123 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h @@ -52,6 +52,7 @@ extern const char* const 
kMKLSingleThreadedMatMulF64SymbolName; extern const char* const kEigenConvF16SymbolName; extern const char* const kEigenConvF32SymbolName; extern const char* const kEigenFftSymbolName; +extern const char* const kEigenSingleThreadedFftSymbolName; extern const char* const kEigenSingleThreadedMatMulF16SymbolName; extern const char* const kEigenSingleThreadedMatMulF32SymbolName; extern const char* const kEigenSingleThreadedMatMulF64SymbolName; diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 2c20be155f..758b8c62b4 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -1172,7 +1172,13 @@ Status IrEmitter::HandleFft(HloInstruction* fft) { {int8_ptr_type, int8_ptr_type, int8_ptr_type, int32_type, int32_type, int64_type, int64_type, int64_type, int64_type}, /*isVarArg=*/false); - const char* fn_name = runtime::kEigenFftSymbolName; + + bool multi_threaded_eigen = + hlo_module_config_.debug_options().xla_cpu_multi_thread_eigen(); + const char* fn_name = multi_threaded_eigen + ? runtime::kEigenFftSymbolName + : runtime::kEigenSingleThreadedFftSymbolName; + llvm::Function* fft_func = llvm::cast( module_->getOrInsertFunction(fn_name, fft_type)); fft_func->setCallingConv(llvm::CallingConv::C); diff --git a/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h b/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h index 984cb0616e..0bf693edd0 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h +++ b/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h @@ -21,8 +21,6 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/framework/numeric_types.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/platform/types.h" // 'tensorflow' namespace is used so that int64 and other types don't require @@ -71,11 +69,9 @@ void EigenFftR2C(const EigenDevice& device, complex64* out, float* operand, in_dims[0] = input_batch; Eigen::DSizes out_dims; out_dims[0] = input_batch; - TensorShape temp_shape{input_batch}; for (int i = 0; i < FFTRank; i++) { in_dims[i + 1] = fft_shape[i]; out_dims[i + 1] = i == FFTRank - 1 ? fft_shape[i] / 2 + 1 : fft_shape[i]; - temp_shape.AddDim(fft_shape[i]); } const Eigen::TensorMap, Eigen::Aligned> @@ -88,8 +84,8 @@ void EigenFftR2C(const EigenDevice& device, complex64* out, float* operand, const auto axes = Eigen::ArrayXi::LinSpaced(FFTRank, 1, FFTRank); // Compute the full FFT using a temporary tensor. - Tensor temp(DataTypeToEnum::v(), temp_shape); - auto full_fft = temp.flat_inner_dims(); + Eigen::Tensor full_fft(in_dims); + const Eigen::DSizes zero_start_indices; full_fft.device(device) = input.template fft(axes); @@ -112,11 +108,9 @@ void EigenFftC2R(const EigenDevice& device, float* out, complex64* operand, in_dims[0] = input_batch; Eigen::DSizes out_dims; out_dims[0] = input_batch; - TensorShape temp_shape{input_batch}; for (int i = 0; i < FFTRank; i++) { in_dims[i + 1] = i == FFTRank - 1 ? fft_shape[i] / 2 + 1 : fft_shape[i]; out_dims[i + 1] = fft_shape[i]; - temp_shape.AddDim(fft_shape[i]); } const Eigen::TensorMap, Eigen::Aligned> @@ -129,8 +123,7 @@ void EigenFftC2R(const EigenDevice& device, float* out, complex64* operand, // region we will slice from input given fft_shape. 
We slice input to // fft_shape on its inner-most dimensions, except the last (which we // slice to fft_shape[-1] / 2 + 1). - Tensor temp(DataTypeToEnum::v(), temp_shape); - auto full_fft = temp.flat_inner_dims(); + Eigen::Tensor full_fft(out_dims); // Calculate the starting point and range of the source of // negative frequency part. @@ -179,7 +172,6 @@ template void EigenFftWithRank(const EigenDevice& device, void* out, void* operand, int32 fft_type, int64 input_batch, int64 fft_length0, int64 fft_length1, int64 fft_length2) { - CHECK(::xla::FftType_IsValid(fft_type)) << fft_type; switch (fft_type) { case ::xla::FftType::FFT: EigenFftC2C( @@ -204,7 +196,8 @@ void EigenFftWithRank(const EigenDevice& device, void* out, void* operand, input_batch, fft_length0, fft_length1, fft_length2); break; default: - LOG(FATAL) << "Unsupported FFT type: " << fft_type; + // Unsupported FFT type + abort(); } } @@ -230,7 +223,8 @@ void EigenFftImpl(const EigenDevice& device, void* out, void* operand, fft_length1, fft_length2); break; default: - LOG(FATAL) << "Unsupported FFT rank " << fft_rank; + // Unsupported FFT rank + abort(); } } diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc new file mode 100644 index 0000000000..2613ddb127 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.cc @@ -0,0 +1,32 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h" + +#include "tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h" +#include "tensorflow/core/platform/dynamic_annotations.h" +#include "tensorflow/core/platform/types.h" + +using tensorflow::int32; +using tensorflow::int64; + +TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenSingleThreadedFft( + const void* run_options_ptr, void* out, void* operand, int32 fft_type, + int32 fft_rank, int64 input_batch, int64 fft_length0, int64 fft_length1, + int64 fft_length2) { + tensorflow::xla::EigenFftImpl(Eigen::DefaultDevice(), out, operand, fft_type, + fft_rank, input_batch, fft_length0, fft_length1, + fft_length2); +} diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h new file mode 100644 index 0000000000..dcd133d012 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h @@ -0,0 +1,31 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_SINGLE_THREADED_FFT_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_SINGLE_THREADED_FFT_H_ + +#include "tensorflow/core/platform/types.h" + +extern "C" { + +extern void __xla_cpu_runtime_EigenSingleThreadedFft( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, void* out, + void* operand, tensorflow::int32 fft_type, tensorflow::int32 fft_rank, + tensorflow::int64 input_batch, tensorflow::int64 fft_length0, + tensorflow::int64 fft_length1, tensorflow::int64 fft_length2); + +} // extern "C" + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_SINGLE_THREADED_FFT_H_ diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index 8d8c5e4c44..c4c90515ac 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -38,6 +38,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h" #include "tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h" +#include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h" #include "tensorflow/compiler/xla/service/cpu/windows_compatibility.h" #include "tensorflow/compiler/xla/types.h" @@ -202,6 +203,7 @@ bool RegisterKnownJITSymbols() { REGISTER_CPU_RUNTIME_SYMBOL(MKLSingleThreadedMatMulF64); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32); + REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedFft); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64); diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h index d3bc47e61e..2515222cf2 100644 --- a/tensorflow/compiler/xla/service/pattern_matcher.h +++ b/tensorflow/compiler/xla/service/pattern_matcher.h @@ -204,7 +204,7 @@ class LayoutPattern { // Modifies the pattern to match only if the layout equals the given proto. // The layout must outlive the returned pattern. constexpr LayoutPattern> EqualTo( - const Layout* layout) const { + const ::xla::Layout* layout) const { return LayoutPattern>( LayoutPatternEqualImpl(impl_, layout), matched_layout_); } diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.cc b/tensorflow/compiler/xla/service/tuple_simplifier.cc index e536c8afbf..77bdcc9de0 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier.cc +++ b/tensorflow/compiler/xla/service/tuple_simplifier.cc @@ -30,10 +30,17 @@ limitations under the License. 
namespace xla { +TupleSimplifier::TupleSimplifier(bool exclude_entry_computation) : + exclude_entry_computation_(exclude_entry_computation) {} + StatusOr TupleSimplifier::Run(HloModule* module) { // Initially add all GTE and Tuple instructions to the worklist. std::queue worklist; for (auto* computation : module->computations()) { + if (exclude_entry_computation_ && + computation == module->entry_computation()) { + continue; + } for (auto* instruction : computation->instructions()) { if (instruction->opcode() == HloOpcode::kTuple || instruction->opcode() == HloOpcode::kGetTupleElement) { diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.h b/tensorflow/compiler/xla/service/tuple_simplifier.h index e5e9b10b5b..7509501883 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier.h +++ b/tensorflow/compiler/xla/service/tuple_simplifier.h @@ -27,13 +27,20 @@ namespace xla { // the module. class TupleSimplifier : public HloPassInterface { public: - TupleSimplifier() {} + TupleSimplifier() : TupleSimplifier(/*exclude_entry_computation=*/false) {} + explicit TupleSimplifier(bool exclude_entry_computation); ~TupleSimplifier() override {} tensorflow::StringPiece name() const override { return "tuple-simplifier"; } // Run tuple simplification on the given computation. Returns whether the // computation was changed. StatusOr Run(HloModule* module) override; + + private: + // When set, this pipeline stage will perform optimization of all computations + // apart from the module's entry computation. This is used by Graphcore's + // backend. + bool exclude_entry_computation_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/tuple_simplifier_test.cc b/tensorflow/compiler/xla/service/tuple_simplifier_test.cc index ca9ae91281..d3635eae81 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/tuple_simplifier_test.cc @@ -42,6 +42,12 @@ class TupleSimplifierTest : public HloTestBase { TF_ASSERT_OK(changed_status.status()); EXPECT_EQ(change_expected, changed_status.ValueOrDie()); } + void Run(HloModule* module, bool change_expected, bool exclude_entry) { + TupleSimplifier simplifier(exclude_entry); + auto changed_status = simplifier.Run(module); + TF_ASSERT_OK(changed_status.status()); + EXPECT_EQ(change_expected, changed_status.ValueOrDie()); + } const Shape scalar_shape_ = ShapeUtil::MakeShape(F32, {}); const Shape tuple_shape_ = ShapeUtil::MakeTupleShape( @@ -211,5 +217,76 @@ TEST_F(TupleSimplifierTest, IncompatibleTuples) { EXPECT_THAT(computation->root_instruction(), tuple); } +TEST_F(TupleSimplifierTest, CanExcludeEntryComputation) { + // Verify that the root computation can be excluded + auto module = CreateNewModule(); + + HloInstruction* p0; + HloInstruction* p1; + HloComputation* c0; + HloComputation* c1; + HloComputation* entry; + + { + HloComputation::Builder builder(TestName() + "_1"); + p0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape_, "param")); + HloInstruction* gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p0, 0)); + HloInstruction* gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p0, 1)); + HloInstruction* gte2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p0, 2)); + + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1, gte2})); + + c0 = module->AddEmbeddedComputation(builder.Build()); + } + { + HloComputation::Builder builder(TestName() + "_2"); + p1 = 
builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape_, "param")); + HloInstruction* gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p1, 0)); + HloInstruction* gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p1, 1)); + HloInstruction* gte2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, p1, 2)); + + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1, gte2})); + + c1 = module->AddEmbeddedComputation(builder.Build()); + } + { + HloComputation::Builder builder(TestName() + "_Entry"); + HloInstruction* tuple_param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape_, "param")); + HloInstruction* call0 = builder.AddInstruction( + HloInstruction::CreateCall(tuple_shape_, {tuple_param}, c0)); + HloInstruction* call1 = builder.AddInstruction( + HloInstruction::CreateCall(tuple_shape_, {tuple_param}, c1)); + HloInstruction* gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, call0, 0)); + HloInstruction* gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, call1, 1)); + HloInstruction* tuple0 = + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); + HloInstruction* gte2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, tuple0, 0)); + HloInstruction* gte3 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, tuple0, 1)); + + builder.AddInstruction(HloInstruction::CreateTuple({gte2, gte3})); + + entry = module->AddEntryComputation(builder.Build()); + } + + Run(module.get(), /*change_expected=*/true, /*exclude_entry=*/ true); + + EXPECT_THAT(c0->root_instruction(), p0); + EXPECT_THAT(c1->root_instruction(), p1); + EXPECT_THAT(entry->instruction_count(), 9); +} + } // namespace } // namespace xla diff --git a/tensorflow/contrib/autograph/__init__.py b/tensorflow/contrib/autograph/__init__.py index 8fd83ef376..361cf2d77c 100644 --- a/tensorflow/contrib/autograph/__init__.py +++ b/tensorflow/contrib/autograph/__init__.py @@ -23,6 +23,7 @@ from __future__ import print_function # TODO(mdan): Bring only the relevant symbols to the top level. 
from tensorflow.contrib.autograph import utils +from tensorflow.contrib.autograph import operators from tensorflow.contrib.autograph.impl.api import convert from tensorflow.contrib.autograph.impl.api import converted_call from tensorflow.contrib.autograph.impl.api import do_not_convert @@ -43,6 +44,8 @@ _allowed_symbols = [ 'do_not_convert', 'to_code', 'to_graph', + # Overloaded operators + 'operators', # Python language "extensions" 'set_element_type', 'set_loop_options', diff --git a/tensorflow/contrib/cmake/tf_c.cmake b/tensorflow/contrib/cmake/tf_c.cmake index bda5e26f43..2e0a2fcef4 100644 --- a/tensorflow/contrib/cmake/tf_c.cmake +++ b/tensorflow/contrib/cmake/tf_c.cmake @@ -37,13 +37,15 @@ add_dependencies( tf_core_lib tf_protos_cc) -add_library(tf_c_python_api OBJECT - "${tensorflow_source_dir}/tensorflow/c/python_api.cc" - "${tensorflow_source_dir}/tensorflow/c/python_api.h" -) -add_dependencies( - tf_c_python_api - tf_c - tf_core_lib - tf_core_framework - tf_protos_cc) +if(tensorflow_BUILD_PYTHON_BINDINGS) + add_library(tf_c_python_api OBJECT + "${tensorflow_source_dir}/tensorflow/c/python_api.cc" + "${tensorflow_source_dir}/tensorflow/c/python_api.h" + ) + add_dependencies( + tf_c_python_api + tf_c + tf_core_lib + tf_core_framework + tf_protos_cc) +endif() diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake index f73da0b8ab..6c90cf398c 100644 --- a/tensorflow/contrib/cmake/tf_cc_ops.cmake +++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake @@ -155,7 +155,7 @@ if (WIN32) set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/pywrap_tensorflow_internal.lib") endif() else (WIN32) - set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so") + set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal${CMAKE_SHARED_LIBRARY_SUFFIX}") endif (WIN32) add_custom_target(tf_extension_ops) diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index a0c3ddd28b..9244604489 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -715,7 +715,7 @@ if(WIN32) endif() else() add_custom_command(TARGET pywrap_tensorflow_internal POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal${CMAKE_SHARED_LIBRARY_SUFFIX} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/_pywrap_tensorflow_internal.so) endif() @@ -832,7 +832,6 @@ add_custom_command(TARGET tf_python_build_pip_package POST_BUILD add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/contrib/testing/python/framework/util_test.py ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/testing/python/framework/) - add_custom_command(TARGET tf_python_build_pip_package POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/tensorflow/tools/pip_package/README ${CMAKE_CURRENT_BINARY_DIR}/tf_python/) diff --git a/tensorflow/contrib/cmake/tools/create_def_file.py b/tensorflow/contrib/cmake/tools/create_def_file.py index cffe069aa3..4f957f1e0b 100644 --- a/tensorflow/contrib/cmake/tools/create_def_file.py +++ b/tensorflow/contrib/cmake/tools/create_def_file.py @@ -44,7 +44,8 @@ UNDNAME = "undname.exe" DUMPBIN = "dumpbin.exe" # Exclude if matched -EXCLUDE_RE = re.compile(r"RTTI|deleting destructor|::internal::") 
+EXCLUDE_RE = re.compile(r"RTTI|deleting destructor|::internal::|Internal|" + r"python_op_gen_internal|grappler") # Include if matched before exclude INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|" @@ -56,6 +57,10 @@ INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|" r"tensorflow::ops::internal::Enter|" r"tensorflow::strings::internal::AppendPieces|" r"tensorflow::strings::internal::CatPieces|" + r"tensorflow::errors::Internal|" + r"tensorflow::Tensor::CopyFromInternal|" + r"tensorflow::kernel_factory::" + r"OpKernelRegistrar::InitInternal|" r"tensorflow::io::internal::JoinPathImpl") # Include if matched after exclude @@ -64,7 +69,7 @@ INCLUDE_RE = re.compile(r"^(TF_\w*)$|" r"tensorflow::|" r"functor::|" r"\?nsync_|" - r"perftools::gputools") + r"stream_executor::") # We want to identify data members explicitly in the DEF file, so that no one # can implicitly link against the DLL if they use one of the variables exported diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py index 45760a29ee..795f1993ba 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py @@ -151,16 +151,24 @@ class SinhArcsinhBijectorTest(test.TestCase): self.assertAllClose(y, bijector.forward(x).eval(), rtol=1e-4, atol=0.) self.assertAllClose(x, bijector.inverse(y).eval(), rtol=1e-4, atol=0.) - # Do the numpy calculation in float128 to avoid inf/nan. - y_float128 = np.float128(y) - self.assertAllClose( - np.log(np.cosh( - np.arcsinh(y_float128) / tailweight - skewness) / np.sqrt( - y_float128**2 + 1)) - - np.log(tailweight), - bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), - rtol=1e-4, - atol=0.) + # On IBM PPC systems, longdouble (np.float128) is same as double except that it can have more precision. + # Type double being of 8 bytes, can't hold square of max of float64 (which is also 8 bytes) and + # below test fails due to overflow error giving inf. So this check avoids that error by skipping square + # calculation and corresponding assert. + + if np.amax(y) <= np.sqrt(np.finfo(np.float128).max) and \ + np.fabs(np.amin(y)) <= np.sqrt(np.fabs(np.finfo(np.float128).min)): + + # Do the numpy calculation in float128 to avoid inf/nan. + y_float128 = np.float128(y) + self.assertAllClose( + np.log(np.cosh( + np.arcsinh(y_float128) / tailweight - skewness) / np.sqrt( + y_float128**2 + 1)) - + np.log(tailweight), + bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), + rtol=1e-4, + atol=0.) 
self.assertAllClose( -bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), bijector.forward_log_det_jacobian(x, event_ndims=0).eval(), diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py index d7909dd5a2..adf92c27ea 100644 --- a/tensorflow/contrib/eager/python/datasets.py +++ b/tensorflow/contrib/eager/python/datasets.py @@ -106,7 +106,8 @@ class Iterator(iterator_ops.EagerIterator, checkpointable.CheckpointableBase): target_device=target, buffer_size=10, container="", - shared_name=_generate_shared_name("function_buffer_resource")) + shared_name=_generate_shared_name( + "contrib_eager_iterator_function_buffer_resource")) self._buffer_resource_deleter = resource_variable_ops.EagerResourceDeleter( # pylint: disable=line-too-long handle=self._buffer_resource_handle, handle_device=self._device) diff --git a/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb index 4fe3a0e3f3..5749f22ac5 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb @@ -68,7 +68,7 @@ "# simply construct the object. Most layers take as a first argument the number\n", "# of output dimensions / channels.\n", "layer = tf.keras.layers.Dense(100)\n", - "# The number of input dimensionss is often unnecessary, as it can be inferred\n", + "# The number of input dimensions is often unnecessary, as it can be inferred\n", "# the first time the layer is used, but it can be provided if you want to \n", "# specify it manually, which is useful in some complex models.\n", "layer = tf.keras.layers.Dense(10, input_shape=(None, 5))" @@ -267,7 +267,7 @@ " * `build`, where you know the shapes of the input tensors and can do the rest of the initialization\n", " * `call`, where you do the forward computation\n", "\n", - "Note that you don't have to wait until `build` is called to create your variables, you can also create them in `__init__`. However, the advantage of creating them in `build` is that it enables late variable creation based on the shape of the inputs the layer will operate on. On the other hand, creating variables in `__init__` would mean that shapes requires to create the variables will need to be explicitly specified." + "Note that you don't have to wait until `build` is called to create your variables, you can also create them in `__init__`. However, the advantage of creating them in `build` is that it enables late variable creation based on the shape of the inputs the layer will operate on. On the other hand, creating variables in `__init__` would mean that shapes required to create the variables will need to be explicitly specified." ] }, { diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index 84a413c791..05bcdac2ca 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -346,7 +346,8 @@ def sequence_numeric_column( key, shape=(1,), default_value=0., - dtype=dtypes.float32): + dtype=dtypes.float32, + normalizer_fn=None): """Returns a feature column that represents sequences of numeric data. 
Example: @@ -370,6 +371,12 @@ def sequence_numeric_column( default_value: A single value compatible with `dtype` that is used for padding the sparse data into a dense `Tensor`. dtype: The type of values. + normalizer_fn: If not `None`, a function that can be used to normalize the + value of the tensor after `default_value` is applied for parsing. + Normalizer function takes the input `Tensor` as its argument, and returns + the output `Tensor`. (e.g. lambda x: (x - 3.0) / 4.2). Please note that + even though the most common use case of this function is normalization, it + can be used for any kind of Tensorflow transformations. Returns: A `_SequenceNumericColumn`. @@ -383,12 +390,16 @@ def sequence_numeric_column( if not (dtype.is_integer or dtype.is_floating): raise ValueError('dtype must be convertible to float. ' 'dtype: {}, key: {}'.format(dtype, key)) + if normalizer_fn is not None and not callable(normalizer_fn): + raise TypeError( + 'normalizer_fn must be a callable. Given: {}'.format(normalizer_fn)) return _SequenceNumericColumn( key, shape=shape, default_value=default_value, - dtype=dtype) + dtype=dtype, + normalizer_fn=normalizer_fn) def _assert_all_equal_and_return(tensors, name=None): @@ -407,7 +418,7 @@ class _SequenceNumericColumn( fc._SequenceDenseColumn, collections.namedtuple( '_SequenceNumericColumn', - ['key', 'shape', 'default_value', 'dtype'])): + ['key', 'shape', 'default_value', 'dtype', 'normalizer_fn'])): """Represents sequences of numeric data.""" @property @@ -419,7 +430,10 @@ class _SequenceNumericColumn( return {self.key: parsing_ops.VarLenFeature(self.dtype)} def _transform_feature(self, inputs): - return inputs.get(self.key) + input_tensor = inputs.get(self.key) + if self.normalizer_fn is not None: + input_tensor = self.normalizer_fn(input_tensor) + return input_tensor @property def _variable_shape(self): diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index ee74cf56dc..45d7b74046 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -28,6 +28,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import test from tensorflow.python.training import monitored_session @@ -947,6 +948,7 @@ class SequenceNumericColumnTest(test.TestCase): self.assertEqual((1,), a.shape) self.assertEqual(0., a.default_value) self.assertEqual(dtypes.float32, a.dtype) + self.assertIsNone(a.normalizer_fn) def test_shape_saved_as_tuple(self): a = sfc.sequence_numeric_column('aaa', shape=[1, 2]) @@ -965,6 +967,10 @@ class SequenceNumericColumnTest(test.TestCase): ValueError, 'dtype must be convertible to float'): sfc.sequence_numeric_column('aaa', dtype=dtypes.string) + def test_normalizer_fn_must_be_callable(self): + with self.assertRaisesRegexp(TypeError, 'must be a callable'): + sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable') + def test_get_sequence_dense_tensor(self): sparse_input = sparse_tensor.SparseTensorValue( # example 0, values [[0.], [1]] @@ -985,6 +991,41 @@ class SequenceNumericColumnTest(test.TestCase): self.assertAllEqual( expected_dense_tensor, 
dense_tensor.eval(session=sess)) + def test_get_sequence_dense_tensor_with_normalizer_fn(self): + + def _increment_two(input_sparse_tensor): + return sparse_ops.sparse_add( + input_sparse_tensor, + sparse_tensor.SparseTensor(((0, 0), (1, 1)), (2.0, 2.0), (2, 2)) + ) + + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + + # Before _increment_two: + # [[0.], [1.]], + # [[10.], [0.]], + # After _increment_two: + # [[2.], [1.]], + # [[10.], [2.]], + expected_dense_tensor = [ + [[2.], [1.]], + [[10.], [2.]], + ] + numeric_column = sfc.sequence_numeric_column( + 'aaa', normalizer_fn=_increment_two) + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + def test_get_sequence_dense_tensor_with_shape(self): """Tests get_sequence_dense_tensor with shape !=(1,).""" sparse_input = sparse_tensor.SparseTensorValue( diff --git a/tensorflow/contrib/ffmpeg/__init__.py b/tensorflow/contrib/ffmpeg/__init__.py index daba965a98..484ffee3e7 100644 --- a/tensorflow/contrib/ffmpeg/__init__.py +++ b/tensorflow/contrib/ffmpeg/__init__.py @@ -28,7 +28,6 @@ from __future__ import print_function from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_audio from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video from tensorflow.contrib.ffmpeg.ffmpeg_ops import encode_audio -from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py index 020b5c99c6..b1b5126d9e 100644 --- a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py +++ b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py @@ -21,7 +21,6 @@ from __future__ import print_function from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py -from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py from tensorflow.contrib.util import loader from tensorflow.python.framework import ops from tensorflow.python.platform import resource_loader diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index 10d1ecc738..dc49383c5c 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -119,14 +119,13 @@ from tensorflow.python.framework.smart_cond import smart_cond from tensorflow.python.framework.smart_cond import smart_constant_value from tensorflow.python.framework.tensor_spec import BoundedTensorSpec from tensorflow.python.framework.tensor_spec import TensorSpec -from tensorflow.python.ops.array_ops import broadcast_to from tensorflow.python.ops.init_ops import convolutional_delta_orthogonal from tensorflow.python.ops.init_ops import convolutional_orthogonal_1d from tensorflow.python.ops.init_ops import convolutional_orthogonal_2d from tensorflow.python.ops.init_ops import convolutional_orthogonal_3d from tensorflow.python.util.all_util import remove_undocumented -_allowed_symbols = ['nest', 'broadcast_to'] +_allowed_symbols = ['nest'] _nest_allowed_symbols = [ 'assert_same_structure', 'is_sequence', diff --git 
a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py index 65cb94b5a4..a955e21b72 100644 --- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py +++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py @@ -301,8 +301,8 @@ class FusedConv2DBiasActivationTest(test.TestCase): conv = tensors[i] value = values[i] ref_value = ref_values[i] - print("expected = ", ref_value) - print("actual = ", value) + tf_logging.info("expected = ", ref_value) + tf_logging.info("actual = ", value) tol = 1e-5 if value.dtype == np.float16: tol = 1e-3 @@ -843,7 +843,8 @@ class FusedConvInt8Tests(test.TestCase): vertical_stride, padding_type) output_width = CalculateConvolvedOutputDim(input_width, filter_width, horizontal_stride, padding_type) - print("output_height=", output_height, ", output_width=", output_width) + tf_logging.info("output_height=", output_height, ", output_width=", + output_width) side_input, _, _ = gen_array_ops.quantize_v2( random_ops.random_uniform( @@ -880,8 +881,8 @@ class FusedConvInt8Tests(test.TestCase): with self.test_session( use_gpu=True, config=NoMemoryOptimizationConfig()) as sess: actual_y, expected_y = sess.run([actual, expected]) - print("actual_y = ", actual_y) - print("expected_y = ", expected_y) + tf_logging.info("actual_y = ", actual_y) + tf_logging.info("expected_y = ", expected_y) self.assertTrue(np.array_equal(actual_y, expected_y)) def testFusedConvInt8(self): diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c index 6a5d982dc8..2e5c84704f 100644 --- a/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c +++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c @@ -19,7 +19,7 @@ limitations under the License. #include "hexagon_controller.h" -#include +#include #include #include "adspmsgd.h" diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh index 436c3e1d4c..840015a7fa 100755 --- a/tensorflow/contrib/lite/download_dependencies.sh +++ b/tensorflow/contrib/lite/download_dependencies.sh @@ -30,9 +30,7 @@ if [ ! -f $BZL_FILE_PATH ]; then fi EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" -# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once -# the archive has been propagated in mirror.bazel.build. 
-GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" NEON_2_SSE_URL="https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip" diff --git a/tensorflow/contrib/lite/examples/minimal/minimal.cc b/tensorflow/contrib/lite/examples/minimal/minimal.cc index 106e3b0270..8b0ace96cc 100644 --- a/tensorflow/contrib/lite/examples/minimal/minimal.cc +++ b/tensorflow/contrib/lite/examples/minimal/minimal.cc @@ -38,7 +38,7 @@ using namespace tflite; int main(int argc, char *argv[]) { if(argc != 2) { - fprintf(stderr, "Usage: %s \n"); + fprintf(stderr, "minimal \n"); return 1; } const char* filename = argv[1]; diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md index bb2e615eac..965273f0f0 100644 --- a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md +++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md @@ -128,7 +128,6 @@ TensorFlow operation not listed above are likely unsupported. Notably, the following common ops are not supported at the moment: * [tf.depth_to_space](https://www.tensorflow.org/api_docs/python/tf/depth_to_space) -* [tf.gather](https://www.tensorflow.org/api_docs/python/tf/gather) * [tf.image.resize_bilinear](https://www.tensorflow.org/api_docs/python/tf/image/resize_bilinear) * [tf.tanh](https://www.tensorflow.org/api_docs/python/tf/tanh) @@ -306,6 +305,19 @@ Options { } ``` +**GATHER** + +``` +Inputs { + 0: params tensor + 1: indices tensor + 2: axis tensor (optional) +} +Outputs { + 0: a tensor with same type as the params tensor. +} +``` + **GREATER** ``` diff --git a/tensorflow/contrib/lite/java/ovic/README.md b/tensorflow/contrib/lite/java/ovic/README.md index 5efa70987e..26349347fa 100644 --- a/tensorflow/contrib/lite/java/ovic/README.md +++ b/tensorflow/contrib/lite/java/ovic/README.md @@ -2,7 +2,7 @@ This folder contains building code for track one of the [Low Power ImageNet Recognition Challenge workshop at CVPR 2018.](https://rebootingcomputing.ieee.org/home/sitemap/14-lpirc/80-low-power-image-recognition-challenge-lpirc-2018) -## Pre-requesits +## Pre-requisite Follow the steps [here](https://www.tensorflow.org/mobile/tflite/demo_android) to install Tensorflow, Bazel, and the Android NDK and SDK. @@ -49,7 +49,7 @@ Once you have a submission that follows the instructions from the [competition s You can call the validator binary below to verify that your model fits the format requirements. This often helps you to catch size mismatches (e.g. output should be [1, 1001] instead of [1,1,1,1001]). 
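For reference, the GATHER entry added to the compatibility table above corresponds to tf.gather on the TensorFlow side. A minimal sketch of the params/indices/axis triple it documents; the values are illustrative only:

```python
import tensorflow as tf

params = tf.constant([[1, 2], [3, 4], [5, 6]])
indices = tf.constant([2, 0])
output = tf.gather(params, indices, axis=0)  # output dtype matches params

with tf.Session() as sess:
  print(sess.run(output))  # => [[5 6] [1 2]]
```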
Let say the submission file is located at `/path/to/my_model.lite`, then call: ```sh -bazel build --cxxopt--std=c++11 //tensorflow/contrib/lite/java/ovic:ovic_validator --cxxopt=-Wno-all +bazel build --cxxopt=--std=c++11 //tensorflow/contrib/lite/java/ovic:ovic_validator --cxxopt=-Wno-all bazel-bin/tensorflow/contrib/lite/java/ovic/ovic_validator /path/to/my_model.lite ``` diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index a2f192bbc2..1908f7fa6c 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1934,7 +1934,7 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims, // The quantization of the input, output arrays is as follows: // - The input activations are quantized as uint8 on the interval // [-1, 127/128]. -// The rationale for that is that that is the natural interval for output +// The rationale for that is that is the natural interval for output // activations (see next point) and these need to be concatenated together. // We could accommodate different ranges by re-scaling, but we empirically // found that setting the input activations range to be [-1, 127/128] in the @@ -1999,7 +1999,7 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims, // However, for a fixed-point implementation in 16-bit integers, using 5 // integer bits to represent the [-16, 16] range would leave only 11 // fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive -// representable values. Notice that that is higher than the +// representable values. Notice that is higher than the // worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic. // Using [-8, 8] thus seems like the better compromise overall, enjoying // an increment of 2.4e-4 between representable values and a worst-case diff --git a/tensorflow/contrib/lite/python/interpreter.py b/tensorflow/contrib/lite/python/interpreter.py index 9400e757b9..fd90823425 100644 --- a/tensorflow/contrib/lite/python/interpreter.py +++ b/tensorflow/contrib/lite/python/interpreter.py @@ -55,7 +55,7 @@ class Interpreter(object): elif model_content and not model_path: self._interpreter = ( _interpreter_wrapper.InterpreterWrapper_CreateWrapperCPPFromBuffer( - model_content, len(model_content))) + model_content)) if not self._interpreter: raise ValueError( 'Failed to create model from {} bytes'.format(len(model_content))) diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc index f705551fcb..b283551c45 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc @@ -397,9 +397,14 @@ InterpreterWrapper* InterpreterWrapper::CreateWrapperCPPFromFile( } InterpreterWrapper* InterpreterWrapper::CreateWrapperCPPFromBuffer( - const char* data, size_t len) { + PyObject* data) { + char * buf = nullptr; + Py_ssize_t length; + if (PY_TO_CPPSTRING(data, &buf, &length) == -1) { + return nullptr; + } std::unique_ptr model = - tflite::FlatBufferModel::BuildFromBuffer(data, len); + tflite::FlatBufferModel::BuildFromBuffer(buf, length); return model ? 
new InterpreterWrapper(std::move(model)) : nullptr; } diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h index b0ed7c4559..cbeb53bee7 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h @@ -40,8 +40,7 @@ class InterpreterWrapper { static InterpreterWrapper* CreateWrapperCPPFromFile(const char* model_path); // SWIG caller takes ownership of pointer. - static InterpreterWrapper* CreateWrapperCPPFromBuffer(const char* data, - size_t len); + static InterpreterWrapper* CreateWrapperCPPFromBuffer(PyObject* data); ~InterpreterWrapper(); bool AllocateTensors(); diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index 0913cd2c5c..88dda7290b 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -34,6 +34,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from six import PY3 + from google.protobuf import text_format as _text_format from google.protobuf.message import DecodeError from tensorflow.contrib.lite.python import lite_constants as constants @@ -54,6 +56,7 @@ from tensorflow.python.framework.importer import import_graph_def from tensorflow.python.ops.variables import global_variables_initializer from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import tag_constants +# from tensorflow.python.util.all_util import remove_undocumented class TocoConverter(object): @@ -203,6 +206,12 @@ class TocoConverter(object): except (_text_format.ParseError, DecodeError): try: print("Ignore 'tcmalloc: large alloc' warnings.") + + if not isinstance(file_content, str): + if PY3: + file_content = file_content.decode('utf-8') + else: + file_content = file_content.encode('utf-8') _text_format.Merge(file_content, graph_def) except (_text_format.ParseError, DecodeError): raise ValueError( @@ -382,3 +391,5 @@ def _freeze_graph(sess, output_tensors): output_arrays) else: return sess.graph_def + +# remove_undocumented(__name__) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 4465f953ba..caca199d2e 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -178,7 +178,7 @@ ArrayDataType ConvertDataType(tensorflow::DataType dtype) { else if (dtype == DT_STRING) return ArrayDataType::kString; else - LOG(INFO) << "Unsupported data type in placehoder op: " << dtype; + LOG(INFO) << "Unsupported data type in placeholder op: " << dtype; return ArrayDataType::kNone; } diff --git a/tensorflow/contrib/lite/toco/toco_port.cc b/tensorflow/contrib/lite/toco/toco_port.cc index 1b21c8bc60..de76fd4032 100644 --- a/tensorflow/contrib/lite/toco/toco_port.cc +++ b/tensorflow/contrib/lite/toco/toco_port.cc @@ -20,6 +20,12 @@ limitations under the License. 
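With CreateWrapperCPPFromBuffer now taking the Python buffer object directly, the Python-level Interpreter usage is unchanged. A minimal sketch of loading an interpreter from in-memory model content; the file path is hypothetical:

```python
from tensorflow.contrib.lite.python import interpreter as interpreter_wrapper

# Hypothetical flatbuffer file; any bytes object holding a TFLite model works.
with open('/tmp/model.tflite', 'rb') as f:
  model_content = f.read()

interpreter = interpreter_wrapper.Interpreter(model_content=model_content)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
```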
#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) +namespace std { +double round(double x) { return ::round(x); } +} // namespace std +#endif + namespace toco { namespace port { void CopyToBuffer(const string& src, char* dest) { diff --git a/tensorflow/contrib/lite/toco/toco_port.h b/tensorflow/contrib/lite/toco/toco_port.h index 5c019cb2bf..17f82b9dd7 100644 --- a/tensorflow/contrib/lite/toco/toco_port.h +++ b/tensorflow/contrib/lite/toco/toco_port.h @@ -34,6 +34,24 @@ limitations under the License. #define TFLITE_PROTO_NS google::protobuf #endif +#ifdef __ANDROID__ +#include +namespace std { + +template +std::string to_string(T value) +{ + std::ostringstream os ; + os << value ; + return os.str() ; +} + +#ifdef __ARM_ARCH_7A__ +double round(double x); +#endif +} +#endif + namespace toco { namespace port { diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh index e8c6edd7ba..a28fc3a87f 100755 --- a/tensorflow/contrib/makefile/compile_nsync.sh +++ b/tensorflow/contrib/makefile/compile_nsync.sh @@ -270,7 +270,7 @@ for arch in $archs; do PLATFORM_LDFLAGS=-pthread MKDEP=${CC} -M -std=c++11 PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \ - ../../platform/c++11/src/per_thread_waiter.cc \ + ../../platform/posix/src/per_thread_waiter.c \ ../../platform/c++11/src/yield.cc \ ../../platform/c++11/src/time_rep_timespec.cc \ ../../platform/c++11/src/nsync_panic.cc diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh index eff9081e35..48953e2e38 100755 --- a/tensorflow/contrib/makefile/download_dependencies.sh +++ b/tensorflow/contrib/makefile/download_dependencies.sh @@ -27,9 +27,7 @@ if [ ! -f $BZL_FILE_PATH ]; then fi EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" -# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once -# the archive has been propagated in mirror.bazel.build. -GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 2ed99d50a4..a6be2084aa 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -2503,7 +2503,7 @@ def _compute_recall_at_precision(tp, fp, fn, precision, name): name: An optional variable_scope name. Returns: - The recall at a the given `precision`. + The recall at a given `precision`. 
""" precisions = math_ops.div(tp, tp + fp + _EPSILON) tf_index = math_ops.argmin( diff --git a/tensorflow/contrib/mpi_collectives/kernels/ring.h b/tensorflow/contrib/mpi_collectives/kernels/ring.h index 1d56d588bc..c001615d3f 100644 --- a/tensorflow/contrib/mpi_collectives/kernels/ring.h +++ b/tensorflow/contrib/mpi_collectives/kernels/ring.h @@ -129,7 +129,7 @@ cudaStream_t CudaStreamForMPI(); * has the fully accumulated Segment 1; and so on. The scatter-reduce is * complete. * - * Next, the allgather distributes these fully accumululated chunks across all + * Next, the allgather distributes these fully accumulated chunks across all * nodes. Communication proceeds in the same ring, once again in N-1 steps. At * the ith step, node j will send chunk (j - i + 1) and receive chunk (j - i). * For example, at the first iteration, the following transfers will occur: diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py index 21bf3f5313..915e6504e1 100644 --- a/tensorflow/contrib/opt/python/training/adamax_test.py +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -224,8 +224,10 @@ class AdaMaxOptimizerTest(test.TestCase): var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0), + rtol=1e-2) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1), + rtol=1e-2) if use_resource: self.assertEqual("var0_%d/AdaMax:0" % (i,), opt.get_slot(var=var0, name="m").name) diff --git a/tensorflow/contrib/opt/python/training/model_average_optimizer.py b/tensorflow/contrib/opt/python/training/model_average_optimizer.py index a7c97a1da2..b6b10e500b 100644 --- a/tensorflow/contrib/opt/python/training/model_average_optimizer.py +++ b/tensorflow/contrib/opt/python/training/model_average_optimizer.py @@ -62,7 +62,7 @@ class ModelAverageCustomGetter(object): """ def __init__(self, worker_device): - """Create a new `ElasticAverageCustomGetter`. + """Create a new `ModelAverageCustomGetter`. Args: worker_device: String. Name of the `worker` job. 
diff --git a/tensorflow/contrib/periodic_resample/BUILD b/tensorflow/contrib/periodic_resample/BUILD index 6ca7fe8b6e..f2171efc95 100644 --- a/tensorflow/contrib/periodic_resample/BUILD +++ b/tensorflow/contrib/periodic_resample/BUILD @@ -6,12 +6,13 @@ exports_files(["LICENSE"]) load( "//tensorflow:tensorflow.bzl", - "py_test", + "tf_cc_test", "tf_gen_op_libs", "tf_custom_op_library", "tf_custom_op_py_library", "tf_gen_op_wrapper_py", ) +load("//tensorflow:tensorflow.bzl", "py_test") cc_library( name = "all_ops", @@ -84,6 +85,22 @@ py_test( ":init_py", "//tensorflow/contrib/util:util_py", "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradient_checker", + ], +) + +tf_cc_test( + name = "periodic_resample_op_cc_test", + size = "small", + srcs = [ + "ops/array_ops_test.cc", + ], + deps = [ + ":all_ops", + "//tensorflow/core:framework", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", ], ) diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc index e18923c8aa..514689cf45 100644 --- a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc +++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc @@ -22,4 +22,9 @@ namespace tensorflow { REGISTER_KERNEL_BUILDER(Name("PeriodicResample").Device(DEVICE_CPU), PeriodicResampleOp); + +REGISTER_KERNEL_BUILDER(Name("PeriodicResampleOpGrad") + .Device(DEVICE_CPU), + PeriodicResampleOpGrad); + } // namespace tensorflow diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h index 3ab588c458..42fba81a5c 100644 --- a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h +++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h @@ -25,92 +25,202 @@ #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/util/work_sharder.h" namespace { -template -IndexT compute_input_index( - IndexVecT* target_dimensions, const IndexT& output_index, - const IndexVecT& original_dimensions, const int& adjustable_dimension, - const std::vector& dimension_ceiling, - const std::vector& cumulative_dimensions, IndexT* result, - std::vector* output_indices, const int& rank) { - *result = 0; - output_indices->clear(); +// Computes input tensor index for given output index during forward +// propagation through periodic_resample operation. +class InputIndexer { + public: + InputIndexer(const std::vector& output_dimensions, + const tensorflow::TensorShape& input_shape, + int adjustable_dimension) + : output_dimensions_(output_dimensions), + adjustable_dimension_(adjustable_dimension), + rank_(input_shape.dims()), + linear_output_index_(0), + linear_input_index_(0), + adjustable_dimension_carriage_sum_(0) { + auto input_dimensions = TensorShapeToVector(input_shape); + // factors by which input_dimensions increases/decreases w.r.t. 
+ // output_dimensions + dimension_ceiling_ = + ComputeDimensionCeiling(output_dimensions, input_dimensions); + cumulative_dimensions_ = ComputeCumulativeDimensions(); + + output_indices_.resize(output_dimensions_.size()); + input_indices_.resize(output_dimensions_.size()); + + // Compute index_factors + index_factors_.resize(rank_); + tensorflow::int64 last_index_factor = 1; + for (auto r = rank_ - 1; r >= 0; --r) { + index_factors_[r] = last_index_factor; + last_index_factor *= input_dimensions[r]; + } + } + + tensorflow::int64 linear_input_index() const { return linear_input_index_; } + + void MoveToOutputIndex(tensorflow::int64 output_index); + void IncrementOutputIndex(); + + private: + void RecomputeInputAdjustableDimensionIndex() { + tensorflow::int64 index = adjustable_dimension_carriage_sum_; + index *= output_dimensions_[adjustable_dimension_]; + index += output_indices_[adjustable_dimension_]; + input_indices_[adjustable_dimension_] = index; + } + + std::vector TensorShapeToVector( + const tensorflow::TensorShape& tensor_shape); + + std::vector ComputeDimensionCeiling( + const std::vector& output_dimensions, + const std::vector& input_dimensions); + + std::vector ComputeCumulativeDimensions(); + + const std::vector output_dimensions_; + std::vector dimension_ceiling_; + std::vector index_factors_; + std::vector cumulative_dimensions_; + std::vector output_indices_; + std::vector input_indices_; + + const int adjustable_dimension_; + const int rank_; + tensorflow::int64 linear_output_index_; + tensorflow::int64 linear_input_index_; + tensorflow::int64 adjustable_dimension_carriage_sum_; +}; + +void InputIndexer::MoveToOutputIndex(tensorflow::int64 output_index) { + linear_output_index_ = output_index; + linear_input_index_ = 0; // un-rasterize the output index auto last_reduced_i = output_index; - for (auto r = rank - 1; r >= 0; --r) { - (*output_indices)[r] = last_reduced_i % (*target_dimensions)[r]; + for (auto r = rank_ - 1; r >= 0; --r) { + output_indices_[r] = last_reduced_i % output_dimensions_[r]; last_reduced_i = - (last_reduced_i - (*output_indices)[r]) / (*target_dimensions)[r]; + (last_reduced_i - output_indices_[r]) / output_dimensions_[r]; } + tensorflow::int64 carriage_sum = 0; + for (int qi = 0; qi < rank_; ++qi) { + if (qi == adjustable_dimension_) continue; + carriage_sum += cumulative_dimensions_[qi] * + (output_indices_[qi] % dimension_ceiling_[qi]); + } + adjustable_dimension_carriage_sum_ = carriage_sum; + // rasterize the input index - IndexT last_index_factor = 1; - for (auto r = rank - 1; r >= 0; --r) { - IndexT index = 0; - if (r != adjustable_dimension) - index = (*output_indices)[r] / dimension_ceiling[r]; - else { - for (int qi = 0; qi < rank; ++qi) { - if (qi == adjustable_dimension) continue; - index += cumulative_dimensions[qi] * - ((*output_indices)[qi] % dimension_ceiling[qi]); - } - index *= (*target_dimensions)[adjustable_dimension]; - index += (*output_indices)[r]; + for (auto r = rank_ - 1; r >= 0; --r) { + if (r != adjustable_dimension_) { + input_indices_[r] = output_indices_[r] / dimension_ceiling_[r]; + } else { + RecomputeInputAdjustableDimensionIndex(); } - *result += last_index_factor * index; - last_index_factor *= original_dimensions[r]; } + for (auto r = rank_ - 1; r >= 0; --r) { + linear_input_index_ += index_factors_[r] * input_indices_[r]; + } +} + +void InputIndexer::IncrementOutputIndex() { + linear_output_index_++; + for (auto r = rank_ - 1; r >= 0; --r) { + auto old_carriage_sum_increment = + cumulative_dimensions_[r] * + 
(output_indices_[r] % dimension_ceiling_[r]); + output_indices_[r] = (output_indices_[r] + 1) % output_dimensions_[r]; + if (r != adjustable_dimension_) { + auto new_input_index = output_indices_[r] / dimension_ceiling_[r]; + linear_input_index_ += + (new_input_index - input_indices_[r]) * index_factors_[r]; + + input_indices_[r] = new_input_index; + + auto new_carriage_sum_increment = + cumulative_dimensions_[r] * + (output_indices_[r] % dimension_ceiling_[r]); - return *result; + adjustable_dimension_carriage_sum_ = adjustable_dimension_carriage_sum_ - + old_carriage_sum_increment + + new_carriage_sum_increment; + } + + if (output_indices_[r] != 0) { + // No more carries to higher indices. + break; + } + } + auto old_adjustable_dimension_input_index = + input_indices_[adjustable_dimension_]; + RecomputeInputAdjustableDimensionIndex(); + linear_input_index_ += (input_indices_[adjustable_dimension_] - + old_adjustable_dimension_input_index) * + index_factors_[adjustable_dimension_]; } -template // both types are needed here b/c IndexVecT and - // InputDataT are not related - void - fill_periodic_tensor( - tensorflow::OpKernelContext* context, - const IndexVecT& desired_shape, - const tensorflow::Tensor& input_tensor) { - // input is a strided array (last index is fastest, C-ordered) - auto input = input_tensor.flat(); - const int rank = input_tensor.dims(); - // original and target dimensions - std::vector original_dimensions(rank), - target_dimensions(rank); - tensorflow::int64 total_size(input_tensor.NumElements()), new_sliced_size(1); - // factors by which original_dimensions increases/decreases w.r.t. - // target_dimensions - std::vector dimension_ceiling(rank), - cumulative_dimensions(rank); - // index of adjustable dimension - int adjustable_dimension; - tensorflow::TensorShape output_shape; +std::vector InputIndexer::TensorShapeToVector( + const tensorflow::TensorShape& tensor_shape) { + std::vector result(tensor_shape.dims()); + int count = 0; + for (const auto dim_info : tensor_shape) { + result[count] = dim_info.size; + ++count; + } + return result; +} - // requires that the rank of the input tensor and length of the desired shape - // are equal - OP_REQUIRES(context, rank == desired_shape.size(), - tensorflow::errors::InvalidArgument( - "periodic_resample expects the rank of the input tensor, ", - rank, ", to be the same as the length of the desired shape, ", - desired_shape.size(), ".")); +std::vector InputIndexer::ComputeDimensionCeiling( + const std::vector& output_dimensions, + const std::vector& input_dimensions) { + std::vector dimension_ceiling(input_dimensions.size()); + for (size_t i = 0; i < input_dimensions.size(); ++i) { + dimension_ceiling[i] = (output_dimensions[i] + input_dimensions[i] - 1) / + input_dimensions[i]; + } + return dimension_ceiling; +} - bool found = false; - const auto& input_tensor_shape = input_tensor.shape(); +std::vector InputIndexer::ComputeCumulativeDimensions() { + std::vector cumulative_dimensions(rank_); + int count = 0; + for (int i = 0; i < rank_; ++i) { + if (count == 0) { + cumulative_dimensions[count] = 1; + } else { + cumulative_dimensions[count] = + cumulative_dimensions[count - 1] * dimension_ceiling_[count - 1]; + } + ++count; + } + return cumulative_dimensions; +} +template +void process_desired_shape(tensorflow::OpKernelContext* context, + const tensorflow::TensorShape& input_tensor_shape, + const IndexVecT& desired_shape, + int* adjustable_dimension, + std::vector* target_dimensions, + tensorflow::int64* output_size) { + 
tensorflow::int64 new_sliced_size = 1; + bool found = false; + const int rank = input_tensor_shape.dims(); for (int i = 0; i < rank; ++i) { - // if (desired_shape(i) < 1) { if (desired_shape[i] < 1) { // only one index can be adjustable OP_REQUIRES(context, !found, tensorflow::errors::InvalidArgument( "periodic_resample expects only " "one index to be marked as adjustable.")); - adjustable_dimension = i; + *adjustable_dimension = i; found = true; } else { OP_REQUIRES( @@ -122,9 +232,8 @@ template +void +do_periodic_resample_op(tensorflow::OpKernelContext* context, + const tensorflow::TensorShape& original_shape, + const tensorflow::PartialTensorShape& desired_shape, + const tensorflow::Tensor& source_tensor) { + const int rank = source_tensor.dims(); + + // requires that the rank of the input tensor and length of the desired shape + // are equal + OP_REQUIRES(context, rank == desired_shape.dims(), + tensorflow::errors::InvalidArgument( + "periodic_resample expects the rank of the input tensor, ", + rank, ", to be the same as the length of the desired shape, ", + desired_shape.dims(), ".")); + + std::vector target_dimensions(rank); + tensorflow::int64 new_size = 0; + // index of adjustable dimension + int adjustable_dimension = 0; + process_desired_shape(context, original_shape, desired_shape.dim_sizes(), + &adjustable_dimension, &target_dimensions, &new_size); // ensure that the new dimension is greater than zero OP_REQUIRES(context, target_dimensions[adjustable_dimension] > 0, @@ -160,11 +293,14 @@ template allocate_output(0, output_shape, &output_tensor)); auto output = output_tensor->flat(); - // memory is allocated for these variables outside the inner loop for - // efficiency (although, I could create a separate class scope for - // this purpose instead) - tensorflow::int64 result = 0; - std::vector output_indices(target_dimensions.size()); + // input is a strided array (last index is fastest, C-ordered) + auto input = source_tensor.flat(); // Fill output tensor with periodically resampled input tensor values - for (tensorflow::int64 output_index = 0; output_index < new_size; - ++output_index) { - output(output_index) = input(compute_input_index( - &target_dimensions, output_index, original_dimensions, - adjustable_dimension, dimension_ceiling, cumulative_dimensions, &result, - &output_indices, rank)); - } + InputIndexer input_indexer(target_dimensions, original_shape, + adjustable_dimension); + + auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads()); + auto fill_output_tensor = [&input_indexer, &output, &input]( + tensorflow::int64 start, tensorflow::int64 limit) { + InputIndexer local_indexer(input_indexer); + local_indexer.MoveToOutputIndex(start); + for (tensorflow::int64 output_index = start; output_index < limit; + ++output_index) { + if (mode == Mode::kForward) { + output(output_index) = input(local_indexer.linear_input_index()); + } else { + output(local_indexer.linear_input_index()) = input(output_index); + } + local_indexer.IncrementOutputIndex(); + } + }; + ::tensorflow::Shard(worker_threads.num_threads, worker_threads.workers, + new_size, costPerFillIndex, fill_output_tensor); } +#define DATA_TYPE_SWITCH(data_type, context, CASE) \ + switch (data_type) { \ + CASE(float) \ + CASE(double) \ + CASE(tensorflow::int32) \ + CASE(tensorflow::int64) \ + default: \ + context->CtxFailure(__FILE__, __LINE__, \ + tensorflow::errors::InvalidArgument( \ + "Unsuppored tensor elements type")); \ + break; \ + } + void create_output_tensor( tensorflow::OpKernelContext* 
context, const tensorflow::Tensor& input_tensor, const tensorflow::DataType& input_tensor_type, - const tensorflow::PartialTensorShape& desired_shape_tensor) { - auto desired_shape = desired_shape_tensor.dim_sizes(); - - // obligatory type switch - switch (input_tensor_type) { - case tensorflow::DataTypeToEnum::value: - fill_periodic_tensor(context, desired_shape, input_tensor); + const tensorflow::PartialTensorShape& desired_shape) { +#define CASE(type) \ + case tensorflow::DataTypeToEnum::value: \ + do_periodic_resample_op( \ + context, input_tensor.shape(), desired_shape, input_tensor); \ break; - case tensorflow::DataTypeToEnum::value: - fill_periodic_tensor(context, desired_shape, input_tensor); - break; - case tensorflow::DataTypeToEnum::value: - fill_periodic_tensor(context, desired_shape, - input_tensor); - break; - case tensorflow::DataTypeToEnum::value: - fill_periodic_tensor(context, desired_shape, - input_tensor); + + DATA_TYPE_SWITCH(input_tensor_type, context, CASE); +#undef CASE +} + +void create_grad_tensor(tensorflow::OpKernelContext* context, + const tensorflow::Tensor& grad_tensor, + const tensorflow::DataType& grad_tensor_type, + const tensorflow::TensorShape& original_shape, + const tensorflow::PartialTensorShape& desired_shape) { +#define CASE(type) \ + case tensorflow::DataTypeToEnum::value: \ + do_periodic_resample_op( \ + context, original_shape, desired_shape, grad_tensor); \ break; - default:; - } + + DATA_TYPE_SWITCH(grad_tensor_type, context, CASE); +#undef CASE } } // namespace @@ -238,4 +400,25 @@ class PeriodicResampleOp : public tensorflow::OpKernel { tensorflow::PartialTensorShape desired_shape; }; +class PeriodicResampleOpGrad : public tensorflow::OpKernel { + public: + explicit PeriodicResampleOpGrad(tensorflow::OpKernelConstruction* context) + : tensorflow::OpKernel(context) { + OP_REQUIRES_OK(context, + context->GetAttr("original_shape", &original_shape)); + OP_REQUIRES_OK(context, context->GetAttr("desired_shape", &desired_shape)); + } + + void Compute(tensorflow::OpKernelContext* context) override { + const tensorflow::Tensor& grad_tensor = context->input(0); + const tensorflow::DataType grad_tensor_type = context->input_dtype(0); + create_grad_tensor(context, grad_tensor, grad_tensor_type, original_shape, + desired_shape); + } + + private: + tensorflow::TensorShape original_shape; + tensorflow::PartialTensorShape desired_shape; +}; + #endif // TENSORFLOW_KERNELS_PERIODICRESAMPLE_OP_H_ diff --git a/tensorflow/contrib/periodic_resample/ops/array_ops.cc b/tensorflow/contrib/periodic_resample/ops/array_ops.cc index 82bd796956..fd38cd09b4 100644 --- a/tensorflow/contrib/periodic_resample/ops/array_ops.cc +++ b/tensorflow/contrib/periodic_resample/ops/array_ops.cc @@ -26,7 +26,42 @@ REGISTER_OP("PeriodicResample") .Input("values: T") .Attr("shape: shape") .Output("output: T") - .SetShapeFn(shape_inference::ExplicitShape) + .SetShapeFn([](shape_inference::InferenceContext* c) { + tensorflow::PartialTensorShape desired_shape; + TF_RETURN_IF_ERROR(c->GetAttr("shape", &desired_shape)); + shape_inference::ShapeHandle input_tensor_shape = c->input(0); + shape_inference::DimensionHandle num_input_elements = + c->NumElements(input_tensor_shape); + shape_inference::ShapeHandle result_shape_handle; + if (!shape_inference::InferenceContext::ValueKnown(num_input_elements)) { + TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape( + desired_shape, &result_shape_handle)); + } else { + const int rank = c->Rank(input_tensor_shape); + std::vector target_dimensions(rank); 
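The shape function added here mirrors the kernel's process_desired_shape logic: the single dimension marked with a value below one is adjustable and receives whatever factor is left after the known desired dimensions are divided out of the input element count. A small Python sketch of that arithmetic, using the shapes from the test below:

```python
import numpy as np

input_shape = (2, 2, 4)        # fully known input shape
desired_shape = [4, 4, None]   # None (or any value < 1) marks the adjustable axis

num_elements = int(np.prod(input_shape))                       # 16
fixed_product = int(np.prod([d for d in desired_shape if d]))  # 16
adjustable = num_elements // fixed_product                     # 1
output_shape = [d if d else adjustable for d in desired_shape]
assert output_shape == [4, 4, 1]
```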
+ tensorflow::int64 new_sliced_size = 1; + int adjustable_dimension = 0; + for (int i = 0; i < rank; ++i) { + if (desired_shape.dim_size(i) < 1) { + adjustable_dimension = i; + } else { + target_dimensions[i] = desired_shape.dim_size(i); + new_sliced_size *= target_dimensions[i]; + } + } + target_dimensions[adjustable_dimension] = + shape_inference::InferenceContext::Value( + num_input_elements) / new_sliced_size; + tensorflow::TensorShape result_shape; + for (int i = 0; i < rank; ++i) { + result_shape.AddDim(target_dimensions[i]); + } + TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape( + result_shape, &result_shape_handle)); + } + c->set_output(0, result_shape_handle); + return Status::OK(); + }) .Doc(R"doc( Periodically resample elements of a tensor to conform to `shape`. @@ -101,4 +136,20 @@ output: Periodically resampled tensor that has dimensions specified as in )doc"); + +REGISTER_OP("PeriodicResampleOpGrad") + .Attr("T: numbertype") + .Input("grad: T") + .Attr("original_shape: shape") + .Attr("desired_shape: shape") + .Output("grad_values: T") + .SetShapeFn([](shape_inference::InferenceContext* c) { + tensorflow::TensorShape original_shape; + TF_RETURN_IF_ERROR(c->GetAttr("original_shape", &original_shape)); + shape_inference::ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape(original_shape, &s)); + c->set_output(0, s); + return Status::OK(); +}); + } // namespace tensorflow diff --git a/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc b/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc new file mode 100644 index 0000000000..43b7c1799f --- /dev/null +++ b/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc @@ -0,0 +1,41 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/shape_inference_testutil.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +TEST(ArrayOpsTest, PeriodicResample_ShapeFn) { + ShapeInferenceTestOp op("PeriodicResample"); + // Case 1: output shape can be fully inferreed. + PartialTensorShape shape({4, 4, -1}); + TensorShapeProto shape_proto; + shape.AsProto(&shape_proto); + + TF_ASSERT_OK(NodeDefBuilder("test", "PeriodicResample") + .Input({"values", 0, DT_INT32}) + .Attr("shape", shape_proto) + .Finalize(&op.node_def)); + INFER_OK(op, "[2,2,4]", "[4,4,1]"); + // Case 2: output shape can not be inferred - report desired shape. 
+ INFER_OK(op, "[2,2,?]", "[4,4,?]"); +} + +} // end namespace tensorflow diff --git a/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py index a25de55e18..31a6fe1d94 100644 --- a/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py +++ b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py @@ -21,8 +21,11 @@ from __future__ import print_function import numpy from tensorflow.contrib.periodic_resample import periodic_resample +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import variables from tensorflow.python.platform import googletest @@ -93,7 +96,6 @@ class PeriodicResampleTest(test_util.TensorFlowTestCase): def testPeriodicResampleErrors(self): input_tensor = numpy.zeros(shape=[1, 2, 2, 4]) with self.test_session(): - variables.global_variables_initializer().run() with self.assertRaisesWithPredicateMatch( errors_impl.InvalidArgumentError, 'Dimension 3 input tensor has size 4, desired shape has size 1'): @@ -103,6 +105,29 @@ class PeriodicResampleTest(test_util.TensorFlowTestCase): '4, to be the same as the length of the desired shape, 3'): periodic_resample(input_tensor, [None, 4, 4]).eval() + def testPeriodicResampleGradient(self): + desired_shape = numpy.array([4, 4, None]) + result_shape = (4, 4, 1) + input_shape = (2, 2, 4) + with self.test_session() as sess: + x = array_ops.placeholder(dtypes.float32, shape=input_shape) + output = periodic_resample(x, desired_shape) + error = gradient_checker.compute_gradient_error( + x, input_shape, output, result_shape) + self.assertLess(error, 1e-4) + + def testPeriodicResampleShapeInference(self): + with self.test_session() as sess: + # Case 1: output shape can be fully inferreed. + x = array_ops.placeholder(dtypes.float32, shape=(2, 2, 4)) + output = periodic_resample(x, [4, 4, None]) + self.assertEqual(output.shape, [4, 4, 1]) + # Case 2: output shape can not be inferred - report desired shape. 
+ x = array_ops.placeholder(dtypes.float32, shape=(2, 2, None)) + output = periodic_resample(x, [4, 4, None]) + self.assertTrue(output.shape.is_compatible_with([4, 4, None])) + self.assertEqual(output.shape[2].value, None) + if __name__ == '__main__': googletest.main() diff --git a/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py index 348623d8f8..470e300ccb 100644 --- a/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py +++ b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py @@ -21,11 +21,17 @@ from __future__ import print_function # pylint: disable=unused-import from tensorflow.contrib.periodic_resample.python.ops import gen_periodic_resample_op -from tensorflow.contrib.periodic_resample.python.ops.gen_periodic_resample_op import periodic_resample +from tensorflow.contrib.periodic_resample.python.ops.gen_periodic_resample_op import periodic_resample, periodic_resample_op_grad from tensorflow.contrib.util import loader +from tensorflow.python.framework import ops from tensorflow.python.platform import resource_loader # pylint: enable=unused-import _periodic_resample_op = loader.load_op_library( resource_loader.get_path_to_datafile('_periodic_resample_op.so')) + +@ops.RegisterGradient("PeriodicResample") +def _periodic_resample_grad_cc(op, grad): + return periodic_resample_op_grad( + grad, op.inputs[0].shape, op.get_attr('shape')) diff --git a/tensorflow/contrib/predictor/contrib_estimator_predictor.py b/tensorflow/contrib/predictor/contrib_estimator_predictor.py index b7a98c68e2..af3b2ad1b5 100644 --- a/tensorflow/contrib/predictor/contrib_estimator_predictor.py +++ b/tensorflow/contrib/predictor/contrib_estimator_predictor.py @@ -34,7 +34,8 @@ class ContribEstimatorPredictor(predictor.Predictor): prediction_input_fn, input_alternative_key=None, output_alternative_key=None, - graph=None): + graph=None, + config=None): """Initialize a `ContribEstimatorPredictor`. Args: @@ -48,6 +49,7 @@ class ContribEstimatorPredictor(predictor.Predictor): multi-headed models. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. """ self._graph = graph or ops.Graph() with self._graph.as_default(): @@ -58,6 +60,7 @@ class ContribEstimatorPredictor(predictor.Predictor): checkpoint_path = saver.latest_checkpoint(estimator.model_dir) self._session = monitored_session.MonitoredSession( session_creator=monitored_session.ChiefSessionCreator( + config=config, checkpoint_filename_with_path=checkpoint_path)) input_alternative_key = ( diff --git a/tensorflow/contrib/predictor/core_estimator_predictor.py b/tensorflow/contrib/predictor/core_estimator_predictor.py index d78d94c269..a725072e72 100644 --- a/tensorflow/contrib/predictor/core_estimator_predictor.py +++ b/tensorflow/contrib/predictor/core_estimator_predictor.py @@ -51,7 +51,8 @@ class CoreEstimatorPredictor(predictor.Predictor): estimator, serving_input_receiver_fn, output_key=None, - graph=None): + graph=None, + config=None): """Initialize a `CoreEstimatorPredictor`. Args: @@ -62,6 +63,7 @@ class CoreEstimatorPredictor(predictor.Predictor): `None`, then `DEFAULT_SERVING_SIGNATURE_DEF_KEY` is used. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. 
""" self._graph = graph or ops.Graph() with self._graph.as_default(): @@ -71,6 +73,7 @@ class CoreEstimatorPredictor(predictor.Predictor): checkpoint_dir = estimator.model_dir self._session = monitored_session.MonitoredSession( session_creator=monitored_session.ChiefSessionCreator( + config=config, checkpoint_dir=checkpoint_dir)) feed_tensor_info = signature_def.inputs diff --git a/tensorflow/contrib/predictor/predictor_factories.py b/tensorflow/contrib/predictor/predictor_factories.py index 6e77e934fe..f275bc15ad 100644 --- a/tensorflow/contrib/predictor/predictor_factories.py +++ b/tensorflow/contrib/predictor/predictor_factories.py @@ -30,7 +30,8 @@ def from_contrib_estimator(estimator, prediction_input_fn, input_alternative_key=None, output_alternative_key=None, - graph=None): + graph=None, + config=None): """Constructs a `Predictor` from a `tf.contrib.learn.Estimator`. Args: @@ -44,6 +45,7 @@ def from_contrib_estimator(estimator, multi-headed models. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. Returns: An initialized `Predictor`. @@ -62,13 +64,15 @@ def from_contrib_estimator(estimator, prediction_input_fn, input_alternative_key=input_alternative_key, output_alternative_key=output_alternative_key, - graph=graph) + graph=graph, + config=config) def from_estimator(estimator, serving_input_receiver_fn, output_key=None, - graph=None): + graph=None, + config=None): """Constructs a `Predictor` from a `tf.python.estimator.Estimator`. Args: @@ -79,6 +83,7 @@ def from_estimator(estimator, `None`, then `DEFAULT_SERVING_SIGNATURE_DEF_KEY` is used. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. Returns: An initialized `Predictor`. @@ -93,14 +98,19 @@ def from_estimator(estimator, 'tf.contrib.learn.Estimator. You likely want to call ' 'from_contrib_estimator.') return core_estimator_predictor.CoreEstimatorPredictor( - estimator, serving_input_receiver_fn, output_key=output_key, graph=graph) + estimator, + serving_input_receiver_fn, + output_key=output_key, + graph=graph, + config=config) def from_saved_model(export_dir, signature_def_key=None, signature_def=None, tags=None, - graph=None): + graph=None, + config=None): """Constructs a `Predictor` from a `SavedModel` on disk. Args: @@ -115,6 +125,7 @@ def from_saved_model(export_dir, `SignatureDef`. Defaults to `DEFAULT_TAGS`. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. Returns: An initialized `Predictor`. 
@@ -128,4 +139,5 @@ def from_saved_model(export_dir, signature_def_key=signature_def_key, signature_def=signature_def, tags=tags, - graph=graph) + graph=graph, + config=config) diff --git a/tensorflow/contrib/predictor/predictor_factories_test.py b/tensorflow/contrib/predictor/predictor_factories_test.py index 578d9424b2..a2ef1dc3af 100644 --- a/tensorflow/contrib/predictor/predictor_factories_test.py +++ b/tensorflow/contrib/predictor/predictor_factories_test.py @@ -20,6 +20,7 @@ from __future__ import print_function from tensorflow.contrib.predictor import predictor_factories from tensorflow.contrib.predictor import testing_common +from tensorflow.core.protobuf import config_pb2 from tensorflow.python.platform import test MODEL_DIR_NAME = 'contrib/predictor/test_export_dir' @@ -41,6 +42,11 @@ class PredictorFactoriesTest(test.TestCase): """Test loading from_saved_model with tags.""" predictor_factories.from_saved_model(self._export_dir, tags='serve') + def testFromSavedModelWithSessionConfig(self): + """Test loading from_saved_model with session config.""" + predictor_factories.from_saved_model( + self._export_dir, config=config_pb2.ConfigProto()) + def testFromSavedModelWithBadTags(self): """Test that loading fails for bad tags.""" bad_tags_regex = ('.*? could not be found in SavedModel') @@ -53,6 +59,13 @@ class PredictorFactoriesTest(test.TestCase): predictor_factories.from_contrib_estimator( estimator, input_fn, output_alternative_key='sum') + def testFromContribEstimatorWithSessionConfig(self): + estimator = testing_common.get_arithmetic_estimator(core=False) + input_fn = testing_common.get_arithmetic_input_fn(core=False) + predictor_factories.from_contrib_estimator( + estimator, input_fn, output_alternative_key='sum', + config=config_pb2.ConfigProto()) + def testFromContribEstimatorWithCoreEstimatorRaises(self): estimator = testing_common.get_arithmetic_estimator(core=True) input_fn = testing_common.get_arithmetic_input_fn(core=True) @@ -64,6 +77,12 @@ class PredictorFactoriesTest(test.TestCase): input_fn = testing_common.get_arithmetic_input_fn(core=True) predictor_factories.from_estimator(estimator, input_fn) + def testFromCoreEstimatorWithSessionConfig(self): + estimator = testing_common.get_arithmetic_estimator(core=True) + input_fn = testing_common.get_arithmetic_input_fn(core=True) + predictor_factories.from_estimator( + estimator, input_fn, config=config_pb2.ConfigProto()) + def testFromCoreEstimatorWithContribEstimatorRaises(self): estimator = testing_common.get_arithmetic_estimator(core=False) input_fn = testing_common.get_arithmetic_input_fn(core=False) diff --git a/tensorflow/contrib/predictor/saved_model_predictor.py b/tensorflow/contrib/predictor/saved_model_predictor.py index 0dbca0f813..95da6d04ed 100644 --- a/tensorflow/contrib/predictor/saved_model_predictor.py +++ b/tensorflow/contrib/predictor/saved_model_predictor.py @@ -121,7 +121,8 @@ class SavedModelPredictor(predictor.Predictor): input_names=None, output_names=None, tags=None, - graph=None): + graph=None, + config=None): """Initialize a `CoreEstimatorPredictor`. Args: @@ -142,6 +143,7 @@ class SavedModelPredictor(predictor.Predictor): the correct `SignatureDef`. Defaults to `DEFAULT_TAGS`. graph: Optional. The Tensorflow `graph` in which prediction should be done. + config: `ConfigProto` proto used to configure the session. Raises: ValueError: If more than one of signature_def_key OR signature_def OR (input_names AND output_names) is specified. 
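The new config argument threads a session ConfigProto through every predictor factory. A minimal usage sketch; the export directory and feed values are hypothetical:

```python
from tensorflow.contrib import predictor
from tensorflow.core.protobuf import config_pb2

session_config = config_pb2.ConfigProto(allow_soft_placement=True)
saved_model_predictor = predictor.from_saved_model(
    '/tmp/saved_model_dir', config=session_config)  # hypothetical export dir
predictions = saved_model_predictor({'x': [[1.0, 2.0, 3.0]]})  # 'x' assumed input name
```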
@@ -152,7 +154,7 @@ class SavedModelPredictor(predictor.Predictor): self._graph = graph or ops.Graph() with self._graph.as_default(): - self._session = session.Session() + self._session = session.Session(config=config) loader.load(self._session, tags.split(','), export_dir) if input_names is None: diff --git a/tensorflow/contrib/quantize/README.md b/tensorflow/contrib/quantize/README.md index c83623ec94..27a933c0f9 100644 --- a/tensorflow/contrib/quantize/README.md +++ b/tensorflow/contrib/quantize/README.md @@ -6,7 +6,7 @@ inference. The details of the transformation implemented in this package is described here [1]. This is done using the -[fake quantization op](https://www.tensorflow.org/versions/r0.12/api_docs/python/array_ops/fake_quantization). +[fake quantization op](https://www.tensorflow.org/api_guides/python/array_ops#Fake_quantization). Literature has shown that fixed point networks provide comparable performance to floating point networks [2]. This is achieved by modeling the quantization diff --git a/tensorflow/contrib/slim/python/slim/evaluation_test.py b/tensorflow/contrib/slim/python/slim/evaluation_test.py index 94fc12ca81..3d0308aaf3 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation_test.py +++ b/tensorflow/contrib/slim/python/slim/evaluation_test.py @@ -26,7 +26,6 @@ import time import numpy as np from tensorflow.contrib.framework.python.ops import variables as variables_lib -from tensorflow.contrib.metrics.python.ops import metric_ops from tensorflow.contrib.slim.python.slim import evaluation from tensorflow.contrib.training.python.training import evaluation as evaluation_lib from tensorflow.core.protobuf import saver_pb2 @@ -37,6 +36,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import metrics from tensorflow.python.ops import variables from tensorflow.python.platform import flags from tensorflow.python.platform import gfile @@ -89,8 +89,8 @@ class EvaluationTest(test.TestCase): self._predictions, self._scale = TestModel(self._inputs) def testFinalOpsOnEvaluationLoop(self): - value_op, update_op = metric_ops.streaming_accuracy(self._predictions, - self._labels) + value_op, update_op = metrics.accuracy( + labels=self._labels, predictions=self._predictions) init_op = control_flow_ops.group(variables.global_variables_initializer(), variables.local_variables_initializer()) # Create checkpoint and log directories: @@ -136,9 +136,10 @@ class EvaluationTest(test.TestCase): self.assertTrue(obj.hook_was_run) def _create_names_to_metrics(self, predictions, labels): - accuracy0, update_op0 = metric_ops.streaming_accuracy(predictions, labels) - accuracy1, update_op1 = metric_ops.streaming_accuracy(predictions + 1, - labels) + accuracy0, update_op0 = metrics.accuracy( + labels=labels, predictions=predictions) + accuracy1, update_op1 = metrics.accuracy( + labels=labels, predictions=predictions + 1) names_to_values = {'Accuracy': accuracy0, 'Another_accuracy': accuracy1} names_to_updates = {'Accuracy': update_op0, 'Another_accuracy': update_op1} @@ -198,8 +199,8 @@ class EvaluationTest(test.TestCase): predictions_limited = input.limit_epochs(self._predictions, num_epochs=1) labels_limited = input.limit_epochs(self._labels, num_epochs=1) - value_op, update_op = metric_ops.streaming_accuracy( - predictions_limited, labels_limited) + value_op, update_op = metrics.accuracy( + labels=labels_limited, 
predictions=predictions_limited) init_op = control_flow_ops.group(variables.global_variables_initializer(), variables.local_variables_initializer()) @@ -260,8 +261,8 @@ class SingleEvaluationTest(test.TestCase): self._prepareCheckpoint(checkpoint_path) # Next, determine the metric to evaluate: - value_op, update_op = metric_ops.streaming_accuracy(self._predictions, - self._labels) + value_op, update_op = metrics.accuracy( + labels=self._labels, predictions=self._predictions) # Run the evaluation and verify the results: accuracy_value = evaluation.evaluate_once( @@ -276,8 +277,8 @@ class SingleEvaluationTest(test.TestCase): self._prepareCheckpoint(checkpoint_path) # Next, determine the metric to evaluate: - value_op, update_op = metric_ops.streaming_accuracy(self._predictions, - self._labels) + value_op, update_op = metrics.accuracy( + labels=self._labels, predictions=self._predictions) dumping_root = os.path.join(self.get_temp_dir(), 'tfdbg_dump_dir') dumping_hook = hooks.DumpingDebugHook(dumping_root, log_usage=False) diff --git a/tensorflow/contrib/summary/summary.py b/tensorflow/contrib/summary/summary.py index 99ced53e11..d22b80ac88 100644 --- a/tensorflow/contrib/summary/summary.py +++ b/tensorflow/contrib/summary/summary.py @@ -21,6 +21,7 @@ from @{tf.summary.merge_all} to @{tf.summary.FileWriter}. To use with eager execution enabled, write your code as follows: +```python global_step = tf.train.get_or_create_global_step() summary_writer = tf.contrib.summary.create_file_writer( train_dir, flush_millis=10000) @@ -30,9 +31,11 @@ with summary_writer.as_default(), tf.contrib.summary.always_record_summaries(): tf.contrib.summary.scalar("loss", my_loss) # In this case every call to tf.contrib.summary.scalar will generate a record # ... +``` To use it with graph execution, write your code as follows: +```python global_step = tf.train.get_or_create_global_step() summary_writer = tf.contrib.summary.create_file_writer( train_dir, flush_millis=10000) @@ -53,7 +56,7 @@ with tf.Session(...) as sess: while not_done_training: sess.run([train_op, tf.contrib.summary.all_summary_ops()]) # ... 
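The streaming_* metrics replaced throughout these tests are the tf.metrics equivalents with (labels=, predictions=) keyword order; both APIs return the same (value_op, update_op) pair. A minimal sketch of that pair in use, with illustrative tensors:

```python
import tensorflow as tf

labels = tf.constant([1, 0, 1, 1])
predictions = tf.constant([1, 0, 0, 1])
value_op, update_op = tf.metrics.accuracy(labels=labels, predictions=predictions)

with tf.Session() as sess:
  sess.run(tf.local_variables_initializer())  # metric counters are local variables
  sess.run(update_op)                         # accumulate total/count
  print(sess.run(value_op))                   # 0.75
```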
- +``` """ from __future__ import absolute_import diff --git a/tensorflow/contrib/tensor_forest/client/eval_metrics.py b/tensorflow/contrib/tensor_forest/client/eval_metrics.py index e893e1d1c8..d8236a0a6f 100644 --- a/tensorflow/contrib/tensor_forest/client/eval_metrics.py +++ b/tensorflow/contrib/tensor_forest/client/eval_metrics.py @@ -21,10 +21,10 @@ import numpy as np from tensorflow.contrib import losses from tensorflow.contrib.learn.python.learn.estimators import prediction_key -from tensorflow.contrib.metrics.python.ops import metric_ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import metrics from tensorflow.python.ops import nn INFERENCE_PROB_NAME = prediction_key.PredictionKey.PROBABILITIES @@ -38,12 +38,13 @@ def _top_k_generator(k): targets = math_ops.to_int32(targets) if targets.get_shape().ndims > 1: targets = array_ops.squeeze(targets, axis=[1]) - return metric_ops.streaming_mean(nn.in_top_k(probabilities, targets, k)) + return metrics.mean(nn.in_top_k(probabilities, targets, k)) return _top_k def _accuracy(predictions, targets, weights=None): - return metric_ops.streaming_accuracy(predictions, targets, weights=weights) + return metrics.accuracy( + labels=targets, predictions=predictions, weights=weights) def _r2(probabilities, targets, weights=None): @@ -53,7 +54,7 @@ def _r2(probabilities, targets, weights=None): squares_residuals = math_ops.reduce_sum( math_ops.square(targets - probabilities), 0) score = 1 - math_ops.reduce_sum(squares_residuals / squares_total) - return metric_ops.streaming_mean(score, weights=weights) + return metrics.mean(score, weights=weights) def _squeeze_and_onehot(targets, depth): @@ -62,7 +63,7 @@ def _squeeze_and_onehot(targets, depth): def _sigmoid_entropy(probabilities, targets, weights=None): - return metric_ops.streaming_mean( + return metrics.mean( losses.sigmoid_cross_entropy(probabilities, _squeeze_and_onehot( targets, @@ -71,7 +72,7 @@ def _sigmoid_entropy(probabilities, targets, weights=None): def _softmax_entropy(probabilities, targets, weights=None): - return metric_ops.streaming_mean( + return metrics.mean( losses.sparse_softmax_cross_entropy(probabilities, math_ops.to_int32(targets)), weights=weights) @@ -82,7 +83,7 @@ def _predictions(predictions, unused_targets, **unused_kwargs): def _class_log_loss(probabilities, targets, weights=None): - return metric_ops.streaming_mean( + return metrics.mean( losses.log_loss(probabilities, _squeeze_and_onehot(targets, array_ops.shape(probabilities)[1])), @@ -90,34 +91,36 @@ def _class_log_loss(probabilities, targets, weights=None): def _precision(predictions, targets, weights=None): - return metric_ops.streaming_precision(predictions, targets, weights=weights) + return metrics.precision( + labels=targets, predictions=predictions, weights=weights) def _precision_at_thresholds(predictions, targets, weights=None): - return metric_ops.streaming_precision_at_thresholds( - array_ops.slice(predictions, [0, 1], [-1, 1]), - targets, - np.arange( - 0, 1, 0.01, dtype=np.float32), + return metrics.precision_at_thresholds( + labels=targets, + predictions=array_ops.slice(predictions, [0, 1], [-1, 1]), + thresholds=np.arange(0, 1, 0.01, dtype=np.float32), weights=weights) def _recall(predictions, targets, weights=None): - return metric_ops.streaming_recall(predictions, targets, weights=weights) + return metrics.recall( + labels=targets, predictions=predictions, weights=weights) def _recall_at_thresholds(predictions, targets, 
weights=None): - return metric_ops.streaming_recall_at_thresholds( - array_ops.slice(predictions, [0, 1], [-1, 1]), - targets, - np.arange( - 0, 1, 0.01, dtype=np.float32), + return metrics.recall_at_thresholds( + labels=targets, + predictions=array_ops.slice(predictions, [0, 1], [-1, 1]), + thresholds=np.arange(0, 1, 0.01, dtype=np.float32), weights=weights) def _auc(probs, targets, weights=None): - return metric_ops.streaming_auc(array_ops.slice(probs, [0, 1], [-1, 1]), - targets, weights=weights) + return metrics.auc( + labels=targets, + predictions=array_ops.slice(probs, [0, 1], [-1, 1]), + weights=weights) _EVAL_METRICS = { diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest.py b/tensorflow/contrib/tensor_forest/python/tensor_forest.py index 7a35a70bbe..6f62cd11a9 100644 --- a/tensorflow/contrib/tensor_forest/python/tensor_forest.py +++ b/tensorflow/contrib/tensor_forest/python/tensor_forest.py @@ -295,7 +295,7 @@ def get_epoch_variable(): # A simple container to hold the training variables for a single tree. -class TreeTrainingVariables(object): +class TreeVariables(object): """Stores tf.Variables for training a single random tree. Uses tf.get_variable to get tree-specific names so that this can be used @@ -303,7 +303,7 @@ class TreeTrainingVariables(object): then relies on restoring that model to evaluate). """ - def __init__(self, params, tree_num, training): + def __init__(self, params, tree_num, training, tree_config='', tree_stat=''): if (not hasattr(params, 'params_proto') or not isinstance(params.params_proto, _params_proto.TensorForestParams)): @@ -315,27 +315,28 @@ class TreeTrainingVariables(object): # TODO(gilberth): Manually shard this to be able to fit it on # multiple machines. self.stats = stats_ops.fertile_stats_variable( - params, '', self.get_tree_name('stats', tree_num)) + params, tree_stat, self.get_tree_name('stats', tree_num)) self.tree = model_ops.tree_variable( - params, '', self.stats, self.get_tree_name('tree', tree_num)) + params, tree_config, self.stats, self.get_tree_name('tree', tree_num)) def get_tree_name(self, name, num): return '{0}-{1}'.format(name, num) -class ForestTrainingVariables(object): +class ForestVariables(object): """A container for a forests training data, consisting of multiple trees. - Instantiates a TreeTrainingVariables object for each tree. We override the + Instantiates a TreeVariables object for each tree. We override the __getitem__ and __setitem__ function so that usage looks like this: - forest_variables = ForestTrainingVariables(params) + forest_variables = ForestVariables(params) ... forest_variables.tree ... """ def __init__(self, params, device_assigner, training=True, - tree_variables_class=TreeTrainingVariables): + tree_variables_class=TreeVariables, + tree_configs=None, tree_stats=None): self.variables = [] # Set up some scalar variables to run through the device assigner, then # we can use those to colocate everything related to a tree. 
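ForestVariables now accepts per-tree serialized configs and stats, which RandomForestGraphs forwards (see the constructor change in the next hunk). A minimal sketch of rebuilding a forest from previously serialized tree protos; restored_tree_param is an assumed input, a serialized decision-tree Model proto such as the one constructed in the test below:

```python
from tensorflow.contrib.tensor_forest.python import tensor_forest

hparams = tensor_forest.ForestHParams(
    num_classes=2, num_features=2, num_trees=1, max_nodes=1000,
    split_after_samples=25).fill()

input_data = [[-1., 0.], [1., 0.]]  # two example rows with num_features=2
# restored_tree_param: serialized Model proto saved from a trained forest (assumed).
graph_builder = tensor_forest.RandomForestGraphs(
    hparams, tree_configs=[restored_tree_param])
probs, paths, var = graph_builder.inference_graph(input_data)
```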
@@ -347,7 +348,13 @@ class ForestTrainingVariables(object): for i in range(params.num_trees): with ops.device(self.device_dummies[i].device): - self.variables.append(tree_variables_class(params, i, training)) + kwargs = {} + if tree_configs is not None: + kwargs.update(dict(tree_config=tree_configs[i])) + if tree_stats is not None: + kwargs.update(dict(tree_stat=tree_stats[i])) + self.variables.append(tree_variables_class( + params, i, training, **kwargs)) def __setitem__(self, t, val): self.variables[t] = val @@ -361,9 +368,11 @@ class RandomForestGraphs(object): def __init__(self, params, + tree_configs=None, + tree_stats=None, device_assigner=None, variables=None, - tree_variables_class=TreeTrainingVariables, + tree_variables_class=TreeVariables, tree_graphs=None, training=True): self.params = params @@ -371,9 +380,10 @@ class RandomForestGraphs(object): device_assigner or framework_variables.VariableDeviceChooser()) logging.info('Constructing forest with params = ') logging.info(self.params.__dict__) - self.variables = variables or ForestTrainingVariables( + self.variables = variables or ForestVariables( self.params, device_assigner=self.device_assigner, training=training, - tree_variables_class=tree_variables_class) + tree_variables_class=tree_variables_class, + tree_configs=tree_configs, tree_stats=tree_stats) tree_graph_class = tree_graphs or RandomTreeGraphs self.trees = [ tree_graph_class(self.variables[i], self.params, i) diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py b/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py index bbe627b157..1c9c81827e 100644 --- a/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py +++ b/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py @@ -18,10 +18,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from google.protobuf.json_format import ParseDict +from tensorflow.contrib.decision_trees.proto import generic_tree_model_pb2 as _tree_proto from tensorflow.contrib.tensor_forest.python import tensor_forest from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util +from tensorflow.python.ops import resources +from tensorflow.python.ops import variables from tensorflow.python.platform import googletest @@ -110,6 +114,47 @@ class TensorForestTest(test_util.TensorFlowTestCase): self.assertTrue(isinstance(paths, ops.Tensor)) self.assertTrue(isinstance(var, ops.Tensor)) + def testInfrenceFromRestoredModel(self): + input_data = [[-1., 0.], [-1., 2.], # node 1 + [1., 0.], [1., -2.]] # node 2 + expected_prediction = [[0.0, 1.0], [0.0, 1.0], + [0.0, 1.0], [0.0, 1.0]] + hparams = tensor_forest.ForestHParams( + num_classes=2, + num_features=2, + num_trees=1, + max_nodes=1000, + split_after_samples=25).fill() + tree_weight = {'decisionTree': + {'nodes': + [{'binaryNode': + {'rightChildId': 2, + 'leftChildId': 1, + 'inequalityLeftChildTest': + {'featureId': {'id': '0'}, + 'threshold': {'floatValue': 0}}}}, + {'leaf': {'vector': + {'value': [{'floatValue': 0.0}, + {'floatValue': 1.0}]}}, + 'nodeId': 1}, + {'leaf': {'vector': + {'value': [{'floatValue': 0.0}, + {'floatValue': 1.0}]}}, + 'nodeId': 2}]}} + restored_tree_param = ParseDict(tree_weight, + _tree_proto.Model()).SerializeToString() + graph_builder = tensor_forest.RandomForestGraphs(hparams, + [restored_tree_param]) + probs, paths, var = graph_builder.inference_graph(input_data) + 
self.assertTrue(isinstance(probs, ops.Tensor)) + self.assertTrue(isinstance(paths, ops.Tensor)) + self.assertTrue(isinstance(var, ops.Tensor)) + with self.test_session(): + variables.global_variables_initializer().run() + resources.initialize_resources(resources.shared_resources()).run() + self.assertEquals(probs.eval().shape, (4, 2)) + self.assertEquals(probs.eval().tolist(), expected_prediction) + def testTrainingConstructionClassificationSparse(self): input_data = sparse_tensor.SparseTensor( indices=[[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]], diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index b7b26cfb1c..da4dd5a14c 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -91,8 +91,11 @@ void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, if (!subgraph_node_ids.count(edge->src()->id()) && !edge->src()->IsSource() && !edge->IsControlEdge()) { incoming_edges->insert(edge); + VLOG(2) << "INCOMING " << edge->src()->name() << " -> " << node->name() + << " Y, "; } else { - VLOG(2) << node->name() << " -> " << edge->src()->name() << " N, "; + VLOG(2) << "INCOMING " << edge->src()->name() << " -> " << node->name() + << " N, "; } } } @@ -106,10 +109,12 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph, for (const tensorflow::Edge* edge : node->out_edges()) { if (!subgraph_node_ids.count(edge->dst()->id()) && !edge->dst()->IsSink() && !edge->IsControlEdge()) { - VLOG(2) << node->name() << " -> " << edge->dst()->name() << " Y, "; + VLOG(2) << "OUTGOING " << node->name() << " -> " << edge->dst()->name() + << " Y, "; outgoing_edges->insert(edge); } else { - VLOG(2) << node->name() << " -> " << edge->dst()->name() << " N, "; + VLOG(2) << "OUTGOING " << node->name() << " -> " << edge->dst()->name() + << " N, "; } } } @@ -181,29 +186,27 @@ struct ConvertGraphParams { static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) { GetSubGraphIncomingEdges(p->graph, p->subgraph_node_ids, &p->subgraph_incoming_edges); + + std::set> unique_tensors; + // Add only unique input source nodes. 
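The FillSubGraphEdgeSets rewrite here routes the subgraph inputs and outputs through a std::set keyed by (node id, output port), so an outside tensor consumed by several nodes inside the segment becomes a single engine input. A rough Python sketch of that de-duplication, assuming edges are reduced to (src_node_id, src_output_port) pairs:

```python
def unique_boundary_tensors(edge_tensors):
  """edge_tensors: iterable of (src_node_id, src_output_port) pairs."""
  # a std::set keeps one copy of each tensor, in sorted order
  return sorted(set(edge_tensors))

# two in-segment consumers of node 3's output 0 yield a single engine input
assert unique_boundary_tensors([(3, 0), (3, 0), (5, 1)]) == [(3, 0), (5, 1)]
```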
If output of an outside node is shared + // between multiple nodes inside the engine, only one edge should be created for (const tensorflow::Edge* edge : p->subgraph_incoming_edges) { - p->subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); - } - auto output_name_to_index_map = BuildTensorNameMap(p->output_names); - std::set> subgraph_outputs_set; - // Collect outputs referenced from output_names - for (int node_id : p->subgraph_node_ids) { - tensorflow::Node* node = p->graph.FindNodeId(node_id); - if (output_name_to_index_map.count(node->name())) { - for (int index : output_name_to_index_map.at(node->name())) { - subgraph_outputs_set.insert({node_id, index}); - } - } + unique_tensors.insert({edge->src()->id(), edge->src_output()}); } + p->subgraph_inputs.insert(p->subgraph_inputs.begin(), unique_tensors.begin(), + unique_tensors.end()); GetSubGraphOutgoingEdges(p->graph, p->subgraph_node_ids, &p->subgraph_outgoing_edges); + unique_tensors.clear(); + // Similar to above, if multiple ouside nodes are sharing the output of an + // internal node only one output port should be created and shared between + // outputs for (const tensorflow::Edge* edge : p->subgraph_outgoing_edges) { - subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); + unique_tensors.insert({edge->src()->id(), edge->src_output()}); } - p->subgraph_outputs.reserve(subgraph_outputs_set.size()); + p->subgraph_outputs.reserve(unique_tensors.size()); p->subgraph_outputs.insert(p->subgraph_outputs.begin(), - subgraph_outputs_set.begin(), - subgraph_outputs_set.end()); + unique_tensors.begin(), unique_tensors.end()); return tensorflow::Status::OK(); } @@ -225,7 +228,6 @@ tensorflow::Status GetCalibNode(ConvertGraphParams* params) { for (auto in_edge : params->subgraph_incoming_edges) { // loop over incoming edges and // attach them to calib node - // tensorflow::Node* src_node = in_edge->src(); auto src_output = in_edge->src_output(); auto dst_node = in_edge->dst(); auto dst_input = in_edge->dst_input(); @@ -257,19 +259,24 @@ tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { for (size_t i = 0; i < params->subgraph_inputs.size(); ++i) { subgraph_edge_to_input_map.insert({params->subgraph_inputs.at(i), i}); } + std::set> unique_tensors; for (const tensorflow::Edge* edge : params->subgraph_incoming_edges) { std::pair old_src = {edge->src()->id(), edge->src_output()}; + if (unique_tensors.count(old_src)) continue; + unique_tensors.insert(old_src); int new_src_output = subgraph_edge_to_input_map.at(old_src); params->graph.AddEdge(edge->src(), edge->src_output(), trt_node, new_src_output); + VLOG(1) << "Wire " << edge->src()->name() << ":" << edge->src_output() + << " -> " << trt_node->name() << ":" << new_src_output; params->graph.RemoveEdge(edge); } - - VLOG(2) << "new wiring edges: " << trt_node->in_edges().size(); - for (const tensorflow::Edge* edge : trt_node->in_edges()) { - VLOG(2) << edge->src()->name() << " port: " << edge->src_output(); + if (VLOG_IS_ON(2)) { + VLOG(2) << "new edge count: " << trt_node->in_edges().size(); + for (const tensorflow::Edge* edge : trt_node->in_edges()) { + VLOG(2) << edge->src()->name() << " port: " << edge->src_output(); + } } - TF_RETURN_IF_ERROR(status); // Re-map outgoing edges to use the new TRT node instead of the orig subgraph @@ -283,6 +290,8 @@ tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { int new_src_output = subgraph_edge_to_output_map.at(old_src); TF_RETURN_IF_ERROR(params->graph.UpdateEdge( trt_node, 
new_src_output, edge->dst(), edge->dst_input())); + VLOG(1) << "Wire " << trt_node->name() << ":" << new_src_output << " -> " + << edge->dst()->name() << ":" << edge->dst_input(); } // Remove the original subgraph for (int node_id : params->subgraph_node_ids) { @@ -317,9 +326,12 @@ tensorflow::Status ConvertCalibGraphToInferGraph( tensorflow::GraphConstructorOptions(), graph_def, &graph)); // get calib nodes std::vector calib_nodes; - for (auto node : graph.op_nodes()) { + std::vector topo_order; + tensorflow::GetPostOrder(graph, &topo_order); + for (auto rit = topo_order.rbegin(); rit != topo_order.rend(); ++rit) { + auto node = *rit; if (node->type_string() == "TRTCalibOp") { - VLOG(1) << "Found Calib Node"; + VLOG(1) << "Found Calib Node " << node->name(); calib_nodes.push_back(node); } } diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 96e0700862..4e4d295538 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -362,10 +362,11 @@ void ReorderCKtoKC(const TRT_ShapedWeights& iweights, break; } case tensorflow::DataType::DT_HALF: { - Reorder2({k, c}, static_cast(iweights.GetValues()), - istrides, static_cast( - const_cast(oweights->GetValues())), - ostrides); + Reorder2( + {k, c}, static_cast(iweights.GetValues()), + istrides, + static_cast(const_cast(oweights->GetValues())), + ostrides); break; } default: @@ -1179,9 +1180,9 @@ tensorflow::Status BinaryTensorOpTensor( CHECK_EQ_TYPE(tensor_r->getType(), dtype); auto op_pair = ops.find(node_def.op()); if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + - " not supported at: " + - node_def.name()); + return tensorflow::errors::Unimplemented( + "binary op: " + node_def.op() + + " not supported at: " + node_def.name()); nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( *const_cast(tensor_l), @@ -2138,9 +2139,7 @@ void Converter::register_op_converters() { } } // namespace -tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams& s) { - return tensorflow::errors::Unimplemented("Not implemented yet"); -} + tensorflow::Status ConvertCalibrationNodeToEngineNode( tensorflow::Graph& graph, tensorflow::Node* c_node) { const auto ndef = c_node->def(); @@ -2164,9 +2163,23 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( for (auto n : graph.op_nodes()) { node_maps.insert({n->name(), n}); } + std::set subgraph_ids; + for (const auto internal_node : segment_nodes) { + subgraph_ids.insert(node_maps.at(internal_node)->id()); + } + if (VLOG_IS_ON(2)) { + string node_names = StrCat(c_node->name(), " segment nodes= "); + + for (const auto& node_name : segment_nodes) { + StrAppend(&node_names, node_name, ", "); + } + VLOG(2) << node_names; + } + VLOG(1) << "Output Nodes:"; std::vector out_types; std::vector out_edges; + for (auto& i : output_nodes) { auto node_port = tensorflow::str_util::Split(i, ":"); VLOG(1) << " " << i << " in graph " << node_maps.count(i); @@ -2186,18 +2199,24 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( out_types.push_back(out_node->output_type(0)); } for (auto out_edge : out_node->out_edges()) { + if (subgraph_ids.count(out_edge->dst()->id())) + continue; // skip internal edges; if (out_edge->src_output() == port) { out_edges.push_back(out_edge); - break; + VLOG(1) << "OUTPUT EDGE " << out_edge->src()->name() << ":" + << out_edge->src_output() << " -> " << out_edge->dst()->name() + << 
":" << out_edge->dst_input(); } } } else { LOG(WARNING) << " couldn't find output node " << out_node_name; } } - VLOG(1) << "Input Nodes:"; - for (auto& i : input_names) { - VLOG(1) << " " << i << " in graph " << node_maps.count(i); + if (VLOG_IS_ON(1)) { + VLOG(1) << c_node->name() << " Input Nodes:"; + for (auto& i : input_names) { + VLOG(1) << " Input " << i << " in graph " << node_maps.count(i); + } } auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance(); auto resmgr = trt_rm->getManager("TRTCalibOps"); @@ -2231,14 +2250,24 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( calib_res->builder_ = nullptr; tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); std::vector income_edges; + income_edges.resize(c_node->num_inputs()); for (const auto in_edge : c_node->in_edges()) { auto src = in_edge->src(); int dest_port = in_edge->dst_input(); - income_edges.emplace_back(src->name(), in_edge->src_output(), - c_node->input_type(dest_port)); + VLOG(1) << "Incoming connection " << src->name() << ":" + << in_edge->src_output() << " -> " << c_node->name() << ":" + << dest_port; + income_edges.at(dest_port) = {src->name(), in_edge->src_output(), + c_node->input_type(dest_port)}; } tensorflow::gtl::ArraySlice input_list( income_edges); + if (VLOG_IS_ON(2)) { + for (const auto& inp : input_list) { + VLOG(2) << " Input from inputlist " << inp.node << ":" << inp.index << " " + << tensorflow::DataTypeString(inp.data_type); + } + } op_builder.Input(input_list); tensorflow::NodeDef engine_node; const char* engine_plan_data = static_cast(engine_plan->data()); @@ -2255,13 +2284,26 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( } auto trt_engine_node = graph.AddNode(engine_node, &status); TF_RETURN_IF_ERROR(status); - for (size_t i = 0; i < out_edges.size(); i++) { - VLOG(1) << "Connecting trt_engine_node output " << i << " with " - << out_edges.at(i)->dst()->name() << " port " - << out_edges.at(i)->dst_input(); - TF_RETURN_IF_ERROR(graph.UpdateEdge(trt_engine_node, i, - out_edges.at(i)->dst(), - out_edges.at(i)->dst_input())); + std::map port_map; + for (size_t t = 0; t < output_nodes.size(); t++) { + port_map.insert({output_nodes.at(t), t}); + } + for (auto& i : out_edges) { + string s(i->src()->name()); + if (i->src_output()) StrAppend(&s, ":", i->src_output()); + int out_port = port_map.at(s); + VLOG(1) << "Connecting " << trt_engine_node->name() << ":" << out_port + << " -> " << i->dst()->name() << ":" << i->dst_input(); + TF_RETURN_IF_ERROR( + graph.UpdateEdge(trt_engine_node, out_port, i->dst(), i->dst_input())); + } + for (const auto ed : trt_engine_node->in_edges()) { + VLOG(1) << "In Edge " << ed->src()->name() << ":" << ed->src_output() + << " -> " << ed->dst()->name() << ":" << ed->dst_input(); + } + for (const auto ed : trt_engine_node->out_edges()) { + VLOG(1) << "Out Edge " << ed->src()->name() << ":" << ed->src_output() + << " -> " << ed->dst()->name() << ":" << ed->dst_input(); } VLOG(1) << "Segment nodes:"; for (auto& i : segment_nodes) { @@ -2332,6 +2374,7 @@ tensorflow::Status ConvertSubgraph( std::vector* output_names, std::vector* output_dtypes, const string& engine_name) { + std::set added_tensors; for (const std::pair& input : s.input_inds) { VLOG(2) << "parsing input. 
Node id= " << input.first; int node_id = input.first; @@ -2374,7 +2417,6 @@ tensorflow::Status ConvertSubgraph( auto op_info = op_info_vec.at(shape_inference_output_idx); tensorflow::DataType tf_dtype = op_info.dtype(); - input_dtypes->push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); auto type_status = ConvertDType(tf_dtype, &dtype); @@ -2410,8 +2452,10 @@ tensorflow::Status ConvertSubgraph( if (output_idx != 0) { input_tensor_name = StrCat(node_name, ":", output_idx); } - + if (added_tensors.count(input_tensor_name)) continue; + added_tensors.insert(input_tensor_name); input_names->push_back(input_tensor_name); + input_dtypes->push_back(tf_dtype); nvinfer1::ITensor* input_tensor = converter.network()->addInput( input_tensor_name.c_str(), dtype, input_dim_pseudo_chw); @@ -2435,6 +2479,7 @@ tensorflow::Status ConvertSubgraph( // Gather output metadata int trt_engine_op_output_idx = 0; + added_tensors.clear(); for (const std::pair& output : s.output_inds) { int node_id = output.first; int output_idx = output.second; @@ -2451,6 +2496,8 @@ tensorflow::Status ConvertSubgraph( if (output_idx != 0) tensorflow::strings::StrAppend(&tensor_name, ":", output_idx); VLOG(2) << "Output tensor name: " << tensor_name; + if (added_tensors.count(tensor_name)) continue; + added_tensors.insert(tensor_name); output_names->push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); if (!tensor_or_weights.is_tensor()) { diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py index 2e472a2805..d879170b68 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets.py @@ -166,11 +166,21 @@ def StreamingFilesDataset(files, return remote_iterator.get_next() def MapFn(unused_input): - return functional_ops.remote_call( + if isinstance(source_dataset.output_types, dtypes.DType): + output_types = [source_dataset.output_types] + elif isinstance(source_dataset.output_types, (list, tuple)): + output_types = source_dataset.output_types + else: + raise ValueError('source dataset has invalid output types') + remote_calls = functional_ops.remote_call( args=[source_handle], - Tout=[dtypes.string], + Tout=output_types, f=LoadingFunc, - target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job)[0] + target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job) + if len(remote_calls) == 1: + return remote_calls[0] + else: + return remote_calls with ops.device('/job:%s' % worker_job): output_dataset = dataset_ops.Dataset.range(2).repeat().map( diff --git a/tensorflow/contrib/tpu/python/tpu/datasets_test.py b/tensorflow/contrib/tpu/python/tpu/datasets_test.py index 918cf0ed8e..b58d05eac5 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets_test.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets_test.py @@ -26,6 +26,8 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import readers +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_shape from tensorflow.python.lib.io import python_io from tensorflow.python.platform import test from tensorflow.python.training import server_lib @@ -162,6 +164,30 @@ class DatasetsTest(test.TestCase): self.assertEqual(set(all_contents), set(retrieved_values)) + def testArbitraryReaderFuncFromDatasetGenerator(self): + + def my_generator(): + yield (1, [1] * 10) + + def 
gen_dataset(dummy): + return dataset_ops.Dataset.from_generator( + my_generator, (dtypes.int64, dtypes.int64), + (tensor_shape.TensorShape([]), tensor_shape.TensorShape([10]))) + + dataset = datasets.StreamingFilesDataset( + dataset_ops.Dataset.range(10), filetype=gen_dataset) + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = self._sess.run(get_next) + + self.assertIsInstance(retrieved_values, (list, tuple)) + self.assertEqual(len(retrieved_values), 2) + self.assertEqual(retrieved_values[0], 1) + self.assertItemsEqual(retrieved_values[1], [1] * 10) + def testUnexpectedFiletypeString(self): with self.assertRaises(ValueError): datasets.StreamingFilesDataset( diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index c72ba2daff..a0cf59852b 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -700,7 +700,9 @@ cc_library( srcs = ["platform/stacktrace_handler.cc"], hdrs = ["platform/stacktrace_handler.h"], deps = [ + ":abi", ":lib_platform", + ":stacktrace", ], ) @@ -3090,6 +3092,8 @@ cc_library( # we now need at least "str_util". ":lib", ":lib_platform", + ":stacktrace_handler", + ":test_lite", "//tensorflow/core/platform/default/build_config:test_lite_main", ], alwayslink = 1, @@ -3570,7 +3574,10 @@ tf_cc_tests_gpu( tf_cc_test_mkl( name = "mkl_runtime_tests", size = "small", - srcs = ["common_runtime/mkl_cpu_allocator_test.cc"], + srcs = [ + "common_runtime/mkl_cpu_allocator_test.cc", + "common_runtime/mkl_threadpool_device_test.cc", + ], linkstatic = 1, deps = [ ":core", diff --git a/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt b/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt index cbe76de415..985f09312f 100644 --- a/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt @@ -4,6 +4,10 @@ op { description: < 0`, limit of the split of the result. +END + } + summary: "Split elements of `source` based on `sep` into a `SparseTensor`." + description: <2<><>3"` and +sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty +string, consecutive whitespace are regarded as a single separator, and the +result will contain no empty strings at the startor end if the string has +leading or trailing whitespace. + +Note that the above mentioned behavior matches python's str.split. +END +} diff --git a/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt new file mode 100644 index 0000000000..0e8576fb01 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StringSplitV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StringSplitV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index 8f2a419756..9cda17867b 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -86,7 +86,7 @@ BFCAllocator::Chunk* BFCAllocator::ChunkFromHandle(ChunkHandle h) { return &(chunks_[h]); } -bool BFCAllocator::Extend(size_t rounded_bytes) { +bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { size_t available_bytes = memory_limit_ - total_region_allocated_bytes_; // Rounds available_bytes down to the nearest multiple of kMinAllocationSize. 
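Returning to the StreamingFilesDataset hunk further above: MapFn now normalizes source_dataset.output_types to a flat list before calling functional_ops.remote_call, and unwraps the result only when the dataset has a single component. A hedged sketch of that normalization in plain Python; the helper names are illustrative and the real code additionally checks that a bare value is a tf.DType:

```python
def normalize_output_types(output_types):
  # remote_call needs a flat list of dtypes; a bare dtype means one component
  if isinstance(output_types, (list, tuple)):
    return list(output_types)
  return [output_types]

def unwrap_remote_call(results):
  # preserve the old single-tensor return, pass multi-component results through
  return results[0] if len(results) == 1 else results

assert unwrap_remote_call(['t0']) == 't0'
assert unwrap_remote_call(['t0', 't1']) == ['t0', 't1']
```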
available_bytes = (available_bytes / kMinAllocationSize) * kMinAllocationSize; @@ -108,7 +108,7 @@ bool BFCAllocator::Extend(size_t rounded_bytes) { // Try allocating. size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes); - void* mem_addr = suballocator_->Alloc(32, bytes); + void* mem_addr = suballocator_->Alloc(alignment, bytes); if (mem_addr == nullptr && !started_backpedal_) { // Only backpedal once. started_backpedal_ = true; @@ -119,7 +119,7 @@ bool BFCAllocator::Extend(size_t rounded_bytes) { while (mem_addr == nullptr) { bytes = RoundedBytes(bytes * kBackpedalFactor); if (bytes < rounded_bytes) break; - mem_addr = suballocator_->Alloc(32, bytes); + mem_addr = suballocator_->Alloc(alignment, bytes); } } @@ -261,7 +261,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment, } // Try to extend - if (Extend(rounded_bytes)) { + if (Extend(unused_alignment, rounded_bytes)) { ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes); if (ptr != nullptr) { return ptr; diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h index ba5a3eea3a..52aedb1e9c 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.h +++ b/tensorflow/core/common_runtime/bfc_allocator.h @@ -305,7 +305,8 @@ class BFCAllocator : public VisitableAllocator { // Try to add a new memory region that can satisfy an allocation of // 'rounded_bytes' bytes. Returns true on success and false on // failure. - bool Extend(size_t rounded_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_); + bool Extend(size_t alignment, size_t rounded_bytes) + EXCLUSIVE_LOCKS_REQUIRED(lock_); // Returns a pointer to an underlying allocated chunk of size // 'rounded_bytes'. diff --git a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc index 6e08e33f8e..486f0be698 100644 --- a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc +++ b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc @@ -105,9 +105,25 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelTest) { EXPECT_EQ(2, shape.dim(0).size()); EXPECT_EQ(1, shape.dim(1).size()); if (node->name() == y->name()) { +#ifdef INTEL_MKL + // if MKL is used, it goes through various additional + // graph rewrite pass. In TF, everytime a graph pass + // happens, "constant" nodes are allocated + // and deallocated. Each allocation calls the + // (FindChunkPtr of BFCAllocator), + // which increments the value of AllocationId. + // Thus AllocationId becomes more than 3 and 4 if + // MKL is used. Now they are 9 and 10 for MKL. + EXPECT_EQ(19, cm->AllocationId(node, 0)); +#else EXPECT_EQ(21, cm->AllocationId(node, 0)); +#endif } else { +#ifdef INTEL_MKL + EXPECT_EQ(20, cm->AllocationId(node, 0)); +#else EXPECT_EQ(22, cm->AllocationId(node, 0)); +#endif } } EXPECT_LE(0, cm->MaxExecutionTime(node)); diff --git a/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc b/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc new file mode 100644 index 0000000000..5d583a8360 --- /dev/null +++ b/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc @@ -0,0 +1,53 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifdef INTEL_MKL + +#include "tensorflow/core/common_runtime/threadpool_device.h" + +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/session_options.h" + +namespace tensorflow { + +#ifdef _OPENMP +TEST(MKLThreadPoolDeviceTest, TestOmpDefaults) { + SessionOptions options; + unsetenv("OMP_NUM_THREADS"); + + ThreadPoolDevice* tp = new ThreadPoolDevice( + options, "/device:CPU:0", Bytes(256), DeviceLocality(), cpu_allocator()); + + const int ht = port::NumHyperthreadsPerCore(); + EXPECT_EQ(omp_get_max_threads(), (port::NumSchedulableCPUs() + ht - 1) / ht); +} + +TEST(MKLThreadPoolDeviceTest, TestOmpPreSets) { + SessionOptions options; + setenv("OMP_NUM_THREADS", "314", 1); + + ThreadPoolDevice* tp = new ThreadPoolDevice( + options, "/device:CPU:0", Bytes(256), DeviceLocality(), cpu_allocator()); + + EXPECT_EQ(omp_get_max_threads(), 314); +} +#endif // _OPENMP + +} // namespace tensorflow + +#endif // INTEL_MKL diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc index 21912236d0..a5d31b75c7 100644 --- a/tensorflow/core/common_runtime/process_util.cc +++ b/tensorflow/core/common_runtime/process_util.cc @@ -16,8 +16,10 @@ limitations under the License. #include "tensorflow/core/common_runtime/process_util.h" #ifdef INTEL_MKL +#ifdef _OPENMP #include -#endif +#endif // _OPENMP +#endif // INTEL_MKL #include #include "tensorflow/core/lib/core/threadpool.h" @@ -57,7 +59,10 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) { // MKL library executes ops in parallel using OMP threads // Set inter_op conservatively to avoid thread oversubscription that could // lead to severe perf degradations and OMP resource exhaustion - const int mkl_intra_op = omp_get_max_threads(); + int mkl_intra_op = 1; +#ifdef _OPENMP + mkl_intra_op = omp_get_max_threads(); +#endif // _OPENMP CHECK_GE(mkl_intra_op, 1); const int32 mkl_inter_op = std::max( (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2); @@ -68,7 +73,7 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) { #else // Default to using the number of cores available in the process. return port::NumSchedulableCPUs(); -#endif +#endif // INTEL_MKL } thread::ThreadPool* NewThreadPoolFromSessionOptions( diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc index f7a07fe503..74a87215e1 100644 --- a/tensorflow/core/common_runtime/threadpool_device.cc +++ b/tensorflow/core/common_runtime/threadpool_device.cc @@ -31,7 +31,11 @@ limitations under the License. 
#include "tensorflow/core/public/session_options.h" #ifdef INTEL_MKL +#ifdef _OPENMP +#include +#endif #include "tensorflow/core/common_runtime/mkl_cpu_allocator.h" +#include "tensorflow/core/platform/cpu_info.h" #endif namespace tensorflow { @@ -43,7 +47,26 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options, : LocalDevice(options, Device::BuildDeviceAttributes( name, DEVICE_CPU, memory_limit, locality)), allocator_(allocator), - scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {} + scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) { +#ifdef INTEL_MKL +#ifdef _OPENMP + const char* user_omp_threads = getenv("OMP_NUM_THREADS"); + if (user_omp_threads == nullptr) { + // OMP_NUM_THREADS controls MKL's intra-op parallelization + // Default to available physical cores + const int mkl_intra_op = port::NumSchedulableCPUs(); + const int ht = port::NumHyperthreadsPerCore(); + omp_set_num_threads((mkl_intra_op + ht - 1) / ht); + } else { + uint64 user_val = 0; + if (strings::safe_strtou64(user_omp_threads, &user_val)) { + // Superflous but triggers OpenMP loading + omp_set_num_threads(user_val); + } + } +#endif // _OPENMP +#endif // INTEL_MKL +} ThreadPoolDevice::~ThreadPoolDevice() {} diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc index 1cea1b1462..770a0fcf14 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc @@ -147,7 +147,9 @@ MasterService::Stub::Stub( } MasterService::AsyncService::AsyncService() { - for (int i = 0; i < 10; ++i) { + int method_len = sizeof(grpcMasterService_method_names) / + sizeof(grpcMasterService_method_names[0]); + for (int i = 0; i < method_len; ++i) { AddMethod(new ::grpc::internal::RpcServiceMethod( grpcMasterService_method_names[i], ::grpc::internal::RpcMethod::NORMAL_RPC, nullptr)); diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc index 89f83f9f24..a8508d2d4f 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/rpc/grpc_session.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/util/device_name_utils.h" namespace tensorflow { @@ -50,9 +51,14 @@ Status TestCluster::MakeTestCluster(const SessionOptions& options, int n, } for (int i = 0; i < n; ++i) { + string server_file = + strings::StrCat(testing::TensorFlowSrcRoot(), + "/core/distributed_runtime/rpc/grpc_testlib_server"); + if (!options.env->FileExists(server_file).ok()) { + return errors::Internal("Could not find grpc_testlib_server"); + } const std::vector argv( - {strings::StrCat(testing::TensorFlowSrcRoot(), - "/core/distributed_runtime/rpc/grpc_testlib_server"), + {server_file, /* see grpc_testlib_server.cc for flags */ tf_jobs, "--tf_job=localhost", strings::StrCat("--tf_task=", i), strings::StrCat("--num_cpus=", num_cpus), diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h index 2c87156dca..2bb4d32d57 100644 --- a/tensorflow/core/framework/allocator.h +++ b/tensorflow/core/framework/allocator.h @@ -67,13 +67,8 @@ struct AllocatorStats { // device memory. 
class Allocator { public: -#ifdef EIGEN_VECTORIZE_AVX512 // Align to 64 byte boundary. static constexpr size_t kAllocatorAlignment = 64; -#else - // Align to 32 byte boundary. - static constexpr size_t kAllocatorAlignment = 32; -#endif virtual ~Allocator(); diff --git a/tensorflow/core/framework/op_gen_lib.cc b/tensorflow/core/framework/op_gen_lib.cc index 3d7920a6e2..4b56d807df 100644 --- a/tensorflow/core/framework/op_gen_lib.cc +++ b/tensorflow/core/framework/op_gen_lib.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/framework/op_gen_lib.h" +#include #include #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/lib/core/errors.h" diff --git a/tensorflow/core/framework/remote_fused_graph_execute_info.proto b/tensorflow/core/framework/remote_fused_graph_execute_info.proto index eb689ec1e6..10072724d2 100644 --- a/tensorflow/core/framework/remote_fused_graph_execute_info.proto +++ b/tensorflow/core/framework/remote_fused_graph_execute_info.proto @@ -5,7 +5,7 @@ option cc_enable_arenas = true; option java_outer_classname = "RemoteFusedGraphExecuteInfoProto"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -//add go_package externally +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework"; import "tensorflow/core/framework/graph.proto"; import "tensorflow/core/framework/tensor_shape.proto"; import "tensorflow/core/framework/types.proto"; diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc index b613effd18..80e168df97 100644 --- a/tensorflow/core/framework/tensor_test.cc +++ b/tensorflow/core/framework/tensor_test.cc @@ -1147,29 +1147,29 @@ TEST(Tensor, FailureToAllocate) { // On the alignment. // -// As of 2015/8, tensorflow::Tensor allocates its buffer with 32-byte +// As of 2018/5, tensorflow::Tensor allocates its buffer with 64-byte // alignment. Tensor::tensor/flat/vec/matrix methods requires the // buffer satisfies Eigen::Aligned (e.g., 16-bytes aligned usually, -// and 32-bytes for AVX). Tensor::Slice requires the caller to ensure -// its result is aligned if the caller intends to use those methods. -// In this test case, we simply make sure each slice is 32-byte -// aligned: sizeof(float) * 4 * 2 = 32. +// 32-bytes for AVX, and 64-bytes for AVX512). Tensor::Slice requires +// the caller to ensure its result is aligned if the caller intends +// to use those methods. In this test case, we simply make sure each +// slice is 64-byte aligned: sizeof(float) * 4 * 36 = 576. 576 % 64 = 0. TEST(Tensor, Slice_Basic) { Tensor saved; { // General - Tensor x(DT_FLOAT, TensorShape({10, 4, 34})); + Tensor x(DT_FLOAT, TensorShape({10, 4, 36})); // Fills in known values. for (int i = 0; i < 10; ++i) { x.Slice(i, i + 1).flat().setConstant(i * 1.f); } // A simple slice along dim0. 
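The Slice_Basic comment above notes that with the now unconditional 64-byte kAllocatorAlignment each dim0 slice must span a multiple of 64 bytes, hence the inner dimension changes from 34 to 36. The arithmetic, spelled out as a quick check:

```python
# bytes covered by one dim0 slice of a float tensor shaped [10, 4, dim2]
def slice_bytes(dim1, dim2, elem_size=4):
  return dim1 * dim2 * elem_size

assert slice_bytes(4, 36) == 576 and 576 % 64 == 0  # new shape: 64-byte aligned
assert slice_bytes(4, 34) % 64 != 0                 # old shape was only 32-byte aligned
```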
Tensor y = x.Slice(4, 8); - EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 4, 34}))); + EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 4, 36}))); auto tx = x.tensor(); auto ty = y.tensor(); for (int i = 0; i < 4; ++i) { for (int j = 0; j < 4; ++j) { - for (int k = 0; k < 34; ++k) { + for (int k = 0; k < 36; ++k) { EXPECT_EQ(ty(i, j, k), 4.0 + i); EXPECT_EQ(&tx(4 + i, j, k), &ty(i, j, k)); } @@ -1186,7 +1186,7 @@ TEST(Tensor, Slice_Basic) { auto tz = z.tensor(); EXPECT_EQ(1, z.dim_size(0)); for (int j = 0; j < 4; ++j) { - for (int k = 0; k < 34; ++k) { + for (int k = 0; k < 36; ++k) { EXPECT_EQ(tz(0, j, k), 6.0); } } @@ -1198,16 +1198,16 @@ TEST(Tensor, Slice_Basic) { EXPECT_EQ(1, saved.dim_size(0)); auto tsaved = saved.tensor(); for (int j = 0; j < 4; ++j) { - for (int k = 0; k < 34; ++k) { + for (int k = 0; k < 36; ++k) { EXPECT_EQ(tsaved(0, j, k), 6.0); } } } { // Empty - Tensor x(DT_FLOAT, TensorShape({10, 0, 34})); + Tensor x(DT_FLOAT, TensorShape({10, 0, 36})); x.flat().setRandom(); Tensor y = x.Slice(4, 8); - EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 0, 34}))); + EXPECT_TRUE(y.shape().IsSameSize(TensorShape({4, 0, 36}))); } { diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 72a13d4da7..b9667998d6 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2691,14 +2691,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // If Op has been specifically assigned to a non-CPU device, then No. if (!n->assigned_device_name().empty() && - !str_util::StrContains(n->assigned_device_name(),kCPUDeviceSubStr)) { + !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) { result = false; reason = "Op has been assigned a runtime device that is not CPU."; } // If user has specifically assigned this op to a non-CPU device, then No. if (!n->def().device().empty() && - !str_util::StrContains(n->def().device(),kCPUDeviceSubStr)) { + !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) { result = false; reason = "User has assigned a device that is not CPU."; } @@ -2865,9 +2865,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass { return false; } - // If the depth_radius of LRN is not 2, then MKL DNN takes unoptimized - // path. The unoptimized path is slow. Thus we dont rewrite the node - // and use default Eigen. But for depth_radius=2, MKL DNN optimized + // If the depth_radius of LRN is not 2, then MKL DNN takes unoptimized + // path. The unoptimized path is slow. Thus we dont rewrite the node + // and use default Eigen. But for depth_radius=2, MKL DNN optimized // path is taken, i.e., eigen node is rewritten by MKl DNN node. static bool LrnRewrite(const Node* n) { CHECK_NOTNULL(n); @@ -2876,13 +2876,13 @@ class MklLayoutRewritePass : public GraphOptimizationPass { CHECK_EQ(GetNodeAttr(n->def(), "depth_radius", &depth_radius).ok(), true); // if the depth_radius of LRN is not 2, don't rewrite the node by MKL DNN - // and use eigen node instead + // and use eigen node instead if (depth_radius == 2) { return true; } VLOG(1) << "LrnRewrite: The model sets depth_radius as not 2 which" << "case is not optimized by Intel MKL, thus using Eigen op" - << "for LRN " ; + << "for LRN "; return false; } @@ -3015,6 +3015,35 @@ class MklLayoutRewritePass : public GraphOptimizationPass { std::vector* ws_tensors, bool* are_ws_tensors_added); + // Helper function used by FixMklMetaDataEdges. 
Fixes the metadata edge + // pointed by 'e_metadata' corresponding to the data edge 'e_data' in graph + // 'g'. Returns true is fixup was done; otherwise, it returns false. + bool FixMklMetaDataEdgeIfNeeded(std::unique_ptr* g, + const Edge* e_data, const Edge* e_metadata); + + // Are the input Mkl metadata edges for node 'n' in graph 'g' correctly + // connected? If not, then fix them. This is needed because a graph may have + // some input Mkl metadata edges incorrectly setup after node merge and + // rewrite passes. This could happen because GetReversePostOrder function may + // not provide topologically sorted order if a graph contains cycles. The + // function returns true if at least one Mkl metadata edge for node 'n' was + // fixed. Otherwise, it returns false. + // + // Example: + // + // X = MklConv2D(_, _, _) + // Y = MklConv2DWithBias(_, _, _, _, _, _) + // Z = MklAdd(X, Y, DummyMklTensor, Y:1) + // + // For a graph such as shown above, note that 3rd argument of MklAdd contains + // DummyMklTensor. Actually, it should be getting the Mkl metadata from + // MklConv2D op (specifically, X:2). This incorrect plumbing could be possible + // (although rare) if the Mkl NodeMerge + NodeRewrite passes visit Z before X + // (possible if X, Y, Z are part of a loop.) This function fixes the Mkl + // metadata edges only - it does not rewrite nodes nor does it modify the Mkl + // data edges (1st and 2nd arguments of MklAdd). + bool FixMklMetaDataEdges(std::unique_ptr* g, Node* n); + // Functions specific to operators to copy attributes // We need operator-specific function to copy attributes because the framework // does not provide any generic function for it. @@ -4241,6 +4270,92 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const { return nullptr; } +/////////////////////////////////////////////////////////////////////////////// +// Post-rewrite Mkl metadata fixup pass +/////////////////////////////////////////////////////////////////////////////// +bool MklLayoutRewritePass::FixMklMetaDataEdgeIfNeeded(std::unique_ptr* g, + const Edge* e_data, const Edge* e_metadata) { + if (g == nullptr || e_data == nullptr || e_metadata == nullptr) { + return false; + } + + Node* n_data = e_data->src(); + int n_data_op_slot = e_data->src_output(); + int n_metadata_op_slot = GetTensorMetaDataIndex(n_data_op_slot, + n_data->num_outputs()); + + // If the source of meta edge is a constant node (producing dummy Mkl metadata + // tensor), then we will need to fix. + if (IsConstant(e_metadata->src())) { + Node* e_metadata_dst = e_metadata->dst(); + int e_metadata_in_slot = e_metadata->dst_input(); + CHECK_NOTNULL((*g)->AddEdge(n_data, n_metadata_op_slot, + e_metadata_dst, e_metadata_in_slot)); + + (*g)->RemoveEdge(e_metadata); + return true; + } + + return false; +} + +bool MklLayoutRewritePass::FixMklMetaDataEdges(std::unique_ptr* g, + Node* n) { + bool result = false; + + // If graph node is not Mkl node, then return. + DataType T = DT_INVALID; + if (!GetNodeAttr(n->def(), "T", &T).ok() || + !mkl_op_registry::IsMklOp(n->type_string(), T)) { + return result; + } + + // If it is Mkl node, then check if the input edges to this node that carry + // Mkl metadata are linked up correctly with the source node. + + // For Mkl nodes, we generate twice the number of input tensors (n for Mkl + // data tensors + n for Mkl metadata tensors). We need to check for correct + // connection of n metadata tensors only. 
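The comment block above spells out the invariant the fixup restores: every MKL data input is paired with a metadata input, and after the merge and rewrite passes a metadata input can still point at a dummy constant even though its data producer is an MKL op with a real metadata output. A deliberately simplified sketch of that repair over a toy edge list (plain Python, not the TensorFlow graph API):

```python
def fix_metadata_inputs(data_srcs, meta_srcs, meta_output_of, is_mkl_op):
  """data_srcs[i] / meta_srcs[i]: producer of the i-th data / metadata input.
  meta_output_of: maps an MKL producer to its own metadata output."""
  fixed = False
  for i, producer in enumerate(data_srcs):
    if is_mkl_op(producer) and meta_srcs[i] == 'dummy_const':
      meta_srcs[i] = meta_output_of[producer]  # rewire to the real Mkl metadata
      fixed = True
  return fixed

meta = ['dummy_const']
assert fix_metadata_inputs(['X:0'], meta, {'X:0': 'X:2'}, lambda p: p == 'X:0')
assert meta == ['X:2']  # matches the MklAdd example in the comment above
```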
+ int num_data_inputs = n->num_inputs() / 2; + for (int idx = 0; idx < num_data_inputs; idx++) { + // Get the edge connecting input slot with index (idx). + const Edge* e = nullptr; + TF_CHECK_OK(n->input_edge(idx, &e)); + + // If e is control edge, then skip. + if (e->IsControlEdge()) { + continue; + } + + // Check that the source node for edge 'e' is Mkl node. If it is not an Mkl + // node, then we don't need to do anything. + Node* e_src = e->src(); + if (GetNodeAttr(e_src->def(), "T", &T).ok() && + mkl_op_registry::IsMklOp(e_src->type_string(), T)) { + // Source node for edge 'e' is Mkl node. + // Destination node and destination input slot of e is node 'n' and 'idx' + // resp. + CHECK_EQ(e->dst(), n); + CHECK_EQ(e->dst_input(), idx); + + // Let's get edge that carries Mkl metadata corresponding to Mkl data edge + // 'e'. For that, let's first get the input slot of 'n' where the meta + // edge will feed the value. + int e_meta_in_slot = GetTensorMetaDataIndex(e->dst_input(), + n->num_inputs()); + const Edge* e_meta = nullptr; + TF_CHECK_OK(n->input_edge(e_meta_in_slot, &e_meta)); + + // Let's check if we need to fix this meta edge. + if (FixMklMetaDataEdgeIfNeeded(g, e, e_meta)) { + result = true; + } + } + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// // Run function for the pass /////////////////////////////////////////////////////////////////////////////// @@ -4307,6 +4422,25 @@ bool MklLayoutRewritePass::RunPass(std::unique_ptr* g) { DumpGraph("After running MklLayoutRewritePass(NodeMerge+Rewrite)", &**g); + order.clear(); + GetReversePostOrder(**g, &order); // This will give us topological sort. + for (Node* n : order) { + // If node is not an op or it cannot run on CPU device, then skip. 
+ if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) { + continue; + } + if (FixMklMetaDataEdges(g, n)) { + string node_name = n->name(); + string op_name = n->type_string(); + + VLOG(1) << "MklLayoutRewritePass: fixed metadata edges for node " + << node_name << " with op " << op_name; + result = true; + } + } + DumpGraph("After running MklLayoutRewritePass(NodeMerge+Rewrite+Fixup)", + &**g); + return result; } diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index 029cdcf94a..7645b4a7f0 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -3518,6 +3518,37 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) { "B->C:1;C->E;D->E:1;E->Z;M->C:2;N->C:3;Y->Z:1"); } +///////////////////////////////////////////////////////////////////// +// Post-rewrite fixup pass test + +TEST_F(MklLayoutPassTest, PostRewriteFixUpPass) { + InitGraph( + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'Input'}" + "node { name: 'M' op: '_MklInput'}" + "node { name: 'N' op: '_MklInput'}" + "node { name: 'C' op: '_MklConv2D'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'data_format' value { s: 'NCHW' } }" + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" + " attr { key: 'padding' value { s: 'SAME' } }" + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" + " input: ['A', 'B', 'M', 'N']}" + "node { name: 'D' op: 'Const' " + " attr { key: 'dtype' value { type: DT_UINT8 } }" + " attr { key: 'value' value { " + " tensor { dtype: DT_UINT8 tensor_shape { dim { size: 1 } } " + " int_val: 0 } } } }" + "node { name: 'E' op: '_MklAdd'" + " attr {key: 'T' value { type: DT_FLOAT } }" + " input: ['C', 'A', 'D', 'D']}"); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + "A(Input);B(Input);C(_MklConv2D);D(Const);E(_MklAdd);" + "M(_MklInput);N(_MklInput)|A->C;A->E:1;B->C:1;C->E;C:2->E:2;" + "D->E:3;M->C:2;N->C:3"); +} + ///////////////////////////////////////////////////////////////////// static void BM_MklLayoutRewritePass(int iters, int op_nodes) { diff --git a/tensorflow/core/grappler/clusters/single_machine_test.cc b/tensorflow/core/grappler/clusters/single_machine_test.cc index 352f08fede..31b19cfcfd 100644 --- a/tensorflow/core/grappler/clusters/single_machine_test.cc +++ b/tensorflow/core/grappler/clusters/single_machine_test.cc @@ -546,7 +546,7 @@ TEST_F(SingleMachineTest, ReleaseMemoryAfterDestruction) { TF_CHECK_OK(cluster_->GetPeakMemoryUsage(&device_peak_memory_before)); EXPECT_EQ(device_peak_memory_before.size(), 1); // There might be a bit memory used before session's running anything. - EXPECT_LT(device_peak_memory_before.begin()->second, 200); + EXPECT_LT(device_peak_memory_before.begin()->second, 400); RunMetadata metadata; TF_CHECK_OK(cluster_->Run(item.graph, item.feed, item.fetch, &metadata)); @@ -567,8 +567,8 @@ TEST_F(SingleMachineTest, ReleaseMemoryAfterDestruction) { // Check memory used by resources are released after cluster destruction. 
EXPECT_EQ(device_peak_memory_before.size(), 1); EXPECT_EQ(device_peak_memory_after.size(), 1); - EXPECT_LT(device_peak_memory_before.begin()->second, 200); - EXPECT_LT(device_peak_memory_after.begin()->second, 200); + EXPECT_LT(device_peak_memory_before.begin()->second, 400); + EXPECT_LT(device_peak_memory_after.begin()->second, 400); } TEST_F(SingleMachineTest, PeakMemory) { @@ -597,7 +597,7 @@ TEST_F(SingleMachineTest, PeakMemory) { device_peak_memory.end()); cpu_memory = device_peak_memory["/job:localhost/replica:0/task:0/device:CPU:0"]; - EXPECT_LT(cpu_memory, 100); + EXPECT_LT(cpu_memory, 200); } TEST_F(SingleMachineTest, PeakMemoryStatsNotEnabled) { diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 6749a7c571..0c02876ac5 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -610,7 +610,6 @@ class SymbolicShapeRefiner { } }; - // Compute the shape of the tensors outputed by node 'node' at output port // 'port_index' as the union of shape1 and shape2. ShapeHandle OutputAsUnion(const NodeDef* node, int port_index, ShapeHandle shape1, ShapeHandle shape2) { diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 1b18087cdf..8ca726df0b 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -679,6 +679,7 @@ cc_library( deps = [ ":constant_folding", ":graph_optimizer", + "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:graph_view", "//tensorflow/core/grappler:grappler_item", @@ -780,7 +781,6 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:scoped_allocator_ops_op_lib", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:op_types", "//tensorflow/core/grappler:utils", diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc index 4dde7ed1b4..03e36a7b9c 100644 --- a/tensorflow/core/grappler/optimizers/remapper.cc +++ b/tensorflow/core/grappler/optimizers/remapper.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/core/grappler/op_types.h" #include "tensorflow/core/grappler/optimizers/constant_folding.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/platform/logging.h" namespace tensorflow { namespace grappler { @@ -200,8 +201,7 @@ Status Remapper::Optimize(Cluster* /*cluster*/, const GrapplerItem& item, } } if (optimizable) { - VLOG(2) << "Optimizing fused batch norm node " << node.DebugString() - << std::endl; + VLOG(1) << "Optimizing fused batch norm node " << node.DebugString(); AddBatchNormNodes(optimized_graph, node); continue; } diff --git a/tensorflow/core/kernels/as_string_op.cc b/tensorflow/core/kernels/as_string_op.cc index 66c4aff3e3..a7757d1361 100644 --- a/tensorflow/core/kernels/as_string_op.cc +++ b/tensorflow/core/kernels/as_string_op.cc @@ -73,6 +73,7 @@ class AsStringOp : public OpKernel { } switch (dtype) { case DT_INT8: + case DT_INT16: case DT_INT32: strings::Appendf(&format_, "d"); break; @@ -129,6 +130,7 @@ class AsStringOp : public OpKernel { ENCODE_TYPE(DT_FLOAT, float, format_); ENCODE_TYPE(DT_DOUBLE, double, format_); ENCODE_TYPE(DT_INT8, int8, format_); + ENCODE_TYPE(DT_INT16, int16, format_); case (DT_BOOL): { const auto& input_flat = input_tensor->flat(); for (int i = 0; i < input_flat.size(); ++i) { diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc index 14d889e8e3..49b90e855b 100644 --- a/tensorflow/core/kernels/cwise_op_clip.cc +++ b/tensorflow/core/kernels/cwise_op_clip.cc @@ -33,52 +33,41 @@ class ClipOp : public OpKernel { const Tensor& in0 = ctx->input(0); const Tensor& in1 = ctx->input(1); const Tensor& in2 = ctx->input(2); + OP_REQUIRES(ctx, (in0.shape() == in1.shape() || + TensorShapeUtils::IsScalar(in1.shape())) && + (in0.shape() == in2.shape() || + TensorShapeUtils::IsScalar(in2.shape())), + errors::InvalidArgument( + "clip_value_min and clip_value_max must be either of " + "the same shape as input, or a scalar. ", + "input shape: ", in0.shape().DebugString(), + "clip_value_min shape: ", in1.shape().DebugString(), + "clip_value_max shape: ", in2.shape().DebugString())); + + Tensor* out = nullptr; + OP_REQUIRES_OK( + ctx, ctx->forward_input_or_allocate_output({0}, 0, in0.shape(), &out)); + if (out->NumElements() == 0) return; // Nothing to do for empty output auto in0_flat = in0.flat(); auto in1_flat = in1.flat(); auto in2_flat = in2.flat(); + auto out_flat = out->flat(); const Device& d = ctx->eigen_device(); - Tensor* out = nullptr; - OP_REQUIRES_OK( - ctx, ctx->forward_input_or_allocate_output({0}, 0, in0.shape(), &out)); - auto out_flat = out->flat(); if (in1.shape() == in2.shape()) { if (in0.shape() == in1.shape()) { functor::TernaryClipOp()(d, in0_flat, in1_flat, in2_flat, out_flat); } else { - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in1.shape()), - errors::InvalidArgument( - "clip_value_min and clip_value_max must be either of " - "the same shape as input, or a scalar. ", - "input shape: ", in0.shape().DebugString(), - "clip_value_min shape: ", in1.shape().DebugString(), - "clip_value_max shape: ", in2.shape().DebugString())); functor::UnaryClipOp()(d, in0_flat, in1_flat, in2_flat, out_flat); } } else { if (in0.shape() == in1.shape()) { - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in2.shape()), - errors::InvalidArgument( - "clip_value_min and clip_value_max must be either of " - "the same shape as input, or a scalar. 
", - "input shape: ", in0.shape().DebugString(), - "clip_value_min shape: ", in1.shape().DebugString(), - "clip_value_max shape: ", in2.shape().DebugString())); functor::BinaryLeftClipOp()(d, in0_flat, in1_flat, in2_flat, out_flat); } else { - OP_REQUIRES(ctx, - (in0.shape() == in2.shape() && - TensorShapeUtils::IsScalar(in1.shape())), - errors::InvalidArgument( - "clip_value_min and clip_value_max must be either of " - "the same shape as input, or a scalar. ", - "input shape: ", in0.shape().DebugString(), - "clip_value_min shape: ", in1.shape().DebugString(), - "clip_value_max shape: ", in2.shape().DebugString())); functor::BinaryRightClipOp()(d, in0_flat, in1_flat, in2_flat, out_flat); } diff --git a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc index 9a3b2303a3..17a85d9773 100644 --- a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc @@ -57,6 +57,7 @@ struct DenseUpdate { template struct functor::DenseUpdate; \ template struct functor::DenseUpdate; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); +TF_CALL_int32(DEFINE_GPU_KERNELS); TF_CALL_int64(DEFINE_GPU_KERNELS); #undef DEFINE_GPU_KERNELS diff --git a/tensorflow/core/kernels/gather_functor.cc b/tensorflow/core/kernels/gather_functor.cc index e6fefe643b..5cd8e04927 100644 --- a/tensorflow/core/kernels/gather_functor.cc +++ b/tensorflow/core/kernels/gather_functor.cc @@ -37,6 +37,7 @@ namespace functor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) +TF_CALL_int64(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); TF_CALL_complex128(DECLARE_GPU_SPECS); diff --git a/tensorflow/core/kernels/gather_functor_gpu.cu.cc b/tensorflow/core/kernels/gather_functor_gpu.cu.cc index 39b6924d74..4563fc6353 100644 --- a/tensorflow/core/kernels/gather_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/gather_functor_gpu.cu.cc @@ -31,6 +31,7 @@ typedef Eigen::GpuDevice GPUDevice; DEFINE_GPU_SPECS_INDEX(T, int32); \ DEFINE_GPU_SPECS_INDEX(T, int64); +TF_CALL_int64(DEFINE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS); TF_CALL_complex64(DEFINE_GPU_SPECS); TF_CALL_complex128(DEFINE_GPU_SPECS); diff --git a/tensorflow/core/kernels/gather_nd_op.cc b/tensorflow/core/kernels/gather_nd_op.cc index 7e5a9e1ec5..4e53291b7f 100644 --- a/tensorflow/core/kernels/gather_nd_op.cc +++ b/tensorflow/core/kernels/gather_nd_op.cc @@ -228,6 +228,8 @@ namespace functor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) +TF_CALL_int32(DECLARE_GPU_SPECS); +TF_CALL_int64(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); TF_CALL_complex128(DECLARE_GPU_SPECS); @@ -239,6 +241,8 @@ TF_CALL_complex128(DECLARE_GPU_SPECS); // Registration of the GPU implementations. 
#define REGISTER_GATHER_ND_GPU(type) REGISTER_GATHER_ND_ALL_INDICES(GPU, type) +TF_CALL_int32(REGISTER_GATHER_ND_GPU); +TF_CALL_int64(REGISTER_GATHER_ND_GPU); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_ND_GPU); TF_CALL_complex64(REGISTER_GATHER_ND_GPU); TF_CALL_complex128(REGISTER_GATHER_ND_GPU); diff --git a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc index b03efc684f..da8d2e9e3c 100644 --- a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc @@ -119,6 +119,8 @@ struct GatherNdSlice { DEFINE_GPU_SPECS_INDEX(T, int32); \ DEFINE_GPU_SPECS_INDEX(T, int64); +TF_CALL_int32(DEFINE_GPU_SPECS); +TF_CALL_int64(DEFINE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS); TF_CALL_complex64(DEFINE_GPU_SPECS); TF_CALL_complex128(DEFINE_GPU_SPECS); diff --git a/tensorflow/core/kernels/gather_op.cc b/tensorflow/core/kernels/gather_op.cc index ef332ebee3..094504d6b9 100644 --- a/tensorflow/core/kernels/gather_op.cc +++ b/tensorflow/core/kernels/gather_op.cc @@ -153,6 +153,7 @@ TF_CALL_uint64(REGISTER_GATHER_CPU); // Registration of the GPU implementations. #define REGISTER_GATHER_GPU(type) REGISTER_GATHER_ALL_INDICES(GPU, type) +TF_CALL_int64(REGISTER_GATHER_GPU); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_GPU); TF_CALL_complex64(REGISTER_GATHER_GPU); TF_CALL_complex128(REGISTER_GATHER_GPU); diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc index 5eeb23d810..31d1b949ef 100644 --- a/tensorflow/core/kernels/mkl_concat_op.cc +++ b/tensorflow/core/kernels/mkl_concat_op.cc @@ -14,6 +14,7 @@ limitations under the License. #include #include +#include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" @@ -590,8 +591,8 @@ class MklConcatOp : public OpKernel { const int N = input_tensors.size(); // Get Tensor shapes. - std::vector input_shapes(N); - GetMklShapeList(context, "values", &input_shapes); + std::vector mkl_input_shapes(N); + GetMklShapeList(context, "values", &mkl_input_shapes); const Tensor& concat_dim_tensor = (AxisArgName == NAME_IS_CONCAT_DIM) ? MklGetInput(context, 0) @@ -610,19 +611,14 @@ class MklConcatOp : public OpKernel { int i = 0; bool invoke_eigen = false; bool are_all_mkl_inputs = true, are_all_tf_inputs = true; - const TensorShape expected_shape = input_shapes[0].IsMklTensor() - ? input_shapes[0].GetTfShape() - : input_tensors[0].shape(); + const TensorShape expected_shape = mkl_input_shapes[0].IsMklTensor() + ? mkl_input_shapes[0].GetTfShape() + : input_tensors[0].shape(); size_t expected_dims = expected_shape.dims(); if (concat_dim < 0) concat_dim = expected_dims + concat_dim; - for (auto& s : input_shapes) { - if (s == expected_shape) { - ++i; - continue; - } - + for (auto& s : mkl_input_shapes) { TensorShape s_shape = s.IsMklTensor() ? s.GetTfShape() : input_tensors[i].shape(); size_t s_dims = s_shape.dims(); @@ -665,21 +661,14 @@ class MklConcatOp : public OpKernel { // Call Eigen library if (invoke_eigen) { - TensorShapeList tf_input_shapes; - i = 0; - for (auto& s : input_shapes) { - TensorShape s_shape = - s.IsMklTensor() ? 
s.GetTfShape() : input_tensors[i].shape(); - tf_input_shapes.push_back(s_shape); - ++i; - } - CallEigenVersion(context, input_tensors, tf_input_shapes); + CallEigenVersion(context, input_tensors, mkl_input_shapes); return; } memory::dims dst_dims; + if (are_all_mkl_inputs) - dst_dims = TFShapeToMklDnnDims(input_shapes[0].GetTfShape()); + dst_dims = TFShapeToMklDnnDims(mkl_input_shapes[0].GetTfShape()); else // When all the inputs are in Tensorflow format, we don't know // what is the input data format. In that case, we just use @@ -689,26 +678,61 @@ class MklConcatOp : public OpKernel { std::vector srcs_pd; std::vector> srcs(N, MklDnnData(&cpu_engine)); int64 dst_concat_dim_size = 0; - for (int k = 0; k < N; k++) { - bool is_mkl_tensor = input_shapes[k].IsMklTensor(); - memory::dims src_dims; - - // Same comment as dst_dims for src_dims. - src_dims = (is_mkl_tensor) - ? TFShapeToMklDnnDims(input_shapes[k].GetTfShape()) - : TFShapeToMklDnnDims(input_tensors[k].shape()); - - dst_concat_dim_size += src_dims[concat_dim]; - auto src_md = - is_mkl_tensor ? input_shapes[k].GetMklLayout() : - // It does not matter what data format we use here - // (NHWC or NCHW). We just need to ensure that output - // of Concat uses same data format as input. - memory::desc(src_dims, MklDnnType(), memory::format::nchw); - - srcs[k].SetUsrMem(src_md, &input_tensors[k]); - auto src_mpd = srcs[k].GetUsrMemPrimDesc(); - srcs_pd.push_back(src_mpd); + + bool isMklReorderNeeded = false; + memory::format mkl_common_format = memory::format::any; + if (are_all_mkl_inputs) { + mkl_common_format = + FindMklCommonFormat(mkl_input_shapes, concat_dim, + &isMklReorderNeeded, &dst_concat_dim_size); + + if (!isMklReorderNeeded) { + // All MKL tensors have a same format. Reorder is not needed. + for (int k = 0; k < N; k++) { + if (input_tensors[k].NumElements() == 0) + continue; + + auto src_md = mkl_input_shapes[k].GetMklLayout(); + srcs[k].SetUsrMem(src_md, &input_tensors[k]); + auto src_mpd = srcs[k].GetUsrMemPrimDesc(); + srcs_pd.push_back(src_mpd); + } + } else { + // MKL tensors have different formats. + // Reorder them to most common format. + for (int k = 0; k < N; k++) { + if (input_tensors[k].NumElements() == 0) + continue; + + auto src_dims = TFShapeToMklDnnDims( + mkl_input_shapes[k].GetTfShape()); + auto src_md = mkl_input_shapes[k].GetMklLayout(); + srcs[k].SetUsrMem(src_md, &input_tensors[k]); + + if (src_md.data.format != mkl_common_format) + src_md = memory::desc(src_dims, MklDnnType(), + mkl_common_format); + + srcs_pd.push_back(memory::primitive_desc(src_md, cpu_engine)); + } + } + } else { // All TF inputs + for (int k = 0; k < N; k++) { + if (input_tensors[k].NumElements() == 0) + continue; + + memory::dims src_dims = TFShapeToMklDnnDims(input_tensors[k].shape()); + dst_concat_dim_size += src_dims[concat_dim]; + + // It does not matter what data format to be used (NHWC versus NCHW). + // We just need to ensure that output uses same data format as inputs. + auto src_md = + memory::desc(src_dims, MklDnnType(), memory::format::nchw); + + srcs[k].SetUsrMem(src_md, &input_tensors[k]); + auto src_mpd = srcs[k].GetUsrMemPrimDesc(); + srcs_pd.push_back(src_mpd); + } } dst_dims[concat_dim] = dst_concat_dim_size; @@ -718,25 +742,33 @@ class MklConcatOp : public OpKernel { if (are_all_mkl_inputs) { // Since we are passing a specific format for destination, // we need to have dst_dims in MklDnn order (NCHW). 
- auto orig_tf_format = input_shapes[0].GetTfDataFormat(); + auto orig_tf_format = mkl_input_shapes[0].GetTfDataFormat(); dst_dims_in_nchw = MklDnnDimsInNCHW( dst_dims, MklDnnDataFormatToTFDataFormat(orig_tf_format)); - // We will set the output in the same format as input to avoid layout - // conversions. - // Currently we are setting dst format same as input format. - // See if we can make this choice in a better way. + // Set the output format same as the most common format of inputs + // to avoid layout conversions. dst_md = memory::desc( - dst_dims_in_nchw, MklDnnType(), - (memory::format)input_shapes[0].GetMklLayout().data.format); + dst_dims_in_nchw, MklDnnType(), mkl_common_format); } else { - // Again, format does not matter here. We just need to make it same as - // input format. + // All inputs are TF tensors. + // Set the output format same as input format (nchw). dst_md = memory::desc(dst_dims, MklDnnType(), memory::format::nchw); } std::vector inputs; - for (int k = 0; k < input_tensors.size(); k++) - inputs.push_back(srcs[k].GetOpMem()); + std::vector net; + if (isMklReorderNeeded) { + for (int k = 0; k < input_tensors.size(); k++) { + if (input_tensors[k].NumElements() > 0) { + srcs[k].CheckReorderToOpMem(srcs_pd[k], &net); + } + } + } + for (int k = 0; k < input_tensors.size(); k++) { + if (input_tensors[k].NumElements() > 0) { + inputs.push_back(srcs[k].GetOpMem()); + } + } // If all inputs are in MKL format, then meaning of concat_dim needs to // change. Value of concat_dim is tied to input Tensorflow data format @@ -745,7 +777,8 @@ class MklConcatOp : public OpKernel { // But ifinput tensors are in NHWC order, then semantics need to change. // E.g., if we are concatinating over Channel (dimension 3 for NHWC), // then since MklDnn order is NCHW, concat_dim needs to be 1. 
- if (are_all_mkl_inputs) concat_dim = input_shapes[0].TfDimIdx(concat_dim); + if (are_all_mkl_inputs) + concat_dim = mkl_input_shapes[0].TfDimIdx(concat_dim); auto concat_pd = concat::primitive_desc(dst_md, concat_dim, srcs_pd); @@ -758,7 +791,7 @@ class MklConcatOp : public OpKernel { dnn_shape_dst.SetMklLayout(&dst_pd); dnn_shape_dst.SetElemType(MklDnnType()); dnn_shape_dst.SetTfLayout(dst_dims.size(), dst_dims_in_nchw, - input_shapes[0].GetTfDataFormat()); + mkl_input_shapes[0].GetTfDataFormat()); tf_shape_dst.AddDim((dst_pd.get_size() / sizeof(T))); } else { dnn_shape_dst.SetMklTensor(false); @@ -773,7 +806,6 @@ class MklConcatOp : public OpKernel { dst.SetUsrMem(dst_md, dst_tensor); auto concat_op = concat(concat_pd, inputs, dst.GetOpMem()); - std::vector net; net.push_back(concat_op); stream(stream::kind::eager).submit(net).wait(); } catch (mkldnn::error& e) { @@ -787,15 +819,27 @@ class MklConcatOp : public OpKernel { } void CallEigenVersion(OpKernelContext* context, const OpInputList& values, - const TensorShapeList& input_shapes) { - CHECK_EQ(values.size(), input_shapes.size()); + const MklDnnShapeList& mkl_input_shapes) { + CHECK_EQ(values.size(), mkl_input_shapes.size()); std::vector converted_values; - for (int i = 0; i < input_shapes.size(); i++) - converted_values.push_back(values[i]); + TensorShapeList tf_input_shapes; + for (int i = 0; i < mkl_input_shapes.size(); i++) { + if (mkl_input_shapes[i].IsMklTensor()) { + // do conversion from MKL to TF + Tensor tmp_tensor = + ConvertMklToTF(context, values[i], mkl_input_shapes[i]); + converted_values.push_back(tmp_tensor); + tf_input_shapes.push_back(mkl_input_shapes[i].GetTfShape()); + } else { + // no conversion since it is TF tensor already + converted_values.push_back(values[i]); + tf_input_shapes.push_back(values[i].shape()); + } + } // Call Eigen concat. - eigen_concat_op_.Compute(context, converted_values, input_shapes); + eigen_concat_op_.Compute(context, converted_values, tf_input_shapes); // Set output Mkl tensor for this op. MklDnnShape dnn_shape_output; @@ -812,6 +856,55 @@ class MklConcatOp : public OpKernel { output_tensor->flat().data(), output_tensor->flat().size() * sizeof(uint8)); } + + // This method finds the most commom format accross all MKL inputs + // Inputs: + // 1. input_shapes: shapes of input (MKL) tensors. + // 2. concat_dim: concat dimension. + // Outputs: + // 1. is_reorder_needed is set to true if inputs have difference formats + // It is set to false otherwise. + // 2. concat_dim_size is the size of concat_dim. + // Return: + // return the common MKL format. + memory::format FindMklCommonFormat(const MklDnnShapeList& input_shapes, + int concat_dim, bool* is_reorder_needed, int64* concat_dim_size) { + *is_reorder_needed = false; + *concat_dim_size = 0; + std::unordered_map occurrence_map; + if (input_shapes.size() == 0) + return memory::format::any; + + // Compute ocurrences of each format of all inputs. + for (int k=0; k ( + input_shapes[k].GetMklLayout().data.format); + occurrence_map[fmt] += 1; + } + + if (occurrence_map.size() == 1) { + // this means that all inputs have a same format + // return it with is_reorder_needed set false. + return static_cast( + input_shapes[0].GetMklLayout().data.format); + } + + // Input tensors have different formats. Thus, reorder is needed. + // We pick up the most common format to minimize the total + // number of input reorder. 
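The comment block above states FindMklCommonFormat's contract; combined with the rewritten input-preparation loop earlier in this hunk, the policy is: keep MKL inputs as-is when they already share one format, reorder them to the most common format when they do not, and fall back to a plain NCHW description when every input is an ordinary TF tensor. A hedged Python-style sketch of that selection rule follows (names and format tokens are illustrative, not the actual MKL-DNN API):

```python
from collections import Counter

def plan_concat_inputs(input_formats, all_mkl_inputs):
    """Sketch of the format-selection policy described above.

    input_formats: per-input layout tags (hypothetical string tokens here).
    Returns (common_format, reorder_needed).
    """
    if not all_mkl_inputs:
        # All TF tensors: the format does not matter, use NCHW, no reorder.
        return 'nchw', False
    counts = Counter(input_formats)
    if len(counts) == 1:
        # Every MKL input already shares one format: no reorder needed.
        return input_formats[0], False
    # Mixed formats: reorder everything to the most frequent one to
    # minimize the total number of input reorders.
    common_format, _ = counts.most_common(1)[0]
    return common_format, True

print(plan_concat_inputs(['blocked8', 'blocked8'], True))      # ('blocked8', False)
print(plan_concat_inputs(['blocked8', 'nchw', 'nchw'], True))  # ('nchw', True)
```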
+ memory::format commonest_format = memory::format::any; + int max_occurrence = 0; + *is_reorder_needed = true; + for (auto item : occurrence_map) { + if (item.second > max_occurrence) { + commonest_format = static_cast(item.first); + max_occurrence = item.second; + } + } + return commonest_format; + } }; #endif diff --git a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc index c1da0ded1d..f857be6c32 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc @@ -18,6 +18,7 @@ limitations under the License. // bias. #ifdef INTEL_MKL +#ifdef INTEL_MKL_ML #define USE_EIGEN_TENSOR #define EIGEN_USE_THREADS @@ -264,4 +265,5 @@ class MklConv2DCustomBackpropBiasOp : public OpKernel { TF_CALL_float(REGISTER_CPU_KERNELS); #undef REGISTER_CPU_KERNELS } /* namespace tensorflow */ +#endif /* INTEL_MKL_ML */ #endif /* INTEL_MKL */ diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h index 279167aba2..c0dfed7d7d 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.h +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h @@ -199,13 +199,15 @@ class MklPoolingForwardOpBase : public MklPoolingOpBase { CHECK_NOTNULL(pool_params); CHECK_NOTNULL(dnn_data_input); TensorShape input_tensor_shape = input_tensor.shape(); - memory::desc input_md = + if (input_tensor.NumElements() != 0) { + memory::desc input_md = input_mkl_shape.IsMklTensor() ? input_mkl_shape.GetMklLayout() : memory::desc(TFShapeToMklDnnDimsInNCHW(input_tensor_shape, this->data_format_tf_), MklDnnType(), this->data_format_mkldnn_); - dnn_data_input->SetUsrMem(input_md, &input_tensor); + dnn_data_input->SetUsrMem(input_md, &input_tensor); + } this->InitMklPoolParameters(context, pool_params, input_mkl_shape, input_tensor_shape); } diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc index 43c5b29509..e1fc2ea128 100644 --- a/tensorflow/core/kernels/scatter_nd_op.cc +++ b/tensorflow/core/kernels/scatter_nd_op.cc @@ -292,6 +292,7 @@ TF_CALL_string(REGISTER_SCATTER_ND_CPU); REGISTER_SCATTER_ND_UPDATE_GPU(type); \ REGISTER_SCATTER_ND_GPU(type); +TF_CALL_int32(REGISTER_SCATTER_ND_ALL_GPU); // TODO(b/66916790): Support half types in ScatterNd. TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_ALL_GPU); TF_CALL_complex64(REGISTER_SCATTER_ND_ALL_GPU); @@ -306,6 +307,8 @@ TF_CALL_complex128(REGISTER_SCATTER_ND_ALL_GPU); #define REGISTER_SCATTER_ND_UPDATE_SYCL(type) \ REGISTER_SCATTER_ND_UPDATE(type, SYCL); +TF_CALL_int32(REGISTER_SCATTER_ND_ADD_SUB_SYCL); +TF_CALL_int32(REGISTER_SCATTER_ND_UPDATE_SYCL); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_ADD_SUB_SYCL); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_UPDATE_SYCL); #undef REGISTER_SCATTER_ND_ADD_SUB_SYCL @@ -576,6 +579,7 @@ namespace functor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) +TF_CALL_int32(DECLARE_GPU_SPECS); // TODO(b/66916790): Support half types in ScatterNd. 
TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); diff --git a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc index a3c21edc15..08b657f4c3 100644 --- a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc @@ -170,6 +170,7 @@ struct ScatterNdFunctor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64) +TF_CALL_int32(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); TF_CALL_complex128(DECLARE_GPU_SPECS); diff --git a/tensorflow/core/kernels/scoped_allocator_ops_test.cc b/tensorflow/core/kernels/scoped_allocator_ops_test.cc index bb0129fa6f..634f9ba887 100644 --- a/tensorflow/core/kernels/scoped_allocator_ops_test.cc +++ b/tensorflow/core/kernels/scoped_allocator_ops_test.cc @@ -216,8 +216,13 @@ TEST_F(ScopedAllocatorConcatOpTest, Success3) { } TEST_F(ScopedAllocatorConcatOpTest, Reshape) { - MakeOp({2, 2, 2}, DT_DOUBLE, true, "test", 120, 2); - ExecOp(DT_DOUBLE, 120, {{2, 2}, {2, 2}}); + MakeOp({2, 2, 4}, DT_DOUBLE, true, "test", 120, 2); + + // The elements of the third parameter to ExecOp must be multiples of + // Allocator::kAllocatorAlignment in size. If they are not, the backing + // tensor allocated by PrepOp will have too many elements and reshaping + // will fail. + ExecOp(DT_DOUBLE, 120, {{2, 4}, {2, 4}}); } TEST_F(ScopedAllocatorConcatOpTest, NoReshapeAttr) { diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index 7796bf3587..d65692a552 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -16,6 +16,14 @@ limitations under the License. #ifndef TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ #define TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ + +// This file requires the following include because it uses CudaAtomicMax: +// #include "tensorflow/core/util/cuda_kernel_helper.h" + +// Unfortunately we can't add the #include, since it breaks compilation for +// non-GPU targets. This only breaks in clang, because it's more strict for +// template code and CudaAtomicMax is used in template context. + // This file requires the following include because it uses CudaAtomicMax: // #include "tensorflow/core/util/cuda_kernel_helper.h" @@ -130,4 +138,4 @@ struct Highest { } // namespace functor } // namespace tensorflow -#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ +#endif // TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ diff --git a/tensorflow/core/kernels/sparse_matmul_op.cc b/tensorflow/core/kernels/sparse_matmul_op.cc index a1f9667b78..866c5dcd52 100644 --- a/tensorflow/core/kernels/sparse_matmul_op.cc +++ b/tensorflow/core/kernels/sparse_matmul_op.cc @@ -1490,7 +1490,7 @@ inline void LibxsmmSparseMatMul::Compute( #endif // TENSORFLOW_USE_LIBXSMM -// Here is a an overview of the SparseMatMul code. Note that we assume that the +// Here is an overview of the SparseMatMul code. Note that we assume that the // left matrix is sparse. // // The matrix "left" is divided into a grid with blocksize of (M, KL). Each diff --git a/tensorflow/core/kernels/string_split_op.cc b/tensorflow/core/kernels/string_split_op.cc index 4c2b312c34..26ab72f12e 100644 --- a/tensorflow/core/kernels/string_split_op.cc +++ b/tensorflow/core/kernels/string_split_op.cc @@ -22,6 +22,7 @@ limitations under the License. 
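The ScopedAllocatorConcatOpTest change above notes that each field passed to ExecOp must be a multiple of Allocator::kAllocatorAlignment bytes, which is why the {2, 2} double tensors were replaced with {2, 4}. A quick hedged check of that arithmetic (64 bytes is an assumed value for the alignment constant, not taken from this patch):

```python
# Hedged arithmetic only; 64 is an assumed kAllocatorAlignment and 8 bytes
# is sizeof(double). This is not TensorFlow code.
ASSUMED_ALIGNMENT = 64

def field_is_aligned(shape, bytes_per_element=8):
    num_bytes = bytes_per_element
    for dim in shape:
        num_bytes *= dim
    return num_bytes % ASSUMED_ALIGNMENT == 0

print(field_is_aligned((2, 2)))  # False: 32 bytes, not a multiple of 64
print(field_is_aligned((2, 4)))  # True: 64 bytes
```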
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/str_util.h" namespace tensorflow { @@ -43,6 +44,63 @@ std::vector Split(const string& str, const string& delimiter, return char_vector; } +std::vector SplitV2(const string& str, StringPiece sep, int maxsplit) { + // This SplitV2 method matches the behavior of python's str.split: + // If sep is given, consecutive delimiters are not grouped together + // and are deemed to delimit empty strings (for example, '1,,2'.split(',') + // returns ['1', '', '2']). The sep argument may consist of multiple + // characters (for example, '1<>2<>3'.split('<>') returns ['1', '2', '3']). + // Splitting an empty string with a specified separator returns ['']. + // + // If sep is not specified or is None, a different splitting algorithm is + // applied: runs of consecutive whitespace are regarded as a single + // separator, and the result will contain no empty strings at the start or + // end if the string has leading or trailing whitespace. Consequently, + // splitting an empty string or a string consisting of just whitespace + // with a None separator returns []. + + std::vector result; + + StringPiece text(str); + if (maxsplit == 0) { + result.emplace_back(std::string(text)); + return result; + } + + if (sep.empty()) { + StringPiece token; + // Remove leading whitespaces. + str_util::RemoveLeadingWhitespace(&text); + int split = 0; + while (str_util::ConsumeNonWhitespace(&text, &token)) { + result.emplace_back(std::string(token)); + str_util::RemoveLeadingWhitespace(&text); + ++split; + if (maxsplit > 0 && split == maxsplit) { + result.emplace_back(std::string(text)); + return result; + } + } + return result; + } + auto p = std::search(text.begin(), text.end(), sep.begin(), sep.end()); + int split = 0; + while (p != text.end()) { + StringPiece token = text.substr(0, p - text.begin()); + result.emplace_back(std::string(token)); + text.remove_prefix(token.size()); + text.remove_prefix(sep.size()); + ++split; + if (maxsplit > 0 && split == maxsplit) { + result.emplace_back(std::string(text)); + return result; + } + p = std::search(text.begin(), text.end(), sep.begin(), sep.end()); + } + result.emplace_back(std::string(text)); + return result; +} + } // namespace class StringSplitOp : public OpKernel { @@ -122,6 +180,78 @@ class StringSplitOp : public OpKernel { bool skip_empty_; }; +class StringSplitV2Op : public OpKernel { + public: + explicit StringSplitV2Op(OpKernelConstruction* context) + : OpKernel(context), maxsplit_(-1) { + OP_REQUIRES_OK(context, context->GetAttr("maxsplit", &maxsplit_)); + } + + void Compute(OpKernelContext* ctx) override { + const Tensor* input_tensor; + OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor)); + OP_REQUIRES(ctx, TensorShapeUtils::IsVector(input_tensor->shape()), + errors::InvalidArgument("input must be a vector, got shape: ", + input_tensor->shape().DebugString())); + + const auto input_vec = input_tensor->vec(); + const int64 batch_size = input_vec.dimension(0); + + const Tensor* sep_tensor; + OP_REQUIRES_OK(ctx, ctx->input("sep", &sep_tensor)); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(sep_tensor->shape()), + errors::InvalidArgument("sep must be a scalar, got shape: ", + sep_tensor->shape().DebugString())); + const auto sep_vec = sep_tensor->flat(); + StringPiece sep(sep_vec(0)); + std::vector tokens; + // Guess that we'll be unpacking a 
handful of tokens per example. + static constexpr int kReserveSize = 4; + tokens.reserve(batch_size * kReserveSize); + + int64 output_size = 0; + int64 max_num_entries = 0; + std::vector num_indices(batch_size); + for (int64 i = 0; i < batch_size; ++i) { + std::vector parts = SplitV2(input_vec(i), sep, maxsplit_); + int64 n_entries = parts.size(); + num_indices[i] = n_entries; + output_size += n_entries; + max_num_entries = std::max(max_num_entries, n_entries); + tokens.insert(tokens.end(), parts.begin(), parts.end()); + } + + Tensor* sp_indices_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({output_size, 2}), + &sp_indices_t)); + Tensor* sp_tokens_t; + OP_REQUIRES_OK( + ctx, ctx->allocate_output(1, TensorShape({output_size}), &sp_tokens_t)); + Tensor* sp_shape_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output(2, TensorShape({2}), &sp_shape_t)); + + auto sp_indices = sp_indices_t->matrix(); + auto sp_tokens = sp_tokens_t->vec(); + auto sp_shape = sp_shape_t->vec(); + sp_shape(0) = batch_size; + sp_shape(1) = max_num_entries; + size_t c = 0; + for (size_t i = 0; i < batch_size; ++i) { + for (size_t j = 0; j < num_indices[i]; ++j) { + sp_indices(c, 0) = i; + sp_indices(c, 1) = j; + sp_tokens(c) = tokens[c]; + ++c; + } + } + } + + private: + int maxsplit_; +}; + REGISTER_KERNEL_BUILDER(Name("StringSplit").Device(DEVICE_CPU), StringSplitOp); +REGISTER_KERNEL_BUILDER(Name("StringSplitV2").Device(DEVICE_CPU), + StringSplitV2Op); } // namespace tensorflow diff --git a/tensorflow/core/ops/candidate_sampling_ops.cc b/tensorflow/core/ops/candidate_sampling_ops.cc index 6e4d100b04..6e589c8d1c 100644 --- a/tensorflow/core/ops/candidate_sampling_ops.cc +++ b/tensorflow/core/ops/candidate_sampling_ops.cc @@ -145,12 +145,15 @@ REGISTER_OP("ComputeAccidentalHits") int64 num_true; TF_RETURN_IF_ERROR(c->GetAttr("num_true", &num_true)); - // Validate true_classes. + // Validate true_classes, must be a matrix. ShapeHandle true_classes; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &true_classes)); DimensionHandle unused; TF_RETURN_IF_ERROR( c->WithValue(c->Dim(true_classes, 1), num_true, &unused)); + // Validate sampled_candidates, must be a vector. + ShapeHandle sampled_candidates; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &sampled_candidates)); // All three outputs are the same shape. ShapeHandle v = c->Vector(InferenceContext::kUnknownDim); diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index 15e0ca8af9..9dca5f53ce 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -218,7 +218,17 @@ REGISTER_OP("MapAndBatchDataset") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn([](shape_inference::InferenceContext* c) { + // Use index from the end to retrieve the Input shapes, + // so that to avoid guessing the length of "other_arguments". + // batch_size, num_parallel_batches, and drop_remainder are 0-D scalars. 
+ shape_inference::ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 3), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 2), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 1), 0, &unused)); + + return shape_inference::ScalarShape(c); + }); REGISTER_OP("MapAndBatchDatasetV2") .Input("input_dataset: variant") @@ -231,7 +241,17 @@ REGISTER_OP("MapAndBatchDatasetV2") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); + .SetShapeFn([](shape_inference::InferenceContext* c) { + // Use index from the end to retrieve the Input shapes, + // so that to avoid guessing the length of "other_arguments". + // batch_size, num_parallel_calls, and drop_remainder are 0-D scalars. + shape_inference::ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 3), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 2), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 1), 0, &unused)); + + return shape_inference::ScalarShape(c); + }); REGISTER_OP("PrefetchDataset") .Input("input_dataset: variant") diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc index d949e70c66..87f4991134 100644 --- a/tensorflow/core/ops/image_ops.cc +++ b/tensorflow/core/ops/image_ops.cc @@ -454,7 +454,9 @@ REGISTER_OP("DrawBoundingBoxes") DimensionHandle unused; TF_RETURN_IF_ERROR(c->WithValue(c->Dim(boxes, 2), 4, &unused)); - return shape_inference::UnchangedShapeWithRankAtLeast(c, 3); + // The rank of the input image (rank = 4) has already been restricted + // above, and the output is of the same shape as the input. 
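For reference, the SplitV2 helper added to string_split_op.cc earlier in this patch is documented to mirror Python's str.split. A short illustration of those semantics using the built-in (plain Python, not the new op):

```python
# Explicit separator: consecutive delimiters delimit empty strings, and the
# separator may be multi-character.
print('1,,2'.split(','))        # ['1', '', '2']
print('1<>2<>3'.split('<>'))    # ['1', '2', '3']
print(''.split(','))            # ['']

# No separator: runs of whitespace collapse, leading/trailing whitespace adds
# no empty strings, and an empty or all-whitespace input yields [].
print('  1  2   3  '.split())   # ['1', '2', '3']
print('   '.split())            # []

# maxsplit bounds the number of splits; the remainder stays in the last item.
print('a,b,c'.split(',', 1))    # ['a', 'b,c']
```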
+ return shape_inference::UnchangedShape(c); }); // -------------------------------------------------------------------------- diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 1740fa152c..b3487122e2 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1084,7 +1084,7 @@ REGISTER_OP("UnsortedSegmentProd") .Input("segment_ids: Tindices") .Input("num_segments: Tnumsegments") .Output("output: T") - .Attr("T: realnumbertype") + .Attr("T: numbertype") .Attr("Tindices: {int32,int64}") .Attr("Tnumsegments: {int32,int64} = DT_INT32") .SetShapeFn(UnsortedSegmentReductionShapeFn); diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index fc60e807b9..41efa49ce3 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1453,6 +1453,7 @@ REGISTER_OP("QuantizedReluX") ShapeHandle unused; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); return Status::OK(); diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc index 1d5c743a56..4423062362 100644 --- a/tensorflow/core/ops/string_ops.cc +++ b/tensorflow/core/ops/string_ops.cc @@ -78,7 +78,7 @@ REGISTER_OP("ReduceJoin") REGISTER_OP("AsString") .Input("input: T") .Output("output: string") - .Attr("T: {int32, int64, complex64, float, double, bool, int8}") + .Attr("T: {int8, int16, int32, int64, complex64, float, double, bool}") .Attr("precision: int = -1") .Attr("scientific: bool = false") .Attr("shortest: bool = false") @@ -134,6 +134,24 @@ REGISTER_OP("StringSplit") return Status::OK(); }); +REGISTER_OP("StringSplitV2") + .Input("input: string") + .Input("sep: string") + .Output("indices: int64") + .Output("values: string") + .Output("shape: int64") + .Attr("maxsplit: int = -1") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + + c->set_output(0, c->Matrix(InferenceContext::kUnknownDim, 2)); + c->set_output(1, c->Vector(InferenceContext::kUnknownDim)); + c->set_output(2, c->Vector(2)); + return Status::OK(); + }); + REGISTER_OP("StringStrip") .Input("input: string") .Output("output: string") diff --git a/tensorflow/core/platform/cpu_info.cc b/tensorflow/core/platform/cpu_info.cc index 99de364042..e9da3d8e32 100644 --- a/tensorflow/core/platform/cpu_info.cc +++ b/tensorflow/core/platform/cpu_info.cc @@ -344,5 +344,28 @@ int CPUModelNum() { #endif } +int CPUIDNumSMT() { +#ifdef PLATFORM_IS_X86 + // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration + // https://software.intel.com/en-us/articles/intel-sdm (Vol 3A) + // Section: Detecting Hardware Multi-threads Support and Topology + // Uses CPUID Leaf 11 to enumerate system topology on Intel x86 architectures + // Other cases not supported + uint32 eax, ebx, ecx, edx; + // Check if system supports Leaf 11 + GETCPUID(eax, ebx, ecx, edx, 0, 0); + if (eax >= 11) { + // 1) Leaf 11 available? 
CPUID.(EAX=11, ECX=0):EBX != 0 + // 2) SMT_Mask_Width = CPUID.(EAX=11, ECX=0):EAX[4:0] if CPUID.(EAX=11, + // ECX=0):ECX[15:8] is 1 + GETCPUID(eax, ebx, ecx, edx, 11, 0); + if (ebx != 0 && ((ecx & 0xff00) >> 8) == 1) { + return 1 << (eax & 0x1f); // 2 ^ SMT_Mask_Width + } + } +#endif // PLATFORM_IS_X86 + return 0; +} + } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/cpu_info.h b/tensorflow/core/platform/cpu_info.h index b5be7e8b54..175c9ae8b1 100644 --- a/tensorflow/core/platform/cpu_info.h +++ b/tensorflow/core/platform/cpu_info.h @@ -35,6 +35,10 @@ namespace port { // software can change it dynamically. int NumSchedulableCPUs(); +// Returns an estimate of the number of hyperthreads per physical core +// on the CPU +int NumHyperthreadsPerCore(); + // Mostly ISA related features that we care about enum CPUFeature { // Do not change numeric assignments. @@ -107,6 +111,9 @@ int CPUModelNum(); // Returns nominal core processor cycles per second of each processor. double NominalCPUFrequency(); +// Returns num of hyperthreads per physical core +int CPUIDNumSMT(); + } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index ae81f9b5b3..a319ccbdbe 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -71,6 +71,8 @@ def pyx_library( name = filename + "_cython_translation", srcs = [filename], outs = [filename.split(".")[0] + ".cpp"], + # Optionally use PYTHON_BIN_PATH on Linux platforms so that python 3 + # works. Windows has issues with cython_binary so skip PYTHON_BIN_PATH. cmd = "PYTHONHASHSEED=0 $(location @cython//:cython_binary) --cplus $(SRCS) --output-file $(OUTS)", tools = ["@cython//:cython_binary"] + pxd_srcs, ) diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc index 72c12318ca..ff4b4436bb 100644 --- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc +++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc @@ -115,18 +115,17 @@ class LibHDFS { const char* kLibHdfsDso = "libhdfs.so"; #endif char* hdfs_home = getenv("HADOOP_HDFS_HOME"); - if (hdfs_home == nullptr) { - status_ = errors::FailedPrecondition( - "Environment variable HADOOP_HDFS_HOME not set"); - return; - } - string path = io::JoinPath(hdfs_home, "lib", "native", kLibHdfsDso); - status_ = TryLoadAndBind(path.c_str(), &handle_); - if (!status_.ok()) { - // try load libhdfs.so using dynamic loader's search path in case - // libhdfs.so is installed in non-standard location - status_ = TryLoadAndBind(kLibHdfsDso, &handle_); + if (hdfs_home != nullptr) { + string path = io::JoinPath(hdfs_home, "lib", "native", kLibHdfsDso); + status_ = TryLoadAndBind(path.c_str(), &handle_); + if (status_.ok()) { + return; + } } + + // Try to load the library dynamically in case it has been installed + // to a in non-standard location. + status_ = TryLoadAndBind(kLibHdfsDso, &handle_); } Status status_; diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 8e316472fe..708f32ba80 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -74,6 +74,11 @@ int NumSchedulableCPUs() { return kDefaultCores; } +int NumHyperthreadsPerCore() { + static const int ht_per_core = tensorflow::port::CPUIDNumSMT(); + return (ht_per_core > 0) ? 
ht_per_core : 1; +} + void* AlignedMalloc(size_t size, int minimum_alignment) { #if defined(__ANDROID__) return memalign(minimum_alignment, size); diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 522a9d84fd..cb1fd09dbb 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -19,12 +19,12 @@ limitations under the License. // TensorFlow uses semantic versioning, see http://semver.org/. #define TF_MAJOR_VERSION 1 -#define TF_MINOR_VERSION 8 +#define TF_MINOR_VERSION 9 #define TF_PATCH_VERSION 0 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "" +#define TF_VERSION_SUFFIX "-rc0" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index dffc965b14..90b6533690 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -42,6 +42,7 @@ limitations under the License. #ifndef INTEL_MKL_ML #include "mkldnn.hpp" +#include "tensorflow/core/lib/core/stringpiece.h" using mkldnn::engine; using mkldnn::memory; @@ -712,15 +713,48 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, return output_tensor; } #else +using mkldnn::stream; +template class MklDnnData; + template inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, const MklDnnShape& mkl_shape) { Tensor output_tensor; - TensorShape output_shape; - - TF_CHECK_OK( - Status(error::Code::UNIMPLEMENTED, "Unimplemented conversion function")); - + try { + if (!mkl_shape.IsMklTensor()) + return mkl_tensor; // return input since it is already TF tensor + + TensorShape output_shape = mkl_shape.GetTfShape();; + + // Allocate output tensor. + context->allocate_temp(DataTypeToEnum::v(), + output_shape, &output_tensor); + + auto cpu_engine = engine(engine::cpu, 0); + MklDnnData input(&cpu_engine); + + // Get Mkl layout of input tensor. + auto input_mkl_md = mkl_shape.GetMklLayout(); + auto output_tf_md = mkl_shape.GetTfLayout(); + auto output_tf_pd = memory::primitive_desc(output_tf_md, cpu_engine); + input.SetUsrMem(input_mkl_md, &mkl_tensor); + + // reorder + if (input.IsReorderNeeded(output_tf_pd)) { + std::vector net; + CHECK_EQ(input.CheckReorderToOpMem(output_tf_pd, &output_tensor, &net), + true); + stream(stream::kind::eager).submit(net).wait(); + } else { + // If not, just forward input tensor to output tensor. 
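Returning to the CPU-topology helper added above: CPUIDNumSMT trusts CPUID leaf 11 only when EBX is non-zero and the level type in ECX[15:8] is 1 (SMT), then reports 2^SMT_Mask_Width threads per core, and NumHyperthreadsPerCore falls back to 1 otherwise. A hedged restatement of that bit arithmetic in Python (register values are hypothetical inputs):

```python
# Sketch of the leaf-11 decoding described above; eax/ebx/ecx stand in for the
# raw CPUID(EAX=11, ECX=0) register values.
def smt_count_from_leaf11(eax, ebx, ecx):
    if ebx != 0 and ((ecx & 0xFF00) >> 8) == 1:   # level type 1 == SMT
        return 1 << (eax & 0x1F)                  # 2 ** SMT_Mask_Width
    return 0

def hyperthreads_per_core(eax, ebx, ecx):
    count = smt_count_from_leaf11(eax, ebx, ecx)
    return count if count > 0 else 1              # same fallback as the patch

print(hyperthreads_per_core(0x1, 0x2, 0x100))     # 2 threads per core
```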
+ CHECK(output_tensor.CopyFrom(mkl_tensor, output_shape)); + } + } catch (mkldnn::error& e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + string(e.message) + ", in file " + + string(__FILE__) + ":" + std::to_string(__LINE__); + LOG(FATAL) << "Operation received an exception: " << error_msg; + } return output_tensor; } #endif @@ -1843,7 +1877,7 @@ class FactoryKeyCreator { template void AddAsKey(const T data) { auto buffer = reinterpret_cast(&data); - Append(absl::string_view(buffer, sizeof(T))); + Append(StringPiece(buffer, sizeof(T))); } std::string GetKey() { @@ -1854,8 +1888,8 @@ class FactoryKeyCreator { string key_; const char delimiter = 'x'; const int kMaxKeyLength = 256; - void Append(absl::string_view s) { - key_.append(string(s)); + void Append(StringPiece s) { + key_.append(s.ToString()); key_.append(1, delimiter); } }; diff --git a/tensorflow/docs_src/community/groups.md b/tensorflow/docs_src/community/groups.md index d92f5775fa..0b07d413da 100644 --- a/tensorflow/docs_src/community/groups.md +++ b/tensorflow/docs_src/community/groups.md @@ -1,17 +1,38 @@ # User Groups -TensorFlow has communities around the world. +TensorFlow has communities around the world. [Submit your community!](https://docs.google.com/forms/d/e/1FAIpQLSc_RQIUYtVgLLihzATaO_WUXkEyBDE_OoRoOXYDPmBEvHuEBA/viewform) ## Asia -* [TensorFlow Korea (TF-KR) User Group](https://www.facebook.com/groups/TensorFlowKR/) _(Korean language)_ -* [TensorFlow User Group Tokyo](https://tfug-tokyo.connpass.com/) _(Japanese Language)_ -* [Soleil Data Dojo](https://soleildatadojo.connpass.com/) _(Japanese language)_ +* [TensorFlow China community](https://www.tensorflowers.cn) +* [TensorFlow Korea (TF-KR) User Group](https://www.facebook.com/groups/TensorFlowKR/) +* [TensorFlow User Group Tokyo](https://tfug-tokyo.connpass.com/) +* [Soleil Data Dojo](https://soleildatadojo.connpass.com/) * [TensorFlow User Group Utsunomiya](https://tfug-utsunomiya.connpass.com/) +* [TensorFlow Philippines Community](https://www.facebook.com/groups/TensorFlowPH/) +* [TensorFlow and Deep Learning Singapore](https://www.meetup.com/TensorFlow-and-Deep-Learning-Singapore/) +* [TensorFlow India](https://www.facebook.com/tensorflowindia) ## Europe * [TensorFlow Barcelona](https://www.meetup.com/Barcelona-Machine-Learning-Meetup/) * [TensorFlow Madrid](https://www.meetup.com/TensorFlow-Madrid/) +* [Tensorflow Belgium](https://www.meetup.com/TensorFlow-Belgium) +* [TensorFlow x Rome Meetup](https://www.meetup.com/it-IT/TensorFlow-x-Rome-Meetup) +* [TensorFlow London](https://www.meetup.com/TensorFlow-London/) +* [TensorFlow Edinburgh](https://www.meetup.com/tensorflow-edinburgh/) + +## America + +* [TensorFlow Buenos Aires](https://www.meetup.com/TensorFlow-Buenos-Aires/) + + +## Oceania +* [Melbourne TensorFlow Meetup](https://www.meetup.com/Melbourne-TensorFlow-Meetup) + + +## Africa + +* [TensorFlow Tunis Meetup](https://www.meetup.com/fr-FR/TensorFlow-Tunis-Meetup/) diff --git a/tensorflow/docs_src/get_started/eager.md b/tensorflow/docs_src/get_started/eager.md index f08ac74425..bbb25e20c6 100644 --- a/tensorflow/docs_src/get_started/eager.md +++ b/tensorflow/docs_src/get_started/eager.md @@ -1,3 +1,3 @@ # Get Started with Eager Execution -[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/r1.8.0/samples/core/get_started/eager.ipynb) +[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/r1.9.0/samples/core/get_started/eager.ipynb) diff --git 
a/tensorflow/docs_src/get_started/index.md b/tensorflow/docs_src/get_started/index.md index 55579d52fb..232d2f1547 100644 --- a/tensorflow/docs_src/get_started/index.md +++ b/tensorflow/docs_src/get_started/index.md @@ -10,9 +10,9 @@ course prior to diving into TensorFlow documentation: TensorFlow is a tool for machine learning. While it contains a wide range of functionality, TensorFlow is mainly designed for deep neural network models. -The easiest way to get started with TensorFlow is using Eager Execution. +The easiest way to get started with TensorFlow is by using Eager Execution. - * @{$get_started/eager}, is for anyone new to machine learning or TensorFlow. + * @{$get_started/eager}, is for anyone new to machine learning or TensorFlow. TensorFlow provides many APIs. The remainder of this section focuses on the Estimator API which provide scalable, high-performance models. See the diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 1abd840ab3..2901848745 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.9.0-rc0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 52a2a3f8a6..55bc0f64e7 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.9.0-rc0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 1256fb99c4..637231da12 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.8.0 + 1.9.0-rc0 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.8.0 + 1.9.0-rc0 @@ -124,12 +124,12 @@ instead: org.tensorflow libtensorflow - 1.8.0 + 1.9.0-rc0 org.tensorflow libtensorflow_jni_gpu - 1.8.0 + 1.9.0-rc0 ``` @@ -148,7 +148,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc0.jar), which is the TensorFlow Java Archive (JAR). 2. 
Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -167,7 +167,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.9.0-rc0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -175,13 +175,13 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.9.0-rc0.zip). 3. Extract this .zip file. - +__Note__: The native library (`tensorflow_jni.dll`) requires `msvcp140.dll` at runtime, which is included in the [Visual C++ 2015 Redistributable](https://www.microsoft.com/en-us/download/details.aspx?id=48145) package. ### Validate the installation @@ -227,7 +227,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.8.0.jar HelloTF.java
+
javac -cp libtensorflow-1.9.0-rc0.jar HelloTF.java
### Running @@ -241,11 +241,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.8.0.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.9.0-rc0.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.8.0.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.9.0-rc0.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 0ed8160027..c8d706cf3c 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -339,9 +339,7 @@ Docker will download the TensorFlow binary image the first time you launch it. #### GPU support -Prior to installing TensorFlow with GPU support, ensure that your system meets all -[NVIDIA software requirements](#NVIDIARequirements). To launch a Docker container -with NVidia GPU support, enter a command of the following format: +To launch a Docker container with NVidia GPU support, enter a command of the following format (this [does not require any local CUDA installation](https://github.com/nvidia/nvidia-docker/wiki/CUDA#requirements)):
 $ nvidia-docker run -it -p hostPort:containerPort TensorFlowGPUImage
@@ -438,7 +436,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
 
      
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
## Validate your installation @@ -517,7 +515,7 @@ on your system: from source. To use the TensorFlow binaries, version 3.5 or higher is required. See the [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a list of supported GPU cards. -* [GPU drivers](http://nvidia.com/driver) that support your version of the CUDA +* [GPU drivers](http://nvidia.com/drivers) that support your version of the CUDA Toolkit. * The `libcupti-dev` library is the NVIDIA CUDA Profile Tools Interface. This library provides advanced profiling support. To install this library, @@ -684,14 +682,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -703,14 +701,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -722,14 +720,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp35-cp35m-linux_x86_64.whl
 
@@ -741,14 +739,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 29a867a9e3..9d01271c5a 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-any.whl @@ -522,7 +522,7 @@ The value you specify depends on your Python version.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-any.whl
 
@@ -530,5 +530,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 5ba522b436..dc6c1e36fc 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -81,7 +81,7 @@ or [macOS](#PrepareMac) - + ## Prepare environment for Linux Before building TensorFlow on Linux, install the following build @@ -328,10 +328,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.8.0 on Linux: +for TensorFlow 1.9.0rc0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.9.0rc0-py2-none-any.whl
 
## Validate your installation @@ -373,9 +373,9 @@ The build and installation problems you encounter typically depend on the operating system. See the "Common installation problems" section of one of the following guides: - * @{$install_linux#CommonInstallationProblems$Installing TensorFlow on Linux} - * @{$install_mac#CommonInstallationProblems$Installing TensorFlow on Mac OS} - * @{$install_windows#CommonInstallationProblems$Installing TensorFlow on Windows} + * @{$install_linux#common_installation_problems$Installing TensorFlow on Linux} + * @{$install_mac#common_installation_problems$Installing TensorFlow on Mac OS} + * @{$install_windows#common_installation_problems$Installing TensorFlow on Windows} Beyond the errors documented in those two guides, the following table notes additional errors specific to building TensorFlow. Note that we @@ -433,6 +433,8 @@ Stack Overflow and specify the `tensorflow` tag. **Linux** + + @@ -456,6 +458,7 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version: | CPU/GPU: | Python Version: | Compiler: | Build Tools: | cuDNN: | CUDA:
tensorflow-1.9.0 | CPU | 2.7, 3.3-3.6 | GCC 4.8 | Bazel 0.11.0 | N/A | N/A
tensorflow_gpu-1.9.0 | GPU | 2.7, 3.3-3.6 | GCC 4.8 | Bazel 0.11.0 | 7 | 9
tensorflow-1.8.0 | CPU | 2.7, 3.3-3.6 | GCC 4.8 | Bazel 0.10.0 | N/A | N/A
tensorflow_gpu-1.8.0 | GPU | 2.7, 3.3-3.6 | GCC 4.8 | Bazel 0.9.0 | 7 | 9
tensorflow-1.7.0 | CPU | 2.7, 3.3-3.6 | GCC 4.8 | Bazel 0.10.0 | N/A | N/A
+ @@ -472,6 +475,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version: | CPU/GPU: | Python Version: | Compiler: | Build Tools: | cuDNN: | CUDA:
tensorflow-1.9.0 | CPU | 2.7, 3.3-3.6 | Clang from xcode | Bazel 0.11.0 | N/A | N/A
tensorflow-1.8.0 | CPU | 2.7, 3.3-3.6 | Clang from xcode | Bazel 0.10.1 | N/A | N/A
tensorflow-1.7.0 | CPU | 2.7, 3.3-3.6 | Clang from xcode | Bazel 0.10.1 | N/A | N/A
tensorflow-1.6.0 | CPU | 2.7, 3.3-3.6 | Clang from xcode | Bazel 0.8.1 | N/A | N/A
+ + diff --git a/tensorflow/docs_src/mobile/linking_libs.md b/tensorflow/docs_src/mobile/linking_libs.md index cf0db59021..efef5dd0da 100644 --- a/tensorflow/docs_src/mobile/linking_libs.md +++ b/tensorflow/docs_src/mobile/linking_libs.md @@ -27,7 +27,7 @@ called `libandroid_tensorflow_inference_java.jar`. There are three ways to include this functionality in your program: 1. Include the jcenter AAR which contains it, as in this - [example app](https://github.com/googlecodelabs/tensorflow-for-poets-2/blob/master/android/build.gradle#L59-L65) + [example app](https://github.com/googlecodelabs/tensorflow-for-poets-2/blob/master/android/tfmobile/build.gradle#L59-L65) 2. Download the nightly precompiled version from [ci.tensorflow.org](http://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/). diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md index 8b22c04d87..2b84dbb973 100644 --- a/tensorflow/docs_src/mobile/prepare_models.md +++ b/tensorflow/docs_src/mobile/prepare_models.md @@ -105,8 +105,8 @@ inline constants so everything’s in one file. To handle the conversion, you need the `freeze_graph.py` script, that’s held in [`tensorflow/python/tools/freeze_graph.py`](https://www.tensorflow.org/code/tensorflow/python/tools/freeze_graph.py). You’ll run it like this: - bazel build tensorflow/tools:freeze_graph - bazel-bin/tensorflow/tools/freeze_graph \ + bazel build tensorflow/python/tools:freeze_graph + bazel-bin/tensorflow/python/tools/freeze_graph \ --input_graph=/tmp/model/my_graph.pb \ --input_checkpoint=/tmp/model/model.ckpt-1000 \ --output_graph=/tmp/frozen_graph.pb \ diff --git a/tensorflow/docs_src/performance/quantization.md b/tensorflow/docs_src/performance/quantization.md index 2fea02d861..c97f74139c 100644 --- a/tensorflow/docs_src/performance/quantization.md +++ b/tensorflow/docs_src/performance/quantization.md @@ -227,8 +227,8 @@ of 30.0f, and an 8-bit array, the quantized values represent the following:
Version: | CPU/GPU: | Python Version: | Compiler: | Build Tools: | cuDNN: | CUDA:
tensorflow-1.9.0 | CPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | N/A | N/A
tensorflow_gpu-1.9.0 | GPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | 7 | 9
tensorflow-1.8.0 | CPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | N/A | N/A
tensorflow_gpu-1.8.0 | GPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | 7 | 9
tensorflow-1.7.0 | CPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | N/A | N/A
- +
Quantized | Float
0 | -10.0
255 | 30.0
128 | 10.0
255 | 30.0
Table 2: Example quantized value range diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md index c4aae1d9d6..b13b47184d 100644 --- a/tensorflow/docs_src/programmers_guide/estimators.md +++ b/tensorflow/docs_src/programmers_guide/estimators.md @@ -21,18 +21,17 @@ Note: TensorFlow also includes a deprecated `Estimator` class at Estimators provide the following benefits: -* You can run Estimators-based models on a local host or on a +* You can run Estimator-based models on a local host or on a distributed multi-server environment without changing your model. - Furthermore, you can run Estimators-based models on CPUs, GPUs, + Furthermore, you can run Estimator-based models on CPUs, GPUs, or TPUs without recoding your model. * Estimators simplify sharing implementations between model developers. -* You can develop a state of the art model with high-level intuitive code, +* You can develop a state of the art model with high-level intuitive code. In short, it is generally much easier to create models with Estimators than with the low-level TensorFlow APIs. -* Estimators are themselves built on tf.layers, which +* Estimators are themselves built on @{tf.layers}, which simplifies customization. -* Estimators build the graph for you. In other words, you don't have to - build the graph. +* Estimators build the graph for you. * Estimators provide a safe distributed training loop that controls how and when to: * build the graph @@ -57,7 +56,7 @@ the "plumbing" for you. That is, pre-made Estimators create and manage pre-made Estimators let you experiment with different model architectures by making only minimal code changes. @{tf.estimator.DNNClassifier$`DNNClassifier`}, for example, is a pre-made Estimator class that trains classification models -through dense, feed-forward neural networks. +based on dense, feed-forward neural networks. ### Structure of a pre-made Estimators program @@ -79,7 +78,7 @@ of the following four steps: an input function: def input_fn(dataset): - ... # manipulate dataset, extracting feature names and the label + ... # manipulate dataset, extracting the feature dict and the label return feature_dict, label (See @{$programmers_guide/datasets} for full details.) @@ -96,13 +95,13 @@ of the following four steps: population = tf.feature_column.numeric_column('population') crime_rate = tf.feature_column.numeric_column('crime_rate') median_education = tf.feature_column.numeric_column('median_education', - normalizer_fn='lambda x: x - global_education_mean') + normalizer_fn=lambda x: x - global_education_mean) 3. **Instantiate the relevant pre-made Estimator.** For example, here's a sample instantiation of a pre-made Estimator named `LinearClassifier`: # Instantiate an estimator, passing the feature columns. - estimator = tf.estimator.Estimator.LinearClassifier( + estimator = tf.estimator.LinearClassifier( feature_columns=[population, crime_rate, median_education], ) diff --git a/tensorflow/docs_src/programmers_guide/feature_columns.md b/tensorflow/docs_src/programmers_guide/feature_columns.md index 845194fe0e..90f5c53a17 100644 --- a/tensorflow/docs_src/programmers_guide/feature_columns.md +++ b/tensorflow/docs_src/programmers_guide/feature_columns.md @@ -528,10 +528,10 @@ suggested by the following snippet: categorical_column = ... # Create any categorical column # Represent the categorical column as an embedding column. -# This means creating a one-hot vector with one element for each category. 
+# This means creating an embedding vector lookup table with one element for each category. embedding_column = tf.feature_column.embedding_column( categorical_column=categorical_column, - dimension=dimension_of_embedding_vector) + dimension=embedding_dimensions) ``` @{$programmers_guide/embedding$Embeddings} is a significant topic within machine diff --git a/tensorflow/examples/learn/iris.py b/tensorflow/examples/learn/iris.py index 03e60972aa..86f5204ec3 100644 --- a/tensorflow/examples/learn/iris.py +++ b/tensorflow/examples/learn/iris.py @@ -21,7 +21,8 @@ from __future__ import division from __future__ import print_function import os -import urllib + +from six.moves.urllib.request import urlretrieve import tensorflow as tf @@ -38,9 +39,7 @@ FEATURE_KEYS = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width'] def maybe_download_iris_data(file_name, download_url): """Downloads the file and returns the number of data.""" if not os.path.exists(file_name): - raw = urllib.urlopen(download_url).read() - with open(file_name, 'w') as f: - f.write(raw) + urlretrieve(download_url, file_name) # The first line is a comma-separated string. The first one is the number of # total data in the file. diff --git a/tensorflow/java/src/gen/cc/op_generator.cc b/tensorflow/java/src/gen/cc/op_generator.cc index debd95fc62..9b171f66ec 100644 --- a/tensorflow/java/src/gen/cc/op_generator.cc +++ b/tensorflow/java/src/gen/cc/op_generator.cc @@ -376,9 +376,6 @@ void GenerateOp(const OpSpec& op, const EndpointSpec& endpoint, } } // op annotations - op_class.add_annotation( - Annotation::Create("Generated", "javax.annotation") - .attributes("value = \"TensorFlow Java Op Generator\"")); if (endpoint.deprecated()) { op_class.add_annotation(Annotation::Create("Deprecated")); string explanation; @@ -415,8 +412,12 @@ void GenerateOp(const OpSpec& op, const EndpointSpec& endpoint, SourceFileWriter writer(op_file.get()); std::list dependencies; CollectOpDependencies(op, mode, &dependencies); - writer.Write(kLicense).EndLine().BeginType(op_class, PUBLIC | FINAL, - &dependencies, &op_javadoc); + writer.Write(kLicense) + .EndLine() + .Write("// This class has been generated, DO NOT EDIT!") + .EndLine() + .EndLine() + .BeginType(op_class, PUBLIC | FINAL, &dependencies, &op_javadoc); if (!op.optional_attributes().empty()) { RenderOptionsClass(op, op_class, &writer); } diff --git a/tensorflow/java/src/gen/cc/op_specs.cc b/tensorflow/java/src/gen/cc/op_specs.cc index 181fd4c5e3..941ab2699c 100644 --- a/tensorflow/java/src/gen/cc/op_specs.cc +++ b/tensorflow/java/src/gen/cc/op_specs.cc @@ -96,6 +96,7 @@ Type TypeResolver::TypeOf(const OpDef_ArgDef& arg_def, bool* iterable_out) { *iterable_out = true; visited_attrs_.insert(std::make_pair(arg_def.number_attr(), Type::Int())); } + Type type = Type::Wildcard(); if (arg_def.type() != DataType::DT_INVALID) { // resolve type from DataType diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index b2e6c60021..bd97b181ff 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -196,11 +196,11 @@ def implicit_val_and_grad(f): # TODO(cais): Remove calls to tf.constant() once the gradients functions # accept lists and np.ndarrays. 
-  def grad_fn(*args):
+  def grad_fn(*args, **kwds):
     """Computes the gradient of the wrapped function."""
     this_tape = tape.push_new_tape()
     try:
-      end_node = f(*args)
+      end_node = f(*args, **kwds)
       if end_node is None:
         raise ValueError("Cannot differentiate a function that returns None; "
                          "did you forget to return a value from {}?".format(
diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD
index 9cd17e0407..20522098b0 100644
--- a/tensorflow/python/estimator/BUILD
+++ b/tensorflow/python/estimator/BUILD
@@ -978,7 +978,10 @@ py_test(
     size = "large",
     srcs = ["keras_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["notsan"],
+    tags = [
+        "no_windows",
+        "notsan",
+    ],
     deps = [
         ":keras",
         "//tensorflow/core:protos_all_py",
diff --git a/tensorflow/python/estimator/exporter.py b/tensorflow/python/estimator/exporter.py
index 7cdf840c97..b18212cfcd 100644
--- a/tensorflow/python/estimator/exporter.py
+++ b/tensorflow/python/estimator/exporter.py
@@ -156,7 +156,7 @@ def _loss_smaller(best_eval_result, current_eval_result):
   return best_eval_result[default_key] > current_eval_result[default_key]
 
 
-def _verify_compre_fn_args(compare_fn):
+def _verify_compare_fn_args(compare_fn):
   """Verifies compare_fn arguments."""
   args = set(util.fn_args(compare_fn))
   if 'best_eval_result' not in args:
@@ -265,7 +265,7 @@ class BestExporter(Exporter):
     self._compare_fn = compare_fn
     if self._compare_fn is None:
       raise ValueError('`compare_fn` must not be None.')
-    _verify_compre_fn_args(self._compare_fn)
+    _verify_compare_fn_args(self._compare_fn)
 
     self._saved_model_exporter = _SavedModelExporter(
         name, serving_input_receiver_fn, assets_extra, as_text)
diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py
index 035c7c148c..a6cefdece2 100644
--- a/tensorflow/python/estimator/inputs/numpy_io.py
+++ b/tensorflow/python/estimator/inputs/numpy_io.py
@@ -136,11 +136,13 @@ def numpy_input_fn(x,
       values in `x` have same shape).
     ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict.
     ValueError: if x or y is an empty dict.
-    TypeError: `x` is not a dict or array, or if `shuffle` is not bool.
+    TypeError: `x` is not a dict or array.
+    ValueError: if 'shuffle' is not provided or a bool.
   """
   if not isinstance(shuffle, bool):
-    raise TypeError('shuffle must be explicitly set as boolean; '
-                    'got {}'.format(shuffle))
+    raise ValueError('shuffle must be provided and explicitly set as boolean '
+                     '(it is recommended to set it as True for training); '
+                     'got {}'.format(shuffle))
 
   def input_fn():
     """Numpy input function."""
diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py
index 92d057e25d..81b201cc5c 100644
--- a/tensorflow/python/estimator/inputs/numpy_io_test.py
+++ b/tensorflow/python/estimator/inputs/numpy_io_test.py
@@ -286,8 +286,9 @@ class NumpyIoTest(test.TestCase):
     x = np.arange(32, 36)
     y = np.arange(4)
     with self.test_session():
-      with self.assertRaisesRegexp(TypeError,
-                                   'shuffle must be explicitly set as boolean'):
+      with self.assertRaisesRegexp(ValueError,
+                                   'shuffle must be provided and explicitly '
+                                   'set as boolean'):
         # Default shuffle is None.
numpy_io.numpy_input_fn(x, y) diff --git a/tensorflow/python/estimator/inputs/pandas_io.py b/tensorflow/python/estimator/inputs/pandas_io.py index 938e244fb3..57f8e5fd6a 100644 --- a/tensorflow/python/estimator/inputs/pandas_io.py +++ b/tensorflow/python/estimator/inputs/pandas_io.py @@ -68,15 +68,16 @@ def pandas_input_fn(x, Raises: ValueError: if `x` already contains a column with the same name as `y`, or if the indexes of `x` and `y` don't match. - TypeError: `shuffle` is not bool. + ValueError: if 'shuffle' is not provided or a bool. """ if not HAS_PANDAS: raise TypeError( 'pandas_input_fn should not be called without pandas installed') if not isinstance(shuffle, bool): - raise TypeError('shuffle must be explicitly set as boolean; ' - 'got {}'.format(shuffle)) + raise ValueError('shuffle must be provided and explicitly set as boolean ' + '(it is recommended to set it as True for training); ' + 'got {}'.format(shuffle)) x = x.copy() if y is not None: diff --git a/tensorflow/python/estimator/inputs/pandas_io_test.py b/tensorflow/python/estimator/inputs/pandas_io_test.py index e5912a3b28..dcecf6dd61 100644 --- a/tensorflow/python/estimator/inputs/pandas_io_test.py +++ b/tensorflow/python/estimator/inputs/pandas_io_test.py @@ -70,8 +70,9 @@ class PandasIoTest(test.TestCase): return x, _ = self.makeTestDataFrame() y_noindex = pd.Series(np.arange(-32, -28)) - with self.assertRaisesRegexp(TypeError, - 'shuffle must be explicitly set as boolean'): + with self.assertRaisesRegexp(ValueError, + 'shuffle must be provided and explicitly ' + 'set as boolean'): # Default shuffle is None pandas_io.pandas_input_fn(x, y_noindex) diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions.py b/tensorflow/python/estimator/inputs/queues/feeding_functions.py index 8e2ec83020..51a61adb21 100644 --- a/tensorflow/python/estimator/inputs/queues/feeding_functions.py +++ b/tensorflow/python/estimator/inputs/queues/feeding_functions.py @@ -250,7 +250,7 @@ class _PandasFeedFn(object): num_epochs=None): if len(placeholders) != len(dataframe.columns) + 1: raise ValueError("Expected {} placeholders; got {}.".format( - len(dataframe.columns), len(placeholders))) + len(dataframe.columns) + 1, len(placeholders))) self._index_placeholder = placeholders[0] self._col_placeholders = placeholders[1:] self._dataframe = dataframe diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py index c80af08fba..2f439f765e 100644 --- a/tensorflow/python/estimator/keras.py +++ b/tensorflow/python/estimator/keras.py @@ -70,7 +70,7 @@ def _convert_tensor(x): return x -def _any_variable_initalized(): +def _any_variable_initialized(): """Check if any variable has been initialized in the Keras model. Returns: @@ -511,7 +511,7 @@ def model_to_estimator(keras_model=None, keras_model_fn, model_dir=model_dir, config=config) # Check if we need to call get_weights: - if _any_variable_initalized(): + if _any_variable_initialized(): keras_weights = keras_model.get_weights() # Warn if config passed to estimator tries to update GPUOptions. 
If a # session has already been created, the GPUOptions passed to the first diff --git a/tensorflow/python/estimator/keras_test.py b/tensorflow/python/estimator/keras_test.py index 6688a84130..5e094ae92b 100644 --- a/tensorflow/python/estimator/keras_test.py +++ b/tensorflow/python/estimator/keras_test.py @@ -31,10 +31,10 @@ from tensorflow.python.estimator import run_config as run_config_lib from tensorflow.python.estimator.inputs import numpy_io from tensorflow.python.framework import ops from tensorflow.python.framework import test_util -from tensorflow.python.keras import backend as K from tensorflow.python.keras import testing_utils from tensorflow.python.keras.applications import mobilenet from tensorflow.python.keras.optimizers import SGD +from tensorflow.python.ops.parsing_ops import gen_parsing_ops from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.summary.writer import writer_cache @@ -146,13 +146,13 @@ def randomize_io_type(array, name): def multi_inputs_multi_outputs_model(): a = keras.layers.Input(shape=(16,), name='input_a') b = keras.layers.Input(shape=(16,), name='input_b') - m = keras.layers.Input(shape=(8,), dtype='bool', name='input_m') + m = keras.layers.Input(shape=(8,), dtype='string', name='input_m') dense = keras.layers.Dense(8, name='dense_1') a_2 = dense(a) - # Apply a mask - s_2 = keras.layers.Lambda(lambda k: - K.switch(k[0], k[1], K.zeros_like(k[1])))([m, a_2]) + # Read m + m_2 = keras.layers.Lambda(gen_parsing_ops.string_to_number)(m) + s_2 = keras.layers.Lambda(lambda k: k[0] * k[1])([m_2, a_2]) b_2 = dense(b) merged = keras.layers.concatenate([s_2, b_2], name='merge') c = keras.layers.Dense(3, activation='softmax', name='dense_2')(merged) @@ -372,13 +372,13 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): def train_input_fn(): input_dict = {'input_a': a_train, 'input_b': b_train, - 'input_m': input_m_train > 0} + 'input_m': input_m_train.astype(np.str)} output_dict = {'dense_2': c_train, 'dense_3': d_train} return input_dict, output_dict def eval_input_fn(): input_dict = {'input_a': a_test, 'input_b': b_test, - 'input_m': input_m_test > 0} + 'input_m': input_m_test.astype(np.str)} output_dict = {'dense_2': c_test, 'dense_3': d_test} return input_dict, output_dict diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py index e487f583be..f608dea430 100644 --- a/tensorflow/python/keras/activations.py +++ b/tensorflow/python/keras/activations.py @@ -93,6 +93,8 @@ def selu(x): - To be used together with the initialization "lecun_normal". - To be used together with the dropout variant "AlphaDropout". 
+ References: + - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) """ alpha = 1.6732632423543772848170429916717 scale = 1.0507009873554804934193349852946 diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 70b6a8431a..9f91368e5b 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -724,15 +724,6 @@ class TensorBoard(Callback): for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf_summary.histogram(mapped_weight_name, weight) - if self.write_grads: - grads = model.optimizer.get_gradients(model.total_loss, weight) - - def is_indexed_slices(grad): - return type(grad).__name__ == 'IndexedSlices' - - grads = [grad.values if is_indexed_slices(grad) else grad - for grad in grads] - tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) @@ -759,6 +750,18 @@ class TensorBoard(Callback): assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(mapped_weight_name, w_img) + if self.write_grads: + for weight in layer.trainable_weights: + mapped_weight_name = weight.name.replace(':', '_') + grads = model.optimizer.get_gradients(model.total_loss, weight) + + def is_indexed_slices(grad): + return type(grad).__name__ == 'IndexedSlices' + + grads = [grad.values if is_indexed_slices(grad) else grad + for grad in grads] + tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads) + if hasattr(layer, 'output'): tf_summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf_summary.merge_all() diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index b355f4a269..5062a26580 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -653,6 +653,8 @@ class KerasCallbacksTest(test.TestCase): model.add( keras.layers.Dense( NUM_HIDDEN, input_dim=INPUT_DIM, activation='relu')) + # non_trainable_weights: moving_variance, moving_mean + model.add(keras.layers.BatchNormalization()) model.add(keras.layers.Dense(NUM_CLASSES, activation='softmax')) model.compile( loss='categorical_crossentropy', diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py index a4cd017d60..1c9135982e 100644 --- a/tensorflow/python/keras/engine/network.py +++ b/tensorflow/python/keras/engine/network.py @@ -123,7 +123,7 @@ class Network(base_layer.Layer): # Entries are unique. Includes input and output layers. self._layers = [] - # Used in symbolic mode only, only in conjonction with graph-networks + # Used in symbolic mode only, only in conjunction with graph-networks self._outbound_nodes = [] self._inbound_nodes = [] diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py index 6a94986b9c..7e82db028b 100644 --- a/tensorflow/python/keras/engine/saving_test.py +++ b/tensorflow/python/keras/engine/saving_test.py @@ -482,7 +482,7 @@ class TestWholeModelSaving(test.TestCase): with h5py.File(fname, 'r') as h5file: num_names_arrays = len([attr for attr in h5file['model_weights'].attrs if attr.startswith('layer_names')]) - # The chunking of layer names array should have happend. + # The chunking of layer names array should have happened. 
self.assertGreater(num_names_arrays, 0) out2 = model.predict(x) self.assertAllClose(out, out2, atol=1e-05) @@ -527,7 +527,7 @@ class TestWholeModelSaving(test.TestCase): num_weight_arrays = len( [attr for attr in h5file['model_weights']['nested_model'].attrs if attr.startswith('weight_names')]) - # The chunking of layer names array should have happend. + # The chunking of layer names array should have happened. self.assertGreater(num_weight_arrays, 0) out2 = model.predict(x) self.assertAllClose(out, out2, atol=1e-05) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 89c1f1a40f..fce6cbdb7a 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -24,6 +24,7 @@ import numpy as np from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util @@ -409,11 +410,13 @@ class Model(Network): else: if sample_weight_mode == 'temporal': sample_weights.append(array_ops.placeholder_with_default( - [[1.]], shape=[None, None], name=name + '_sample_weights')) + constant_op.constant([[1.]], dtype=K.floatx()), + shape=[None, None], name=name + '_sample_weights')) sample_weight_modes.append('temporal') else: sample_weights.append(array_ops.placeholder_with_default( - [1.], shape=[None], name=name + '_sample_weights')) + constant_op.constant([1.], dtype=K.floatx()), + shape=[None], name=name + '_sample_weights')) sample_weight_modes.append(None) self.sample_weight_modes = sample_weight_modes self._feed_sample_weight_modes = [] diff --git a/tensorflow/python/keras/engine/training_eager.py b/tensorflow/python/keras/engine/training_eager.py index 2ecbff3a1c..e8838cd3bc 100644 --- a/tensorflow/python/keras/engine/training_eager.py +++ b/tensorflow/python/keras/engine/training_eager.py @@ -732,7 +732,7 @@ def slice_arrays(arrays, indices, contiguous=True): """Slices batches out of provided arrays (workaround for eager tensors). Unfortunately eager tensors don't have the same slicing behavior as - Numpy arrays (they folow the same slicing behavior as symbolic TF tensors), + Numpy arrays (they follow the same slicing behavior as symbolic TF tensors), hence we cannot use `generic_utils.slice_arrays` directly and we have to implement this workaround based on `concat`. This has a performance cost. diff --git a/tensorflow/python/keras/initializers_test.py b/tensorflow/python/keras/initializers_test.py index a54d6da839..c519e194bd 100644 --- a/tensorflow/python/keras/initializers_test.py +++ b/tensorflow/python/keras/initializers_test.py @@ -71,7 +71,7 @@ class KerasInitializersTest(test.TestCase): stddev=1, seed=126), tensor_shape, - target_mean=0., target_std=None, target_max=2) + target_mean=0., target_max=2, target_min=-2) def test_constant(self): tensor_shape = (5, 6, 4) @@ -83,49 +83,49 @@ class KerasInitializersTest(test.TestCase): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(3. / fan_in) + std = np.sqrt(1. 
/ fan_in) self._runner(keras.initializers.lecun_uniform(seed=123), tensor_shape, - target_mean=0., target_max=scale, target_min=-scale) + target_mean=0., target_std=std) def test_glorot_uniform(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, fan_out = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(6. / (fan_in + fan_out)) + std = np.sqrt(2. / (fan_in + fan_out)) self._runner(keras.initializers.glorot_uniform(seed=123), tensor_shape, - target_mean=0., target_max=scale, target_min=-scale) + target_mean=0., target_std=std) def test_he_uniform(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(6. / fan_in) + std = np.sqrt(2. / fan_in) self._runner(keras.initializers.he_uniform(seed=123), tensor_shape, - target_mean=0., target_max=scale, target_min=-scale) + target_mean=0., target_std=std) def test_lecun_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(1. / fan_in) + std = np.sqrt(1. / fan_in) self._runner(keras.initializers.lecun_normal(seed=123), tensor_shape, - target_mean=0., target_std=None, target_max=2 * scale) + target_mean=0., target_std=std) def test_glorot_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, fan_out = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(2. / (fan_in + fan_out)) + std = np.sqrt(2. / (fan_in + fan_out)) self._runner(keras.initializers.glorot_normal(seed=123), tensor_shape, - target_mean=0., target_std=None, target_max=2 * scale) + target_mean=0., target_std=std) def test_he_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(2. / fan_in) + std = np.sqrt(2. 
/ fan_in) self._runner(keras.initializers.he_normal(seed=123), tensor_shape, - target_mean=0., target_std=None, target_max=2 * scale) + target_mean=0., target_std=std) def test_orthogonal(self): tensor_shape = (20, 20) diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py index 5061825d38..f60064ed63 100644 --- a/tensorflow/python/keras/layers/core.py +++ b/tensorflow/python/keras/layers/core.py @@ -19,7 +19,9 @@ from __future__ import division from __future__ import print_function import copy +import sys import types as python_types +import warnings import numpy as np @@ -714,6 +716,7 @@ class Lambda(Layer): return self.mask def get_config(self): + module = self.function.__module__ if isinstance(self.function, python_types.LambdaType): function = generic_utils.func_dump(self.function) function_type = 'lambda' @@ -721,21 +724,26 @@ class Lambda(Layer): function = self.function.__name__ function_type = 'function' + output_shape_module = None if isinstance(self._output_shape, python_types.LambdaType): output_shape = generic_utils.func_dump(self._output_shape) output_shape_type = 'lambda' + output_shape_module = self._output_shape.__module__ elif callable(self._output_shape): output_shape = self._output_shape.__name__ output_shape_type = 'function' + output_shape_module = self._output_shape.__module__ else: output_shape = self._output_shape output_shape_type = 'raw' config = { 'function': function, + 'module': module, 'function_type': function_type, 'output_shape': output_shape, 'output_shape_type': output_shape_type, + 'output_shape_module': output_shape_module, 'arguments': self.arguments } base_config = super(Lambda, self).get_config() @@ -745,8 +753,16 @@ class Lambda(Layer): def from_config(cls, config, custom_objects=None): config = config.copy() globs = globals() + module = config.pop('module', None) + if module in sys.modules: + globs.update(sys.modules[module].__dict__) + elif module is not None: + # Note: we don't know the name of the function if it's a lambda. + warnings.warn('{} is not loaded, but a Lambda layer uses it. ' + 'It may cause errors.'.format(module) + , UserWarning) if custom_objects: - globs = dict(list(globs.items()) + list(custom_objects.items())) + globs.update(custom_objects) function_type = config.pop('function_type') if function_type == 'function': # Simple lookup in custom objects @@ -760,6 +776,14 @@ class Lambda(Layer): else: raise TypeError('Unknown function type:', function_type) + output_shape_module = config.pop('output_shape_module', None) + if output_shape_module in sys.modules: + globs.update(sys.modules[output_shape_module].__dict__) + elif output_shape_module is not None: + # Note: we don't know the name of the function if it's a lambda. + warnings.warn('{} is not loaded, but a Lambda layer uses it. 
' + 'It may cause errors.'.format(output_shape_module) + , UserWarning) output_shape_type = config.pop('output_shape_type') if output_shape_type == 'function': # Simple lookup in custom objects diff --git a/tensorflow/python/keras/models_test.py b/tensorflow/python/keras/models_test.py index c616d8f24f..e6e45902a8 100644 --- a/tensorflow/python/keras/models_test.py +++ b/tensorflow/python/keras/models_test.py @@ -144,5 +144,19 @@ class CheckpointingTests(test.TestCase): model.load_weights(save_prefix) self.assertEqual(12., self.evaluate(beta1_power)) +class TestModelBackend(test.TestCase): + + def test_model_backend_float64_use_cases(self): + # Test case for GitHub issue 19318 + floatx = keras.backend.floatx() + keras.backend.set_floatx('float64') + + x = keras.Input((5,)) + y = keras.layers.Dense(1)(x) + model = keras.models.Model(x, y) + model.compile('rmsprop', 'mse') + + keras.backend.set_floatx(floatx) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/kernel_tests/as_string_op_test.py b/tensorflow/python/kernel_tests/as_string_op_test.py index 9d54add264..94ed8ebd31 100644 --- a/tensorflow/python/kernel_tests/as_string_op_test.py +++ b/tensorflow/python/kernel_tests/as_string_op_test.py @@ -130,6 +130,16 @@ class AsStringOpTest(test.TestCase): result = output.eval(feed_dict={input_: int_inputs_}) self.assertAllEqual(s(result), ["%d" % x for x in int_inputs_]) + def testHalfInt(self): + s = lambda strs: [x.decode("ascii") for x in strs] + + with self.test_session(): + input_ = array_ops.placeholder(dtypes.int16) + int_inputs_ = [np.iinfo(np.int16).min, np.iinfo(np.int16).max] + output = string_ops.as_string(input_) + result = output.eval(feed_dict={input_: int_inputs_}) + self.assertAllEqual(s(result), ["%d" % x for x in int_inputs_]) + def testBool(self): bool_inputs_ = [False, True] s = lambda strs: [x.decode("ascii") for x in strs] diff --git a/tensorflow/python/kernel_tests/betainc_op_test.py b/tensorflow/python/kernel_tests/betainc_op_test.py index 08b03f8518..16fdedac41 100644 --- a/tensorflow/python/kernel_tests/betainc_op_test.py +++ b/tensorflow/python/kernel_tests/betainc_op_test.py @@ -172,7 +172,7 @@ class BetaincTest(test.TestCase): tf_gout_t = math_ops.betainc(tf_ga_s, tf_gb_s, tf_gx_s) err = gradient_checker.compute_gradient_error( [tf_gx_s], [gx_s.shape], tf_gout_t, gx_s.shape) - print("betainc gradient err = %g " % err) + tf_logging.info("betainc gradient err = %g " % err) self.assertLess(err, err_tolerance) # Test broadcast gradient @@ -181,7 +181,7 @@ class BetaincTest(test.TestCase): tf_gout_t = math_ops.betainc(tf_ga_s, tf_gb_s, tf_gx_s) err = gradient_checker.compute_gradient_error( [tf_gx_s], [()], tf_gout_t, ga_s.shape) - print("betainc gradient err = %g " % err) + tf_logging.info("betainc gradient err = %g " % err) self.assertLess(err, err_tolerance) diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py index e08123b041..fb52d10475 100644 --- a/tensorflow/python/kernel_tests/clip_ops_test.py +++ b/tensorflow/python/kernel_tests/clip_ops_test.py @@ -18,9 +18,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import gradient_checker from 
tensorflow.python.platform import test @@ -414,6 +417,16 @@ class ClipTest(test.TestCase): self.assertAllClose(np_ans, tf_ans) + def testClipByValueEmptyTensor(self): + # Test case for GitHub issue 19337 + zero = array_ops.placeholder(dtype=dtypes.float32, shape=None) + x = clip_ops.clip_by_value(zero, zero, zero) + y = clip_ops.clip_by_value(zero, 1.0, 1.0) + z = clip_ops.clip_by_value(zero, zero, 1.0) + w = clip_ops.clip_by_value(zero, 1.0, zero) + with self.test_session(use_gpu=True) as sess: + sess.run([x, y, z, w], feed_dict={zero: np.zeros((7, 0))}) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py index 8699fd5b25..80ba7dafc9 100644 --- a/tensorflow/python/kernel_tests/conv_ops_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_test.py @@ -312,8 +312,8 @@ class Conv2DTest(test.TestCase): expected_values = self.evaluate(expected_results) computed_values = self.evaluate(computed_results) for e_value, c_value in zip(expected_values, computed_values): - print("expected = ", e_value) - print("actual = ", c_value) + tf_logging.info("expected = ", e_value) + tf_logging.info("actual = ", c_value) self.assertAllClose( e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-4) @@ -337,8 +337,8 @@ class Conv2DTest(test.TestCase): for i in range(len(tensors)): conv = tensors[i] value = values[i] - print("expected = ", expected) - print("actual = ", value) + tf_logging.info("expected = ", expected) + tf_logging.info("actual = ", value) tol = 1e-5 if value.dtype == np.float16: tol = 1e-3 @@ -547,8 +547,8 @@ class Conv2DTest(test.TestCase): # "values" consists of two tensors for two backprops value = self.evaluate(conv) self.assertShapeEqual(value, conv) - print("expected = ", expected) - print("actual = ", value) + tf_logging.info("expected = ", expected) + tf_logging.info("actual = ", value) self.assertArrayNear(expected, value.flatten(), err) def _CompareBackpropInput(self, input_sizes, filter_sizes, output_sizes, @@ -723,8 +723,8 @@ class Conv2DTest(test.TestCase): data_format=data_format) value = self.evaluate(conv) self.assertShapeEqual(value, conv) - print("expected = ", expected) - print("actual = ", value) + tf_logging.info("expected = ", expected) + tf_logging.info("actual = ", value) self.assertArrayNear(expected, value.flatten(), 1e-5) def _CompareBackFilter(self, input_sizes, filter_sizes, output_sizes, @@ -912,8 +912,8 @@ class Conv2DTest(test.TestCase): value_2 = sess.run(conv_2) self.assertShapeEqual(value, conv) self.assertShapeEqual(value_2, conv_2) - print("expected = ", value_2) - print("actual = ", value) + tf_logging.info("expected = ", value_2) + tf_logging.info("actual = ", value) self.assertArrayNear(value_2.flatten(), value.flatten(), err) # Testing for backprops @@ -965,8 +965,8 @@ class Conv2DTest(test.TestCase): value_2 = sess.run(conv_2) self.assertShapeEqual(value, conv) self.assertShapeEqual(value_2, conv_2) - print("expected = ", value_2) - print("actual = ", value) + tf_logging.info("expected = ", value_2) + tf_logging.info("actual = ", value) self.assertArrayNear(value_2.flatten(), value.flatten(), err) def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self): @@ -1178,7 +1178,7 @@ class Conv2DTest(test.TestCase): # since fp16 numerical gradients are too imprecise. 
err = np.fabs(jacob_t - reference_jacob_t).max() - print("conv_2d gradient error = ", err) + tf_logging.info("conv_2d gradient error = ", err) self.assertLess(err, 0.002) def testInputGradientValidPaddingStrideOne(self): @@ -1546,7 +1546,7 @@ class DepthwiseConv2DTest(test.TestCase): conv = nn_impl.depthwise_conv2d( t1, t2, strides=[1, stride, stride, 1], padding=padding) value = sess.run(conv) - print("value = ", value) + tf_logging.info("value = ", value) self.assertArrayNear(expected, np.ravel(value), 1e-5) self.assertShapeEqual(value, conv) @@ -1668,7 +1668,7 @@ class SeparableConv2DTest(test.TestCase): conv = array_ops.transpose(conv, [0, 2, 3, 1]) value = sess.run(conv) - print("value = ", value) + tf_logging.info("value = ", value) self.assertArrayNear(expected, np.ravel(value), 1e-5) self.assertShapeEqual(value, conv) @@ -1826,7 +1826,7 @@ class Conv2DBenchmark(test.Benchmark): wall_time = time.time() - start self.report_benchmark( name="conv_stack_iter_%d" % iter_index, wall_time=wall_time) - print("conv_stack_iter_%d: %.4f" % (iter_index, wall_time)) + tf_logging.info("conv_stack_iter_%d: %.4f" % (iter_index, wall_time)) def GetInceptionFwdTest(input_size, filter_size, stride, padding, diff --git a/tensorflow/python/kernel_tests/gather_nd_op_test.py b/tensorflow/python/kernel_tests/gather_nd_op_test.py index 91ebe8de99..58e2a8ac2a 100644 --- a/tensorflow/python/kernel_tests/gather_nd_op_test.py +++ b/tensorflow/python/kernel_tests/gather_nd_op_test.py @@ -197,7 +197,21 @@ class GatherNdTest(test.TestCase): self.assertEqual(None, shape.ndims) self.assertEqual(None, shape[0].value) - def testBadIndices(self): + def testBadIndicesCPU(self): + with self.test_session(use_gpu=False): + params = [0, 1, 2] + indices = [[[0], [7]]] # Make this one higher rank + gather_nd = array_ops.gather_nd(params, indices) + with self.assertRaisesOpError( + r"flat indices\[1, :\] = \[7\] does not index into param " + r"\(shape: \[3\]\)"): + gather_nd.eval() + + def _disabledTestBadIndicesGPU(self): + # TODO disabled due to different behavior on GPU and CPU + # On GPU the bad indices do not raise error but fetch 0 values + if not test.is_gpu_available(): + return with self.test_session(use_gpu=True): params = [0, 1, 2] indices = [[[0], [7]]] # Make this one higher rank @@ -207,7 +221,21 @@ class GatherNdTest(test.TestCase): r"\(shape: \[3\]\)"): gather_nd.eval() - def testBadIndicesWithSlices(self): + def testBadIndicesWithSlicesCPU(self): + with self.test_session(use_gpu=False): + params = [[0, 1, 2]] + indices = [[[0], [0], [1]]] # Make this one higher rank + gather_nd = array_ops.gather_nd(params, indices) + with self.assertRaisesOpError( + r"flat indices\[2, :\] = \[1\] does not index into param " + r"\(shape: \[1,3\]\)"): + gather_nd.eval() + + def _disabledTestBadIndicesWithSlicesGPU(self): + # TODO disabled due to different behavior on GPU and CPU + # On GPU the bad indices do not raise error but fetch 0 values + if not test.is_gpu_available(): + return with self.test_session(use_gpu=True): params = [[0, 1, 2]] indices = [[[0], [0], [1]]] # Make this one higher rank diff --git a/tensorflow/python/kernel_tests/gather_op_test.py b/tensorflow/python/kernel_tests/gather_op_test.py index a2fcd751df..033fa95935 100644 --- a/tensorflow/python/kernel_tests/gather_op_test.py +++ b/tensorflow/python/kernel_tests/gather_op_test.py @@ -27,7 +27,8 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.platform import test -_TEST_TYPES = 
(dtypes.float32, dtypes.complex64, dtypes.complex128) +_TEST_TYPES = (dtypes.int64, dtypes.float32, + dtypes.complex64, dtypes.complex128) class GatherTest(test.TestCase): @@ -122,6 +123,9 @@ class GatherTest(test.TestCase): gather, [tf_params, tf_indices, tf_axis], gather_grad) self.assertEqual(indices_grad, None) self.assertEqual(axis_grad, None) + if dtype.is_integer: + self.assertEqual(params_grad, None) + continue # For axis 0, we are able to create an efficient IndexedSlices for # the gradient. if axis == 0: @@ -177,7 +181,19 @@ class GatherTest(test.TestCase): gather_t = array_ops.gather(params, indices, axis=axis) self.assertEqual(None, gather_t.shape) - def testBadIndices(self): + def testBadIndicesCPU(self): + with self.test_session(use_gpu=False): + params = [[0, 1, 2], [3, 4, 5]] + with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 2\)"): + array_ops.gather(params, [[7]], axis=0).eval() + with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 3\)"): + array_ops.gather(params, [[7]], axis=1).eval() + + def _disabledTestBadIndicesGPU(self): + # TODO disabled due to different behavior on GPU and CPU + # On GPU the bad indices do not raise error but fetch 0 values + if not test.is_gpu_available(): + return with self.test_session(use_gpu=True): params = [[0, 1, 2], [3, 4, 5]] with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 2\)"): diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index a9b55854f1..795aa67248 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -362,6 +362,33 @@ class UniformUnitScalingInitializationTest(test.TestCase): dtype=dtypes.string) +class VarianceScalingInitializationTest(test.TestCase): + + def testNormalDistribution(self): + shape = [100, 100] + expect_mean = 0. + expect_var = 1. / shape[0] + init = init_ops.variance_scaling_initializer(distribution='normal') + + with self.test_session(use_gpu=True): + x = init(shape).eval() + + self.assertNear(np.mean(x), expect_mean, err=1e-2) + self.assertNear(np.var(x), expect_var, err=1e-2) + + def testUniformDistribution(self): + shape = [100, 100] + expect_mean = 0. + expect_var = 1. / shape[0] + init = init_ops.variance_scaling_initializer(distribution='uniform') + + with self.test_session(use_gpu=True): + x = init(shape).eval() + + self.assertNear(np.mean(x), expect_mean, err=1e-2) + self.assertNear(np.var(x), expect_var, err=1e-2) + + # TODO(vrv): move to sequence_ops_test? 
 class RangeTest(test.TestCase):
diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py
index a0c372db7d..e95c729715 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_test.py
@@ -947,7 +947,7 @@ class PoolingTest(test.TestCase):
         output_sizes,
         x_init_value=x_init_value,
         delta=1e-2)
-    print("%s gradient error = " % func_name, err)
+    tf_logging.info("%s gradient error = " % func_name, err)
     self.assertLess(err, err_tolerance)
 
   def _ConstructAndTestSecondGradient(self,
@@ -1024,7 +1024,7 @@ class PoolingTest(test.TestCase):
         input_sizes,
         x_init_value=x_init_value,
         delta=1e-2)
-    print("%s second-order gradient error = " % func_name, err)
+    tf_logging.info("%s second-order gradient error = " % func_name, err)
     self.assertLess(err, err_tolerance)
 
   def _testMaxPoolGradValidPadding1_1(self, data_format, use_gpu):
diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py
index 677253946e..253e43920b 100644
--- a/tensorflow/python/kernel_tests/py_func_test.py
+++ b/tensorflow/python/kernel_tests/py_func_test.py
@@ -19,6 +19,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import gc
 import re
 
 import numpy as np
@@ -434,13 +435,29 @@ class PyFuncTest(test.TestCase):
   # ----- Tests shared by py_func and eager_py_func -----
 
   def testCleanup(self):
-    for _ in xrange(1000):
-      g = ops.Graph()
-      with g.as_default():
-        c = constant_op.constant([1.], dtypes.float32)
-        _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32])
-        _ = script_ops.eager_py_func(lambda x: x + 1, [c], [dtypes.float32])
-    self.assertLess(script_ops._py_funcs.size(), 100)
+    # Delete everything created by previous tests to avoid side effects.
+    ops.reset_default_graph()
+    gc.collect()
+    initial_size = script_ops._py_funcs.size()
+    # Encapsulate the graph generation, so locals can be deleted.
+    def make_graphs():
+      for _ in xrange(1000):
+        g = ops.Graph()
+        with g.as_default():
+          c = constant_op.constant([1.], dtypes.float32)
+          _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32])
+          _ = script_ops.eager_py_func(lambda x: x + 1, [c], [dtypes.float32])
+          # These ops have a reference to 'c' which has a reference to the graph.
+          # Checks if the functions are being deleted though the graph is referenced from them.
+          # (see #18292)
+          _ = script_ops.py_func(lambda x: x + c.shape[0], [c], [dtypes.float32])
+          _ = script_ops.eager_py_func(lambda x: x + c.shape[0], [c], [dtypes.float32])
+
+    # Call garbage collector to enforce deletion.
+ make_graphs() + ops.reset_default_graph() + gc.collect() + self.assertEqual(initial_size, script_ops._py_funcs.size()) # ----- Tests for eager_py_func ----- @test_util.run_in_graph_and_eager_modes() diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index 79fe927b8a..faa4b49a8d 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -144,7 +144,9 @@ class StatefulScatterNdTest(test.TestCase): self.assertAllClose(new, ref_var.eval()) def _VariableRankTests(self, np_scatter, tf_scatter): - for vtype in (np.float32, np.float64, np.complex64, np.complex128): + for vtype in (np.int32, + np.float32, np.float64, + np.complex64, np.complex128): for itype in (np.int32, np.int64): self._VariableRankTest(np_scatter, tf_scatter, vtype, itype) @@ -221,7 +223,7 @@ class StatefulScatterNdTest(test.TestCase): # self._VariableRankTests(_NumpyDiv, state_ops.scatter_nd_div) def _ScatterRepeatIndicesTest(self, np_scatter, tf_scatter): - for vtype in (np.float32, np.float64): + for vtype in (np.int32, np.float32, np.float64): for itype in (np.int32, np.int64): self._VariableRankTest( np_scatter, tf_scatter, vtype, itype, repeat_indices=True) diff --git a/tensorflow/python/kernel_tests/scatter_ops_test.py b/tensorflow/python/kernel_tests/scatter_ops_test.py index c70a4ffce7..1a0fa744ae 100644 --- a/tensorflow/python/kernel_tests/scatter_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_ops_test.py @@ -159,7 +159,13 @@ class ScatterTest(test.TestCase): # Clips small values to avoid division by zero. def clip_small_values(x): - return 1e-4 * np.sign(x) if np.abs(x) < 1e-4 else x + threshold = 1e-4 + sign = np.sign(x) + + if isinstance(x, np.int32): + threshold = 1 + sign = np.random.choice([-1, 1]) + return threshold * sign if np.abs(x) < threshold else x updates = np.vectorize(clip_small_values)(updates) old = _AsType(np.random.randn(*((first_dim,) + extra_shape)), vtype) @@ -181,7 +187,11 @@ class ScatterTest(test.TestCase): tf_scatter, repeat_indices=False, updates_are_scalar=False): - for vtype in (np.float32, np.float64): + vtypes = [np.float32, np.float64] + if tf_scatter != state_ops.scatter_div: + vtypes.append(np.int32) + + for vtype in vtypes: for itype in (np.int32, np.int64): self._VariableRankTest(tf_scatter, vtype, itype, repeat_indices, updates_are_scalar) diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 794be096b7..a82855dfeb 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -264,7 +264,9 @@ class UnsortedSegmentTest(SegmentReductionHelper): # A subset of ops has been enabled for complex numbers self.complex_ops_list = [(np.add, None, - math_ops.unsorted_segment_sum, lambda t: 0)] + math_ops.unsorted_segment_sum, lambda t: 0), + (np.ndarray.__mul__, None, + math_ops.unsorted_segment_prod, lambda t: 1)] self.differentiable_dtypes = [dtypes_lib.float16, dtypes_lib.float32, dtypes_lib.float64] self.all_dtypes = (self.differentiable_dtypes + diff --git a/tensorflow/python/kernel_tests/string_split_op_test.py b/tensorflow/python/kernel_tests/string_split_op_test.py index a5bd1b6ee0..e20daccb28 100644 --- a/tensorflow/python/kernel_tests/string_split_op_test.py +++ b/tensorflow/python/kernel_tests/string_split_op_test.py @@ -146,5 +146,101 @@ class 
StringSplitOpTest(test.TestCase): self.assertAllEqual(shape, [3, 1]) +class StringSplitV2OpTest(test.TestCase): + + def testSplitV2(self): + strings = ["pigs on the wing", "animals"] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings) + indices, values, shape = sess.run(tokens) + self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [1, 0]]) + self.assertAllEqual(values, [b"pigs", b"on", b"the", b"wing", b"animals"]) + self.assertAllEqual(shape, [2, 4]) + + def testSplitV2MultiCharSeparator(self): + # Match Python behavior: + # >>> '1<>2<>3'.split('<>') + # ['1', '2', '3'] + # >>> "<><>4<>5<><>6<>".split("<>") + # ['', '', '4', '5', '', '6', ''] + strings = ["1<>2<>3", "<><>4<>5<><>6<>"] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings, sep="<>") + indices, values, shape = sess.run(tokens) + self.assertAllEqual( + indices, [[0, 0], [0, 1], [0, 2], + [1, 0], [1, 1], [1, 2], [1, 3], [1, 4], [1, 5], [1, 6]]) + self.assertAllEqual(values, [b"1", b"2", b"3", + b"", b"", b"4", b"5", b"", b"6", b""]) + self.assertAllEqual(shape, [2, 7]) + + def testSplitV2SimpleSeparator(self): + # Match Python behavior: + # >>> '1,2,3'.split(',') + # ['1', '2', '3'] + # >>> '1,2,,3,'.split(',') + # ['1', '2', '', '3', ''] + strings = ["1,2,3", "4,5,,6,"] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings, sep=',') + indices, values, shape = sess.run(tokens) + self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], + [1, 0], [1, 1], [1, 2], [1, 3], [1, 4]]) + self.assertAllEqual(values, [b"1", b"2", b"3", + b"4", b"5", b"", b"6", b""]) + self.assertAllEqual(shape, [2, 5]) + + def testSplitV2EmptySeparator(self): + # Match Python behavior: + # >>> '1 2 3'.split() + # ['1', '2', '3'] + #>>> ' 1 2 3 '.split() + #['1', '2', '3'] + strings = ["1 2 3", " 4 5 6 "] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings) + indices, values, shape = sess.run(tokens) + self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], + [1, 0], [1, 1], [1, 2]]) + self.assertAllEqual(values, [b"1", b"2", b"3", b"4", b"5", b"6"]) + self.assertAllEqual(shape, [2, 3]) + + def testSplitV2SimpleSeparatorMaxSplit(self): + # Match Python behavior: + # >>> '1,2,3'.split(',', maxsplit=1) + # ['1', '2,3'] + # >>> '4,5,,6,'.split(',', maxsplit=1) + # ['4', '5,,6,'] + strings = ["1,2,3", "4,5,,6,"] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings, sep=',', maxsplit=1) + indices, values, shape = sess.run(tokens) + self.assertAllEqual(indices, [[0, 0], [0, 1], + [1, 0], [1, 1]]) + self.assertAllEqual(values, [b"1", b"2,3", b"4", b"5,,6,"]) + self.assertAllEqual(shape, [2, 2]) + + def testSplitV2EmptySeparatorMaxSplit(self): + # Match Python behavior: + # '1 2 3'.split(maxsplit=1) + # ['1', '2 3'] + # >>> " 4 5 6 ".split(maxsplit=1) + # ['4', '5 6 '] + strings = ["1 2 3", " 4 5 6 "] + + with self.test_session() as sess: + tokens = string_ops.string_split_v2(strings, maxsplit=1) + indices, values, shape = sess.run(tokens) + self.assertAllEqual(indices, [[0, 0], [0, 1], + [1, 0], [1, 1]]) + self.assertAllEqual(values, [b"1", b"2 3", b"4", b"5 6 "]) + self.assertAllEqual(shape, [2, 2]) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 8129334703..fae63b1132 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -2619,6 +2619,10 @@ reverse.__doc__ = 
gen_array_ops.reverse_v2.__doc__ # pylint: disable=redefined-builtin @tf_export("reverse_sequence") +@deprecation.deprecated_args( + None, "seq_dim is deprecated, use seq_axis instead", "seq_dim") +@deprecation.deprecated_args( + None, "batch_dim is deprecated, use batch_axis instead", "batch_dim") def reverse_sequence(input, seq_lengths, seq_axis=None, diff --git a/tensorflow/python/ops/gradient_checker.py b/tensorflow/python/ops/gradient_checker.py index 12afcd0b51..94c8d79335 100644 --- a/tensorflow/python/ops/gradient_checker.py +++ b/tensorflow/python/ops/gradient_checker.py @@ -283,10 +283,10 @@ def compute_gradient(x, numbers. For example, if `x` is complex with shape `[m]` and `y` is complex with shape `[n]`, each Jacobian `J` will have shape `[m * 2, n * 2]` with - J[:m, :n] = d(Re y)/d(Re x) - J[:m, n:] = d(Im y)/d(Re x) - J[m:, :n] = d(Re y)/d(Im x) - J[m:, n:] = d(Im y)/d(Im x) + J[::2, ::2] = d(Re y)/d(Re x) + J[::2, 1::2] = d(Im y)/d(Re x) + J[1::2, ::2] = d(Re y)/d(Im x) + J[1::2, 1::2] = d(Im y)/d(Im x) Args: x: a tensor or list of tensors diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index bdcf420980..f27d9224c1 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -28,6 +28,7 @@ from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops import gen_image_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops @@ -258,14 +259,14 @@ def random_flip_up_down(image, seed=None): dimension, which is `height`. Otherwise output the image as-is. Args: - image: A 3-D tensor of shape `[height, width, channels].` + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. seed: A Python integer. Used to create a random seed. See @{tf.set_random_seed} for behavior. Returns: - A 3-D tensor of the same type and shape as `image`. - + A tensor of the same type and shape as `image`. Raises: ValueError: if the shape of `image` not supported. """ @@ -280,13 +281,14 @@ def random_flip_left_right(image, seed=None): second dimension, which is `width`. Otherwise output the image as-is. Args: - image: A 3-D tensor of shape `[height, width, channels].` + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. seed: A Python integer. Used to create a random seed. See @{tf.set_random_seed} for behavior. Returns: - A 3-D tensor of the same type and shape as `image`. + A tensor of the same type and shape as `image`. Raises: ValueError: if the shape of `image` not supported. @@ -297,7 +299,8 @@ def random_flip_left_right(image, seed=None): def _random_flip(image, flip_index, seed, scope_name): """Randomly (50% chance) flip an image along axis `flip_index`. Args: - image: A 3-D tensor of shape `[height, width, channels].` + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. flip_index: The dimension along which to flip the image. Vertical: 0, Horizontal: 1 seed: A Python integer. Used to create a random seed. See @@ -306,22 +309,37 @@ def _random_flip(image, flip_index, seed, scope_name): scope_name: Name of the scope in which the ops are added. 
Returns: - A 3-D tensor of the same type and shape as `image`. + A tensor of the same type and shape as `image`. Raises: ValueError: if the shape of `image` not supported. """ with ops.name_scope(None, scope_name, [image]) as scope: image = ops.convert_to_tensor(image, name='image') - image = _Assert3DImage(image) - uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) - mirror_cond = math_ops.less(uniform_random, .5) - result = control_flow_ops.cond( - mirror_cond, - lambda: array_ops.reverse(image, [flip_index]), - lambda: image, - name=scope) - return fix_image_flip_shape(image, result) + image = _AssertAtLeast3DImage(image) + shape = image.get_shape() + if shape.ndims == 3 or shape.ndims is None: + uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) + mirror_cond = math_ops.less(uniform_random, .5) + result = control_flow_ops.cond( + mirror_cond, + lambda: array_ops.reverse(image, [flip_index]), + lambda: image, + name=scope + ) + return fix_image_flip_shape(image, result) + elif shape.ndims == 4: + uniform_random = random_ops.random_uniform( + [array_ops.shape(image)[0]], 0, 1.0, seed=seed + ) + mirror_cond = math_ops.less(uniform_random, .5) + return array_ops.where( + mirror_cond, + image, + functional_ops.map_fn(lambda x: array_ops.reverse(x, [flip_index]), image, dtype=image.dtype) + ) + else: + raise ValueError('\'image\' must have either 3 or 4 dimensions.') @tf_export('image.flip_left_right') @@ -1634,13 +1652,13 @@ def is_jpeg(contents, name=None): @tf_export('image.decode_image') -def decode_image(contents, channels=None, name=None): +def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None): """Convenience function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`. Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the - appropriate operation to convert the input bytes `string` into a `Tensor` of - type `uint8`. + appropriate operation to convert the input bytes `string` into a `Tensor` + of type `dtype`. Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D @@ -1652,10 +1670,11 @@ def decode_image(contents, channels=None, name=None): contents: 0-D `string`. The encoded image bytes. channels: An optional `int`. Defaults to `0`. Number of color channels for the decoded image. + dtype: The desired DType of the returned `Tensor`. name: A name for the operation (optional) Returns: - `Tensor` with type `uint8` with shape `[height, width, num_channels]` for + `Tensor` with type `dtype` and shape `[height, width, num_channels]` for BMP, JPEG, and PNG images and shape `[num_frames, height, width, 3]` for GIF images. 
@@ -1679,7 +1698,7 @@ def decode_image(contents, channels=None, name=None): channels_msg = 'Channels must be in (None, 0, 3) when decoding BMP images' assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) with ops.control_dependencies([assert_decode, assert_channels]): - return gen_image_ops.decode_bmp(contents) + return convert_image_dtype(gen_image_ops.decode_bmp(contents), dtype) def _gif(): # Create assert to make sure that channels is not set to 1 @@ -1692,7 +1711,7 @@ def decode_image(contents, channels=None, name=None): channels_msg = 'Channels must be in (None, 0, 3) when decoding GIF images' assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) with ops.control_dependencies([assert_channels]): - return gen_image_ops.decode_gif(contents) + return convert_image_dtype(gen_image_ops.decode_gif(contents), dtype) def check_gif(): # Create assert op to check that bytes are GIF decodable @@ -1701,7 +1720,11 @@ def decode_image(contents, channels=None, name=None): def _png(): """Decodes a PNG image.""" - return gen_image_ops.decode_png(contents, channels) + return convert_image_dtype( + gen_image_ops.decode_png(contents, channels, + dtype=dtypes.uint8 + if dtype == dtypes.uint8 + else dtypes.uint16), dtype) def check_png(): """Checks if an image is PNG.""" @@ -1717,7 +1740,8 @@ def decode_image(contents, channels=None, name=None): 'images') assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) with ops.control_dependencies([assert_channels]): - return gen_image_ops.decode_jpeg(contents, channels) + return convert_image_dtype( + gen_image_ops.decode_jpeg(contents, channels), dtype) # Decode normal JPEG images (start with \xff\xd8\xff\xe0) # as well as JPEG images with EXIF data (start with \xff\xd8\xff\xe1). @@ -1878,7 +1902,7 @@ def sample_distorted_bounding_box(image_size, width / height within this range. area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The cropped area of the image must contain a fraction of the - supplied image within in this range. + supplied image within this range. max_attempts: An optional `int`. Defaults to `100`. Number of attempts at generating a cropped region of the image of the specified constraints. 
After `max_attempts` failures, return the diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 45499dcce0..2a6ab26e96 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -533,6 +533,37 @@ class FlipImageBenchmark(test.Benchmark): iters=benchmark_rounds, wall_time=step_time) + def _benchmarkBatchedRandomFlipLeftRight(self, device, cpu_count): + image_shape = [16, 299, 299, 3] + warmup_rounds = 100 + benchmark_rounds = 1000 + config = config_pb2.ConfigProto() + if cpu_count is not None: + config.inter_op_parallelism_threads = 1 + config.intra_op_parallelism_threads = cpu_count + with session.Session("", graph=ops.Graph(), config=config) as sess: + with ops.device(device): + inputs = variables.Variable( + random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255, + trainable=False, + dtype=dtypes.float32) + run_op = image_ops.random_flip_left_right(inputs) + sess.run(variables.global_variables_initializer()) + for i in xrange(warmup_rounds + benchmark_rounds): + if i == warmup_rounds: + start = time.time() + sess.run(run_op) + end = time.time() + step_time = (end - start) / benchmark_rounds + tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all") + print("benchmarkBatchedRandomFlipLeftRight_16_299_299_3_%s step_time: " + "%.2f us" % + (tag, step_time * 1e6)) + self.report_benchmark( + name="benchmarkBatchedRandomFlipLeftRight_16_299_299_3_%s" % (tag), + iters=benchmark_rounds, + wall_time=step_time) + def benchmarkFlipLeftRightCpu1(self): self._benchmarkFlipLeftRight("/cpu:0", 1) @@ -551,6 +582,15 @@ class FlipImageBenchmark(test.Benchmark): def benchmarkRandomFlipLeftRightGpu(self): self._benchmarkRandomFlipLeftRight(test.gpu_device_name(), None) + def benchmarkBatchedRandomFlipLeftRightCpu1(self): + self._benchmarkBatchedRandomFlipLeftRight("/cpu:0", 1) + + def benchmarkBatchedRandomFlipLeftRightCpuAll(self): + self._benchmarkBatchedRandomFlipLeftRight("/cpu:0", None) + + def benchmarkBatchedRandomFlipLeftRightGpu(self): + self._benchmarkBatchedRandomFlipLeftRight(test.gpu_device_name(), None) + class AdjustHueBenchmark(test.Benchmark): @@ -987,7 +1027,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): with self.test_session(use_gpu=True): x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_left_right(x_tf) + y = image_ops.random_flip_left_right(x_tf, seed=seed) self.assertTrue(y.op.name.startswith("random_flip_left_right")) count_flipped = 0 @@ -1008,6 +1048,50 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): self.assertGreaterEqual(count_flipped, 20) self.assertGreaterEqual(count_unflipped, 20) + def testRandomFlipLeftRightWithBatch(self): + batch_size = 16 + seed = 42 + + # create single item of test data + x_np_raw = np.array( + [[1, 2, 3], [1, 2, 3]], dtype=np.uint8 + ).reshape([1, 2, 3, 1]) + y_np_raw = np.array( + [[3, 2, 1], [3, 2, 1]], dtype=np.uint8 + ).reshape([1, 2, 3, 1]) + + # create batched test data + x_np = np.vstack([x_np_raw for _ in range(batch_size)]) + y_np = np.vstack([y_np_raw for _ in range(batch_size)]) + + with self.test_session(use_gpu=True): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.random_flip_left_right(x_tf, seed=seed) + self.assertTrue(y.op.name.startswith("random_flip_left_right")) + + count_flipped = 0 + count_unflipped = 0 + for _ in range(100): + y_tf = y.eval() + + # check every element of the batch + for i in range(batch_size): + if y_tf[i][0][0] 
== 1: + self.assertAllEqual(y_tf[i], x_np[i]) + count_unflipped += 1 + else: + self.assertAllEqual(y_tf[i], y_np[i]) + count_flipped += 1 + + # 100 trials, each containing batch_size elements + # Mean: 50 * batch_size + # Std Dev: ~5 * sqrt(batch_size) + # Six Sigma: 50 * batch_size - (5 * 6 * sqrt(batch_size)) + # = 50 * batch_size - 30 * sqrt(batch_size) = 800 - 30 * 4 = 680 + six_sigma = 50 * batch_size - 30 * np.sqrt(batch_size) + self.assertGreaterEqual(count_flipped, six_sigma) + self.assertGreaterEqual(count_unflipped, six_sigma) + def testInvolutionUpDown(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) @@ -1057,9 +1141,11 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) y_np = np.array([[4, 5, 6], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1]) + seed = 42 + with self.test_session(use_gpu=True): x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_up_down(x_tf, seed=42) + y = image_ops.random_flip_up_down(x_tf, seed=seed) self.assertTrue(y.op.name.startswith("random_flip_up_down")) count_flipped = 0 count_unflipped = 0 @@ -1079,6 +1165,50 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): self.assertGreaterEqual(count_flipped, 20) self.assertGreaterEqual(count_unflipped, 20) + def testRandomFlipUpDownWithBatch(self): + batch_size = 16 + seed = 42 + + # create single item of test data + x_np_raw = np.array( + [[1, 2, 3], [4, 5, 6]], dtype=np.uint8 + ).reshape([1, 2, 3, 1]) + y_np_raw = np.array( + [[4, 5, 6], [1, 2, 3]], dtype=np.uint8 + ).reshape([1, 2, 3, 1]) + + # create batched test data + x_np = np.vstack([x_np_raw for _ in range(batch_size)]) + y_np = np.vstack([y_np_raw for _ in range(batch_size)]) + + with self.test_session(use_gpu=True): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.random_flip_up_down(x_tf, seed=seed) + self.assertTrue(y.op.name.startswith("random_flip_up_down")) + + count_flipped = 0 + count_unflipped = 0 + for _ in range(100): + y_tf = y.eval() + + # check every element of the batch + for i in range(batch_size): + if y_tf[i][0][0] == 1: + self.assertAllEqual(y_tf[i], x_np[i]) + count_unflipped += 1 + else: + self.assertAllEqual(y_tf[i], y_np[i]) + count_flipped += 1 + + # 100 trials, each containing batch_size elements + # Mean: 50 * batch_size + # Std Dev: ~5 * sqrt(batch_size) + # Six Sigma: 50 * batch_size - (5 * 6 * sqrt(batch_size)) + # = 50 * batch_size - 30 * sqrt(batch_size) = 800 - 30 * 4 = 680 + six_sigma = 50 * batch_size - 30 * np.sqrt(batch_size) + self.assertGreaterEqual(count_flipped, six_sigma) + self.assertGreaterEqual(count_unflipped, six_sigma) + def testInvolutionTranspose(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) @@ -1156,6 +1286,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): #Ops that support 4D input for op in [ image_ops.flip_left_right, image_ops.flip_up_down, + image_ops.random_flip_left_right, image_ops.random_flip_up_down, image_ops.transpose_image, image_ops.rot90 ]: transformed_unknown_dims_4 = op(p_unknown_dims_4) @@ -1166,14 +1297,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): "must be at least three-dimensional"): op(p_wrong_rank) - for op in [ - image_ops.random_flip_left_right, - image_ops.random_flip_up_down, - ]: - with self.assertRaisesRegexp(ValueError, "must be three-dimensional"): - op(p_wrong_rank) - - def testRot90GroupOrder(self): image = 
np.arange(24, dtype=np.uint8).reshape([2, 4, 3]) with self.test_session(use_gpu=True): @@ -1208,41 +1331,6 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): y_np = np.rot90(image, k=k, axes=(1, 2)) self.assertAllEqual(y_np, y_tf.eval({k_placeholder: k})) -class RandomFlipTest(test_util.TensorFlowTestCase): - - def testRandomLeftRight(self): - x_np = np.array([0, 1], dtype=np.uint8).reshape([1, 2, 1]) - num_iterations = 500 - - hist = [0, 0] - with self.test_session(use_gpu=True): - x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_left_right(x_tf) - for _ in xrange(num_iterations): - y_np = y.eval().flatten()[0] - hist[y_np] += 1 - - # Ensure that each entry is observed within 4 standard deviations. - four_stddev = 4.0 * np.sqrt(num_iterations / 2.0) - self.assertAllClose(hist, [num_iterations / 2.0] * 2, atol=four_stddev) - - def testRandomUpDown(self): - x_np = np.array([0, 1], dtype=np.uint8).reshape([2, 1, 1]) - num_iterations = 500 - - hist = [0, 0] - with self.test_session(use_gpu=True): - x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_up_down(x_tf) - for _ in xrange(num_iterations): - y_np = y.eval().flatten()[0] - hist[y_np] += 1 - - # Ensure that each entry is observed within 4 standard deviations. - four_stddev = 4.0 * np.sqrt(num_iterations / 2.0) - self.assertAllClose(hist, [num_iterations / 2.0] * 2, atol=four_stddev) - - class AdjustContrastTest(test_util.TensorFlowTestCase): def _testContrast(self, x_np, y_np, contrast_factor): @@ -3880,5 +3968,88 @@ class SobelEdgesTest(test_util.TensorFlowTestCase): self.assertAllClose(expected_batch, actual_sobel) +class DecodeImageTest(test_util.TensorFlowTestCase): + + def testJpegUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/jpeg/testdata" + jpeg0 = io_ops.read_file(os.path.join(base, "jpeg_merge_test1.jpg")) + image0 = image_ops.decode_image(jpeg0, dtype=dtypes.uint16) + image1 = image_ops.convert_image_dtype(image_ops.decode_jpeg(jpeg0), + dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testPngUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/png/testdata" + png0 = io_ops.read_file(os.path.join(base, "lena_rgba.png")) + image0 = image_ops.decode_image(png0, dtype=dtypes.uint16) + image1 = image_ops.convert_image_dtype( + image_ops.decode_png(png0, dtype=dtypes.uint16), dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testGifUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/gif/testdata" + gif0 = io_ops.read_file(os.path.join(base, "scan.gif")) + image0 = image_ops.decode_image(gif0, dtype=dtypes.uint16) + image1 = image_ops.convert_image_dtype(image_ops.decode_gif(gif0), + dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testBmpUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/bmp/testdata" + bmp0 = io_ops.read_file(os.path.join(base, "lena.bmp")) + image0 = image_ops.decode_image(bmp0, dtype=dtypes.uint16) + image1 = image_ops.convert_image_dtype(image_ops.decode_bmp(bmp0), + dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testJpegFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/jpeg/testdata" + jpeg0 = 
io_ops.read_file(os.path.join(base, "jpeg_merge_test1.jpg")) + image0 = image_ops.decode_image(jpeg0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype(image_ops.decode_jpeg(jpeg0), + dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testPngFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/png/testdata" + png0 = io_ops.read_file(os.path.join(base, "lena_rgba.png")) + image0 = image_ops.decode_image(png0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype( + image_ops.decode_png(png0, dtype=dtypes.uint16), dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testGifFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/gif/testdata" + gif0 = io_ops.read_file(os.path.join(base, "scan.gif")) + image0 = image_ops.decode_image(gif0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype(image_ops.decode_gif(gif0), + dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testBmpFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/bmp/testdata" + bmp0 = io_ops.read_file(os.path.join(base, "lena.bmp")) + image0 = image_ops.decode_image(bmp0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype(image_ops.decode_bmp(bmp0), + dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index 2df230d470..724fcc39cd 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -467,7 +467,8 @@ class VarianceScaling(Initializer): else: scale /= max(1., (fan_in + fan_out) / 2.) if self.distribution == "normal": - stddev = math.sqrt(scale) + # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) + stddev = math.sqrt(scale) / .87962566103423978 return random_ops.truncated_normal( shape, 0.0, stddev, dtype, seed=self.seed) else: diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py index 222b8ebc9d..8276047cb6 100644 --- a/tensorflow/python/ops/logging_ops.py +++ b/tensorflow/python/ops/logging_ops.py @@ -35,8 +35,9 @@ from tensorflow.python.util.tf_export import tf_export # Assert and Print are special symbols in python, so we must -# use an upper-case version of them. -@tf_export("Print") +# have an upper-case version of them. For users with Python 3 or Python 2.7 +# with `from __future__ import print_function`, we also allow lowercase. +@tf_export("Print", "print") def Print(input_, data, message=None, first_n=None, summarize=None, name=None): """Prints a list of tensors. diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index e40481f3a7..466d0dadc8 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -125,8 +125,8 @@ def abs(x, name=None): # pylint: disable=redefined-builtin ``` Args: - x: A `Tensor` or `SparseTensor` of type `float32`, `float64`, `int32`, - `int64`, `complex64` or `complex128`. + x: A `Tensor` or `SparseTensor` of type `float16`, `float32`, `float64`, + `int32`, `int64`, `complex64` or `complex128`. name: A name for the operation (optional). 
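The VarianceScaling change above divides the requested standard deviation by 0.87962566103423978, the standard deviation of a unit normal truncated to [-2, 2], so the truncated samples come out with roughly the requested spread. A small sketch reproducing that constant, assuming scipy and numpy are available:

    import numpy as np
    from scipy.stats import truncnorm

    # Standard deviation of N(0, 1) truncated to [-2, 2]; matches the constant above.
    c = truncnorm.std(a=-2., b=2., loc=0., scale=1.)
    print(c)  # ~0.8796256610342398

    # Compensating by 1/c: samples truncated at two (scaled) standard deviations
    # end up with an empirical stddev close to the requested sqrt(scale).
    scale = 2.0
    stddev = np.sqrt(scale) / c
    samples = truncnorm.rvs(a=-2., b=2., loc=0., scale=stddev, size=200000)
    print(samples.std(), np.sqrt(scale))  # both ~1.414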
Returns: @@ -430,10 +430,10 @@ def pow(x, y, name=None): # pylint: disable=redefined-builtin ``` Args: - x: A `Tensor` of type `float32`, `float64`, `int32`, `int64`, `complex64`, - or `complex128`. - y: A `Tensor` of type `float32`, `float64`, `int32`, `int64`, `complex64`, - or `complex128`. + x: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, `int64`, + `complex64`, or `complex128`. + y: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, `int64`, + `complex64`, or `complex128`. name: A name for the operation (optional). Returns: @@ -600,7 +600,7 @@ def round(x, name=None): # pylint: disable=redefined-builtin ``` Args: - x: A `Tensor` of type `float32` or `float64`. + x: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, or `int64`. name: A name for the operation (optional). Returns: @@ -1257,7 +1257,7 @@ def reduce_sum(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. For example: @@ -1397,7 +1397,7 @@ def reduce_mean(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. For example: @@ -1469,7 +1469,7 @@ def reduce_prod(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. Args: @@ -1519,7 +1519,7 @@ def reduce_min(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. Args: @@ -1568,7 +1568,7 @@ def reduce_max(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. Args: @@ -1617,7 +1617,7 @@ def reduce_all(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. For example: @@ -1675,7 +1675,7 @@ def reduce_any(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. For example: diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 783d485892..f47f38e29e 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -621,7 +621,7 @@ def normalize_moments(counts, mean_ss, variance_ss, shift, name=None): """Calculate the mean and variance of based on the sufficient statistics. Args: - counts: A `Tensor` containing a the total count of the data (one value). + counts: A `Tensor` containing the total count of the data (one value). 
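The reduce_* docstring fixes above all make the same point: when axis is None (the default), every dimension is reduced and a single-element tensor is returned. A minimal illustration, assuming the 1.x tf.reduce_sum API:

    import tensorflow as tf

    x = tf.constant([[1., 1., 1.],
                     [1., 1., 1.]])
    tf.reduce_sum(x)                         # axis=None: all dims reduced -> 6.0
    tf.reduce_sum(x, axis=0)                 # -> [2., 2., 2.]
    tf.reduce_sum(x, axis=1, keepdims=True)  # -> [[3.], [3.]]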
mean_ss: A `Tensor` containing the mean sufficient statistics: the (possibly shifted) sum of the elements to average over. variance_ss: A `Tensor` containing the variance sufficient statistics: the @@ -689,6 +689,9 @@ def moments( # Compute true mean while keeping the dims for proper broadcasting. mean = math_ops.reduce_mean(y, axes, keepdims=True, name="mean") # sample variance, not unbiased variance + # Note: stop_gradient does not change the gradient that gets + # backpropagated to the mean from the variance calculation, + # because that gradient is zero variance = math_ops.reduce_mean( math_ops.squared_difference(y, array_ops.stop_gradient(mean)), axes, diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index a0b55eb077..0c2f5b06c4 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1596,12 +1596,12 @@ def leaky_relu(features, alpha=0.2, name=None): Returns: The activation value. """ - with ops.name_scope(name, "LeakyRelu", [features, alpha]): + with ops.name_scope(name, "LeakyRelu", [features, alpha]) as name: features = ops.convert_to_tensor(features, name="features") if features.dtype.is_integer: features = math_ops.to_float(features) alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha") - return math_ops.maximum(alpha * features, features) + return math_ops.maximum(alpha * features, features, name=name) def _flatten_outer_dims(logits): diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py index 46a5f4fae6..035b4735af 100644 --- a/tensorflow/python/ops/nn_test.py +++ b/tensorflow/python/ops/nn_test.py @@ -962,6 +962,16 @@ class LeakyReluTest(test_lib.TestCase): self.assertAllClose( outputs, [-0.4, -0.2, 0.0, 1.0, 2.0], rtol=tol, atol=tol) + def testName(self): + np_values = np.array([-2, -1, 0, 1, 2], dtype=np.float64) + outputs_with_name_set = nn_ops.leaky_relu( + constant_op.constant(np_values), + name='test_relu_op') + self.assertEqual(outputs_with_name_set.name, 'test_relu_op:0') + outputs_without_name_set = nn_ops.leaky_relu( + constant_op.constant(np_values)) + self.assertEqual(outputs_without_name_set.name, 'LeakyRelu:0') + class SwishTest(test_lib.TestCase): diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py index f8676ccb5f..219562de5d 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -23,6 +23,7 @@ import threading # Used by py_util.cc to get tracebacks. import traceback # pylint: disable=unused-import +import weakref import numpy as np import six @@ -129,11 +130,14 @@ class FuncRegistry(object): def __init__(self): self._lock = threading.Lock() self._unique_id = 0 # GUARDED_BY(self._lock) - self._funcs = {} + # Only store weakrefs to the funtions. The strong reference is stored in + # the graph. + self._funcs = weakref.WeakValueDictionary() def insert(self, func): """Registers `func` and returns a unique token for this entry.""" token = self._next_unique_token() + # Store a weakref to the function self._funcs[token] = func return token @@ -186,7 +190,7 @@ class FuncRegistry(object): Raises: ValueError: if no function is registered for `token`. 
""" - func = self._funcs[token] + func = self._funcs.get(token, None) if func is None: raise ValueError("callback %s is not found" % token) if isinstance(func, EagerFunc): @@ -228,19 +232,6 @@ _py_funcs = FuncRegistry() pywrap_tensorflow.InitializePyTrampoline(_py_funcs) -class CleanupFunc(object): - """A helper class to remove a registered function from _py_funcs.""" - - def __init__(self, token): - self._token = token - - def __del__(self): - if _py_funcs is not None: - # If _py_funcs is None, the program is most likely in shutdown, and the - # _py_funcs object has been destroyed already. - _py_funcs.remove(self._token) - - def _internal_py_func(func, inp, Tout, @@ -270,17 +261,15 @@ def _internal_py_func(func, # bound to that of the outer graph instead. graph = graph._outer_graph - cleanup = CleanupFunc(token) - # TODO(zhifengc): Consider adding a Graph method to collect # `cleanup` objects in one of its member. - if not hasattr(graph, "_cleanup_py_funcs_used_in_graph"): - graph._cleanup_py_funcs_used_in_graph = [] + if not hasattr(graph, "_py_funcs_used_in_graph"): + graph._py_funcs_used_in_graph = [] - # When `graph` is destroyed, elements in _cleanup_py_funcs_used_in_graph - # will be destroyed and their __del__ will remove the 'token' from - # the funcs registry. - graph._cleanup_py_funcs_used_in_graph.append(cleanup) + # Store a reference to the function in the graph to ensure it stays alive + # as long as the graph lives. When the graph is destroyed, the function + # is left to the garbage collector for destruction as well. + graph._py_funcs_used_in_graph.append(func) # pylint: enable=protected-access if eager: diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 0130233746..c3b16a7bd5 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -84,6 +84,8 @@ def _convert_to_sparse_tensors(sp_inputs): # pylint: disable=protected-access @tf_export("sparse_concat") +@deprecation.deprecated_args( + None, "concat_dim is deprecated, use axis instead", "concat_dim") def sparse_concat(axis, sp_inputs, name=None, @@ -597,6 +599,8 @@ class KeywordRequired(object): @tf_export("sparse_split") +@deprecation.deprecated_args( + None, "split_dim is deprecated, use axis instead", "split_dim") def sparse_split(keyword_required=KeywordRequired(), sp_input=None, num_split=None, diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py index ae79c01949..0280c89c10 100644 --- a/tensorflow/python/ops/string_ops.py +++ b/tensorflow/python/ops/string_ops.py @@ -91,6 +91,59 @@ def string_split(source, delimiter=" ", skip_empty=True): # pylint: disable=inv shape.set_shape([2]) return sparse_tensor.SparseTensor(indices, values, shape) +@tf_export("strings.split") +def string_split_v2(source, sep=None, maxsplit=-1): + """Split elements of `source` based on `sep` into a `SparseTensor`. + + Let N be the size of source (typically N will be the batch size). Split each + element of `source` based on `sep` and return a `SparseTensor` + containing the split tokens. Empty tokens are ignored. + + For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c', + then the output will be + + st.indices = [0, 0; + 0, 1; + 1, 0; + 1, 1; + 1, 2] + st.shape = [2, 3] + st.values = ['hello', 'world', 'a', 'b', 'c'] + + If `sep` is given, consecutive delimiters are not grouped together and are + deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and + sep of `"<>"` returns `["1", "2", "", "3"]`. 
If `sep` is None or an empty + string, consecutive whitespace are regarded as a single separator, and the + result will contain no empty strings at the startor end if the string has + leading or trailing whitespace. + + Note that the above mentioned behavior matches python's str.split. + + Args: + source: `1-D` string `Tensor`, the strings to split. + sep: `0-D` string `Tensor`, the delimiter character. + maxsplit: An `int`. If `maxsplit > 0`, limit of the split of the result. + + Raises: + ValueError: If sep is not a string. + + Returns: + A `SparseTensor` of rank `2`, the strings split according to the delimiter. + The first column of the indices corresponds to the row in `source` and the + second column corresponds to the index of the split component in this row. + """ + if sep is None: + sep = '' + sep = ops.convert_to_tensor(sep, dtype=dtypes.string) + source = ops.convert_to_tensor(source, dtype=dtypes.string) + + indices, values, shape = gen_string_ops.string_split_v2( + source, sep=sep, maxsplit=maxsplit) + indices.set_shape([None, 2]) + values.set_shape([None]) + shape.set_shape([2]) + return sparse_tensor.SparseTensor(indices, values, shape) + def _reduce_join_reduction_dims(x, axis, reduction_indices): """Returns range(rank(x) - 1, 0, -1) if reduction_indices is None.""" diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index f49e2d314d..47414c28af 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1786,6 +1786,23 @@ class variable_scope(object): assert v.name == "foo/bar/v:0" ``` + Simple example of how to reenter a premade variable scope safely: + + ```python + with tf.variable_scope("foo") as vs: + pass + + # Re-enter the variable scope. + with tf.variable_scope(vs, + auxiliary_name_scope=False) as vs1: + # Restore the original name_scope. + with tf.name_scope(vs1.original_name_scope): + v = tf.get_variable("v", [1]) + assert v.name == "foo/v:0" + c = tf.constant([1], name="c") + assert c.name == "foo/c:0" + ``` + Basic example of sharing a variable AUTO_REUSE: ```python @@ -1924,7 +1941,9 @@ class variable_scope(object): (which must have the same shape). Constraints are not safe to use when doing asynchronous distributed training. auxiliary_name_scope: If `True`, we create an auxiliary name scope with - the scope. If `False`, we don't touch name scope. + the scope. If `False`, we don't create it. Note that the argument is + not inherited, and it only takes effect for once when creating. You + should only use it for re-entering a premade variable scope. Returns: A scope that can be captured and reused. 
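A short usage sketch of the tf.strings.split op added above, assuming a build that includes this patch and 1.x graph mode; the expected outputs are taken from the docstring example:

    import tensorflow as tf

    st = tf.strings.split(tf.constant(["hello world", "a b c"]))
    with tf.Session() as sess:
      print(sess.run(st.values))       # ['hello' 'world' 'a' 'b' 'c']
      print(sess.run(st.indices))      # [[0 0] [0 1] [1 0] [1 1] [1 2]]
      print(sess.run(st.dense_shape))  # [2 3]

    # With an explicit separator, consecutive delimiters produce empty tokens:
    st2 = tf.strings.split(tf.constant(["1<>2<><>3"]), sep="<>")
    # st2.values -> ['1', '2', '', '3']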
diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py old mode 100755 new mode 100644 diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 1f9fbad0b4..c3bc9ccd45 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1723,7 +1723,7 @@ def tf_py_build_info_genrule(): name="py_build_info_gen", outs=["platform/build_info.py"], cmd= - "$(location //tensorflow/tools/build_info:gen_build_info.py) --raw_generate \"$@\" --build_config " + if_cuda("cuda", "cpu"), + "$(location //tensorflow/tools/build_info:gen_build_info.py) --raw_generate \"$@\" --build_config " + if_cuda("cuda", "cpu"), local=1, tools=[clean_dep("//tensorflow/tools/build_info:gen_build_info.py")],) diff --git a/tensorflow/tools/api/generator/create_python_api.py b/tensorflow/tools/api/generator/create_python_api.py index bca9fa49eb..671b7e387e 100644 --- a/tensorflow/tools/api/generator/create_python_api.py +++ b/tensorflow/tools/api/generator/create_python_api.py @@ -41,7 +41,11 @@ _GENERATED_FILE_HEADER = """# This file is MACHINE GENERATED! Do not edit. # Generated by: tensorflow/tools/api/generator/create_python_api.py script. \"\"\"%s \"\"\" + +from __future__ import print_function + """ +_GENERATED_FILE_FOOTER = "\n\ndel print_function\n" class SymbolExposedTwiceError(Exception): @@ -149,6 +153,7 @@ class _ModuleInitCodeBuilder(object): _names_with_underscore = [%s] __all__ = [_s for _s in dir() if not _s.startswith('_')] __all__.extend([_s for _s in _names_with_underscore]) +__all__.remove('print_function') ''' % underscore_names_str return module_text_map @@ -333,7 +338,8 @@ def create_api_files( if module or not root_init_template: contents = ( _GENERATED_FILE_HEADER % - get_module_docstring(module, package, api_name) + text) + get_module_docstring(module, package, api_name) + + text + _GENERATED_FILE_FOOTER) else: # Read base init file with open(root_init_template, 'r') as root_init_template_file: diff --git a/tensorflow/tools/api/golden/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/tensorflow.image.pbtxt index 5bb3b3c444..10171b3d60 100644 --- a/tensorflow/tools/api/golden/tensorflow.image.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.image.pbtxt @@ -58,7 +58,7 @@ tf_module { } member_method { name: "decode_image" - argspec: "args=[\'contents\', \'channels\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'contents\', \'channels\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"\", \'None\'], " } member_method { name: "decode_jpeg" diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index dc2bd40096..3051c4437e 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -1532,6 +1532,10 @@ tf_module { name: "pow" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "print" + argspec: "args=[\'input_\', \'data\', \'message\', \'first_n\', \'summarize\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " + } member_method { name: "py_func" argspec: "args=[\'func\', \'inp\', \'Tout\', \'stateful\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " diff --git a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt index a3fbe95bba..b641c39feb 100644 
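The golden-API change above records the new dtype argument on decode_image, which the DecodeImageTest cases earlier exercise. A hedged sketch of how it is used; the filename is illustrative only:

    import tensorflow as tf

    contents = tf.read_file("example.png")  # illustrative path
    img_u8  = tf.image.decode_image(contents)                    # default: uint8
    img_u16 = tf.image.decode_image(contents, dtype=tf.uint16)   # rescaled to the uint16 range
    img_f32 = tf.image.decode_image(contents, dtype=tf.float32)  # values in [0.0, 1.0]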
--- a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt @@ -4,4 +4,8 @@ tf_module { name: "regex_full_match" argspec: "args=[\'input\', \'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "split" + argspec: "args=[\'source\', \'sep\', \'maxsplit\'], varargs=None, keywords=None, defaults=[\'None\', \'-1\'], " + } } diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh index 5fa75e1d61..883bb93647 100755 --- a/tensorflow/tools/ci_build/builds/pip.sh +++ b/tensorflow/tools/ci_build/builds/pip.sh @@ -322,6 +322,10 @@ create_activate_virtualenv_and_install_tensorflow() { pip install -v ${PIP_FLAGS} ${WHL_PATH} || \ die "pip install (forcing to reinstall tensorflow) FAILED" echo "Successfully installed pip package ${TF_WHEEL_PATH}" + + # Force downgrade setuptools. + pip install --upgrade setuptools==39.1.0 + } ################################################################################ diff --git a/tensorflow/tools/ci_build/builds/with_the_same_user b/tensorflow/tools/ci_build/builds/with_the_same_user index d4bf546d40..b216e3549f 100755 --- a/tensorflow/tools/ci_build/builds/with_the_same_user +++ b/tensorflow/tools/ci_build/builds/with_the_same_user @@ -40,7 +40,7 @@ if [ -n "${CI_BUILD_USER_FORCE_BADNAME}" ]; then ADDUSER_OPTS="--force-badname" fi -getent group "${CI_BUILD_GID}" || addgroup --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" +getent group "${CI_BUILD_GID}" || addgroup ${ADDUSER_OPTS} --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \ --gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" \ --gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" \ diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh index 072dd6ab99..1f0fd0387a 100755 --- a/tensorflow/tools/ci_build/ci_build.sh +++ b/tensorflow/tools/ci_build/ci_build.sh @@ -134,6 +134,12 @@ if [[ $? != "0" ]]; then die "ERROR: docker build failed. Dockerfile is at ${DOCKERFILE_PATH}" fi +# If caller wants the with_the_same_user script to allow bad usernames, +# pass the var to the docker environment +if [ -n "${CI_BUILD_USER_FORCE_BADNAME}" ]; then + CI_BUILD_USER_FORCE_BADNAME_ENV="-e CI_BUILD_USER_FORCE_BADNAME=yes" +fi + # Run the command inside the container. echo "Running '${COMMAND[*]}' inside ${DOCKER_IMG_NAME}..." 
mkdir -p ${WORKSPACE}/bazel-ci_build-cache @@ -148,6 +154,7 @@ ${DOCKER_BINARY} run --rm --pid=host \ -e "CI_BUILD_GROUP=$(id -g -n)" \ -e "CI_BUILD_GID=$(id -g)" \ -e "CI_TENSORFLOW_SUBMODULE_PATH=${CI_TENSORFLOW_SUBMODULE_PATH}" \ + ${CI_BUILD_USER_FORCE_BADNAME_ENV} \ -v ${WORKSPACE}:/workspace \ -w /workspace \ ${GPU_EXTRA_PARAMS} \ diff --git a/tensorflow/tools/ci_build/copy_binary.py b/tensorflow/tools/ci_build/copy_binary.py index 420d390d2b..148526492d 100755 --- a/tensorflow/tools/ci_build/copy_binary.py +++ b/tensorflow/tools/ci_build/copy_binary.py @@ -32,7 +32,8 @@ import shutil import tempfile import zipfile -TF_NIGHTLY_REGEX = r"(.+)tf_nightly(|_gpu)-(\d\.\d\.\d.dev[\d]{0,8})-(.+)\.whl" +TF_NIGHTLY_REGEX = (r"(.+)tf_nightly(|_gpu)-(\d\.[\d]{1,2}" + "\.\d.dev[\d]{0,8})-(.+)\.whl") BINARY_STRING_TEMPLATE = "%s-%s-%s.whl" diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh index 60290df833..88f1d04193 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh @@ -115,3 +115,7 @@ pip2 install keras_applications==1.0.2 pip3 install keras_applications==1.0.2 pip2 install keras_preprocessing==1.0.1 pip3 install keras_preprocessing==1.0.1 + +# Install last working version of setuptools. +pip2 install --upgrade setuptools==39.1.0 +pip3 install --upgrade setuptools==39.1.0 diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh index edb9d4b929..acd69ef346 100755 --- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh @@ -39,7 +39,6 @@ if [[ -z $pip35_version ]]; then fi set -e -pip3.5 install --upgrade setuptools pip3.5 install --upgrade pip pip3.5 install --upgrade virtualenv @@ -86,4 +85,7 @@ pip3.5 install --upgrade termcolor pip3.5 install keras_applications==1.0.2 pip3.5 install keras_preprocessing==1.0.1 +# Install last working version of setuptools. +pip3.5 install --upgrade setuptools==39.1.0 + # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh) diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh index 5635977731..323b30f48e 100755 --- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh @@ -49,7 +49,6 @@ cd Python-3.6.1 make altinstall ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 -pip3 install --upgrade setuptools pip3 install --upgrade pip pip3 install --upgrade virtualenv @@ -101,4 +100,8 @@ pip3 install --upgrade termcolor # Keras pip3.5 install keras_applications==1.0.2 pip3.5 install keras_preprocessing==1.0.1 + +# Install last working version of setuptools. +pip3 install --upgrade setuptools==39.1.0 + # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh) diff --git a/tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh b/tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh new file mode 100755 index 0000000000..10a09a415a --- /dev/null +++ b/tensorflow/tools/ci_build/linux/mkl/basic-mkl-test.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Usage: basic_mkl_test.sh + +# Helper function to traverse directories up until given file is found. +function upsearch () { + test / == "$PWD" && return || \ + test -e "$1" && echo "$PWD" && return || \ + cd .. && upsearch "$1" +} + +# Set up WORKSPACE. +WORKSPACE="${WORKSPACE:-$(upsearch WORKSPACE)}" + +BUILD_TAG=mkl-ci-test CI_BUILD_USER_FORCE_BADNAME=yes ${WORKSPACE}/tensorflow/tools/ci_build/ci_build.sh cpu tensorflow/tools/ci_build/linux/cpu/run_mkl.sh diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh index 1bd1852ffc..b8bce57c87 100755 --- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh +++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh @@ -79,6 +79,7 @@ if [[ $1 == "PI_ONE" ]]; then --linkopt=-L${OPENBLAS_INSTALL_PATH}/lib/ --linkopt=-l:libopenblas.a" echo "Building for the Pi One/Zero, with no NEON support" + WHEEL_ARCH=linux_armv6l else PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4 --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR @@ -86,6 +87,7 @@ else --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8' + WHEEL_ARCH=linux_armv7l echo "Building for the Pi Two/Three, with NEON acceleration" fi @@ -100,6 +102,8 @@ bazel build -c opt ${PI_COPTS} \ --copt=-fomit-frame-pointer --cpu=armeabi \ --crosstool_top=@local_config_arm_compiler//:toolchain \ --verbose_failures \ + //tensorflow:libtensorflow.so \ + //tensorflow:libtensorflow_framework.so \ //tensorflow/tools/benchmark:benchmark_model \ //tensorflow/tools/pip_package:build_pip_package @@ -112,10 +116,12 @@ BDIST_OPTS="--universal" \ bazel-bin/tensorflow/tools/pip_package/build_pip_package "${OUTDIR}" OLD_FN=$(ls "${OUTDIR}" | grep -m 1 \.whl) -SUB='s/tensorflow-([^-]+)-([^-]+)-.*/tensorflow-\1-\2-none-any.whl/; print' +SUB='s/tensorflow-([^-]+)-([^-]+)-.*/tensorflow-\1-\2-none-'${WHEEL_ARCH}'.whl/; print' NEW_FN=$(echo "${OLD_FN}" | perl -ne "${SUB}") mv "${OUTDIR}/${OLD_FN}" "${OUTDIR}/${NEW_FN}" cp bazel-bin/tensorflow/tools/benchmark/benchmark_model "${OUTDIR}" +cp bazel-bin/tensorflow/libtensorflow.so "${OUTDIR}" +cp bazel-bin/tensorflow/libtensorflow_framework.so "${OUTDIR}" echo "Output can be found here:" find "${OUTDIR}" diff --git a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl index 47539b2423..f8f63e276c 100644 --- a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl +++ b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl @@ -31,7 +31,11 @@ def _def_file_filter_configure_impl(repository_ctx): vc_path = find_vc_path(repository_ctx) if vc_path == "visual-studio-not-found": auto_configure_fail("Visual C++ build tools not found on your machine") - undname_bin_path = find_msvc_tool(repository_ctx, vc_path, 
"undname.exe").replace("\\", "\\\\") + + undname = find_msvc_tool(repository_ctx, vc_path, "undname.exe") + if undname == None: + auto_configure_fail("Couldn't find undname.exe under %s, please check your VC installation and set BAZEL_VC environment variable correctly." % vc_path) + undname_bin_path = undname.replace("\\", "\\\\") repository_ctx.template( "def_file_filter.py", diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh index 06c2b997cb..b0114721bd 100755 --- a/tensorflow/tools/dist_test/local_test.sh +++ b/tensorflow/tools/dist_test/local_test.sh @@ -64,9 +64,6 @@ die() { # Configurations DOCKER_IMG_NAME="tensorflow/tf-dist-test-local-cluster" -# Use TensorFlow v1.5.0 for Python 2.7 and CPU only as we set num_gpus to 0 in the below -DEFAULT_WHL_FILE_LOCATION="https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl" - # Parse input arguments LEAVE_CONTAINER_RUNNING=0 MODEL_NAME="" @@ -77,8 +74,7 @@ SYNC_REPLICAS_FLAG="" WHL_FILE_LOCATION=${1} if [[ -z "${WHL_FILE_LOCATION}" ]]; then - WHL_FILE_LOCATION=${DEFAULT_WHL_FILE_LOCATION} - echo "use default whl file location" + echo "WARNING: No wheel url passed. Will use latest tf-nightly cpu p2 wheel." fi while true; do @@ -131,7 +127,11 @@ echo "Building in temporary directory: ${BUILD_DIR}" cp -r ${DIR}/* "${BUILD_DIR}"/ || \ die "Failed to copy files to ${BUILD_DIR}" -if [[ $WHL_FILE_LOCATION =~ 'http://' || $WHL_FILE_LOCATION =~ 'https://' ]]; then +# Download whl file into the build context directory. +if [[ -z "${WHL_FILE_LOCATION}" ]]; then + pip2 download --no-deps tf-nightly + cp tf-nightly-*.whl "${BUILD_DIR}"/tensorflow-none-any.whl +elif [[ $WHL_FILE_LOCATION =~ 'http://' || $WHL_FILE_LOCATION =~ 'https://' ]]; then # Download whl file into the build context directory. wget -P "${BUILD_DIR}" "${WHL_FILE_LOCATION}" || \ die "Failed to download tensorflow whl file from URL: ${WHL_FILE_LOCATION}" diff --git a/tensorflow/tools/dist_test/remote_test.sh b/tensorflow/tools/dist_test/remote_test.sh index 935535312d..e188c88c8f 100755 --- a/tensorflow/tools/dist_test/remote_test.sh +++ b/tensorflow/tools/dist_test/remote_test.sh @@ -108,7 +108,7 @@ fi # Parse command-line arguments. WHL_URL=${1} if [[ -z "${WHL_URL}" ]]; then - die "whl URL is not specified" + echo "WARNING: No wheel url passed. Will use latest tf-nightly cpu p2 wheel." fi # Create docker build context directory. @@ -121,8 +121,13 @@ cp -r ${DIR}/* ${BUILD_DIR}/ || \ die "Failed to copy files to ${BUILD_DIR}" # Download whl file into the build context directory. -wget -P "${BUILD_DIR}" ${WHL_URL} || \ - die "Failed to download tensorflow whl file from URL: ${WHL_URL}" +if [[ -z "${WHL_URL}" ]]; then + pip2 download --no-deps tf-nightly + cp tf-nightly-*.whl "${BUILD_DIR}"/tensorflow-none-any.whl +else + wget -P "${BUILD_DIR}" ${WHL_URL} || \ + die "Failed to download tensorflow whl file from URL: ${WHL_URL}" +fi # Build docker image for test. docker build ${NO_CACHE_FLAG} \ diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 406d134699..57a491255e 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -76,7 +76,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.9 --depth=1 https://github.com/tensorflow/tensorflow.git . 
# TODO(craigcitro): Don't install the pip package, since it makes it # more difficult to experiment with local changes. Instead, just add diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl index a6cd44ced1..6796ad70e5 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -3,7 +3,7 @@ FROM tensorflow/tensorflow:latest-devel LABEL maintainer="Clayne Robison" # These arguments are parameterized. Use --build-args to override. -ARG TF_BRANCH=r1.8 +ARG TF_BRANCH=r1.9 ARG WHL_DIR=/whl RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 2fe47f3356..204b5b4dba 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -13,8 +13,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ cuda-cusparse-dev-9-0 \ curl \ git \ - libcudnn7=7.0.5.15-1+cuda9.0 \ - libcudnn7-dev=7.0.5.15-1+cuda9.0 \ + libcudnn7=7.1.4.18-1+cuda9.0 \ + libcudnn7-dev=7.1.4.18-1+cuda9.0 \ libcurl3-dev \ libfreetype6-dev \ libhdf5-serial-dev \ @@ -85,7 +85,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.9 --depth=1 https://github.com/tensorflow/tensorflow.git . # Configure the build for our CUDA configuration. ENV CI_BUILD_PYTHON python diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu index bff4a20392..9197651ff4 100644 --- a/tensorflow/tools/docker/Dockerfile.gpu +++ b/tensorflow/tools/docker/Dockerfile.gpu @@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ cuda-cusolver-9-0 \ cuda-cusparse-9-0 \ curl \ - libcudnn7=7.0.5.15-1+cuda9.0 \ + libcudnn7=7.1.4.18-1+cuda9.0 \ libfreetype6-dev \ libhdf5-serial-dev \ libpng12-dev \ diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index d0fd0fae97..d149365ac1 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -61,6 +61,7 @@ COMMON_PIP_DEPS = [ "//tensorflow/contrib/autograph/core:test_lib", "//tensorflow/contrib/autograph/impl:impl", "//tensorflow/contrib/autograph/lang:lang", + "//tensorflow/contrib/autograph/operators:operators", "//tensorflow/contrib/autograph/pyct:pyct", "//tensorflow/contrib/autograph/pyct/static_analysis:static_analysis", "//tensorflow/contrib/boosted_trees:boosted_trees_pip", diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index 0c4065bc77..f7e42ce536 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -41,51 +41,15 @@ function is_windows() { fi } -function main() { +function prepare_src() { if [ $# -lt 1 ] ; then echo "No destination dir provided" exit 1 fi - DEST=$(real_path $1) - TMPDIR=$(mktemp -d -t tmp.XXXXXXXXXX) - - PKG_NAME_FLAG="" - GPU_BUILD=0 - NIGHTLY_BUILD=0 - PROJECT_NAME="" - while true; do - if [[ "$1" == "--nightly_flag" ]]; then - NIGHTLY_BUILD=1 - elif [[ "$1" == "--gpu" ]]; then - GPU_BUILD=1 - elif [[ "$1" == "--gpudirect" ]]; then - PKG_NAME_FLAG="--project_name tensorflow_gpudirect" - elif [[ "$1" == "--project_name" ]]; then - shift - if [[ -z "$1" ]]; then - break - fi - PROJECT_NAME="$1" - fi - 
shift - - if [[ -z "$1" ]]; then - break - fi - done - - if [[ -n ${PROJECT_NAME} ]]; then - PKG_NAME_FLAG="--project_name ${PROJECT_NAME}" - elif [[ ${NIGHTLY_BUILD} == "1" && ${GPU_BUILD} == "1" ]]; then - PKG_NAME_FLAG="--project_name tf_nightly_gpu" - elif [[ ${NIGHTLY_BUILD} == "1" ]]; then - PKG_NAME_FLAG="--project_name tf_nightly" - elif [[ ${GPU_BUILD} == "1" ]]; then - PKG_NAME_FLAG="--project_name tensorflow_gpu" - fi - - echo $(date) : "=== Using tmpdir: ${TMPDIR}" + TMPDIR="$1" + mkdir -p "$TMPDIR" + echo $(date) : "=== Preparing sources in dir: ${TMPDIR}" if [ ! -d bazel-bin/tensorflow ]; then echo "Could not find bazel-bin. Did you run from the root of the build tree?" @@ -155,17 +119,28 @@ function main() { # over so user defined ops can be compiled. mkdir -p ${TMPDIR}/google mkdir -p ${TMPDIR}/third_party - pushd ${RUNFILES%org_tensorflow} + pushd ${RUNFILES%org_tensorflow} > /dev/null for header in $(find protobuf_archive -name \*.h); do mkdir -p "${TMPDIR}/google/$(dirname ${header})" cp "$header" "${TMPDIR}/google/$(dirname ${header})/" done - popd + popd > /dev/null cp -R $RUNFILES/third_party/eigen3 ${TMPDIR}/third_party cp tensorflow/tools/pip_package/MANIFEST.in ${TMPDIR} cp tensorflow/tools/pip_package/README ${TMPDIR} cp tensorflow/tools/pip_package/setup.py ${TMPDIR} +} + +function build_wheel() { + if [ $# -lt 2 ] ; then + echo "No src and dest dir provided" + exit 1 + fi + + TMPDIR="$1" + DEST="$2" + PKG_NAME_FLAG="$3" # Before we leave the top-level directory, make sure we know how to # call python. @@ -173,15 +148,110 @@ function main() { source tools/python_bin_path.sh fi - pushd ${TMPDIR} + pushd ${TMPDIR} > /dev/null rm -f MANIFEST echo $(date) : "=== Building wheel" "${PYTHON_BIN_PATH:-python}" setup.py bdist_wheel ${PKG_NAME_FLAG} >/dev/null mkdir -p ${DEST} cp dist/* ${DEST} - popd - rm -rf ${TMPDIR} + popd > /dev/null echo $(date) : "=== Output wheel file is in: ${DEST}" } +function usage() { + echo "Usage:" + echo "$0 [--src srcdir] [--dst dstdir] [options]" + echo "$0 dstdir [options]" + echo "" + echo " --src prepare sources in srcdir" + echo " will use temporary dir if not specified" + echo "" + echo " --dst build wheel in dstdir" + echo " if dstdir is not set do not build, only prepare sources" + echo "" + echo " Options:" + echo " --project_name set project name to name" + echo " --gpu build tensorflow_gpu" + echo " --gpudirect build tensorflow_gpudirect" + echo " --nightly_flag build tensorflow nightly" + echo "" + exit 1 +} + +function main() { + PKG_NAME_FLAG="" + PROJECT_NAME="" + GPU_BUILD=0 + NIGHTLY_BUILD=0 + SRCDIR="" + DSTDIR="" + CLEANSRC=1 + while true; do + if [[ "$1" == "--help" ]]; then + usage + exit 1 + elif [[ "$1" == "--nightly_flag" ]]; then + NIGHTLY_BUILD=1 + elif [[ "$1" == "--gpu" ]]; then + GPU_BUILD=1 + elif [[ "$1" == "--gpudirect" ]]; then + PKG_NAME_FLAG="--project_name tensorflow_gpudirect" + elif [[ "$1" == "--project_name" ]]; then + shift + if [[ -z "$1" ]]; then + break + fi + PROJECT_NAME="$1" + elif [[ "$1" == "--src" ]]; then + shift + SRCDIR="$(real_path $1)" + CLEANSRC=0 + elif [[ "$1" == "--dst" ]]; then + shift + DSTDIR="$(real_path $1)" + else + DSTDIR="$(real_path $1)" + fi + shift + + if [[ -z "$1" ]]; then + break + fi + done + + if [[ -z "$DSTDIR" ]] && [[ -z "$SRCDIR" ]]; then + echo "No destination dir provided" + usage + exit 1 + fi + + if [[ -z "$SRCDIR" ]]; then + # make temp srcdir if none set + SRCDIR="$(mktemp -d -t tmp.XXXXXXXXXX)" + fi + + prepare_src "$SRCDIR" + + if [[ -z "$DSTDIR" ]]; then 
+ # only want to prepare sources + exit + fi + + if [[ -n ${PROJECT_NAME} ]]; then + PKG_NAME_FLAG="--project_name ${PROJECT_NAME}" + elif [[ ${NIGHTLY_BUILD} == "1" && ${GPU_BUILD} == "1" ]]; then + PKG_NAME_FLAG="--project_name tf_nightly_gpu" + elif [[ ${NIGHTLY_BUILD} == "1" ]]; then + PKG_NAME_FLAG="--project_name tf_nightly" + elif [[ ${GPU_BUILD} == "1" ]]; then + PKG_NAME_FLAG="--project_name tensorflow_gpu" + fi + + build_wheel "$SRCDIR" "$DSTDIR" "$PKG_NAME_FLAG" + + if [[ $CLEANSRC -ne 0 ]]; then + rm -rf "${TMPDIR}" + fi +} + main "$@" diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index d25a9e77b1..97f625e7e9 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n') # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.8.0' +_VERSION = '1.9.0-rc0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', @@ -54,6 +54,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', + 'setuptools <= 39.1.0', 'tensorboard >= 1.8.0, < 1.9.0', 'termcolor >= 1.1.0', ] diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc index 29add6d5ea..15d7c70281 100644 --- a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc +++ b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc @@ -814,6 +814,9 @@ void Generator::Generate(const FileDescriptor& fd) { // Add header to cc file. SetOutput(&cc_); Print("// GENERATED FILE - DO NOT MODIFY"); + Print(); + Print("#include "); // for `std::stable_sort()` + Print(); headers = {GetProtoTextHeaderName(fd, true /* impl */)}; AddHeadersToCurrentSection(headers); Print(); diff --git a/tensorflow/tools/quantization/quantize_graph_test.py b/tensorflow/tools/quantization/quantize_graph_test.py index df71840b64..92bb5127da 100644 --- a/tensorflow/tools/quantization/quantize_graph_test.py +++ b/tensorflow/tools/quantization/quantize_graph_test.py @@ -119,8 +119,8 @@ def are_tensors_near(a, b, tolerance): flat_a = a.flatten() flat_b = b.flatten() if len(flat_a) != len(flat_b): - print("Tensors are different sizes: " + str(len(flat_a)) + " vs " + str( - len(flat_b))) + tf_logging.info("Tensors are different sizes: " + str(len(flat_a)) + " vs " + + str(len(flat_b))) return False value_count = len(flat_a) how_many_different = 0 @@ -140,10 +140,10 @@ def are_tensors_near(a, b, tolerance): if how_many_different == 0: return True else: - print("Tensors have {0} different values ({1}%), with mean difference" - " {2} and mean absolute difference {3}".format( - how_many_different, proportion_different * 100, mean_difference, - mean_abs_difference)) + tf_logging.info("Tensors have {0} different values ({1}%), with mean" + " difference {2} and mean absolute difference {3}".format( + how_many_different, proportion_different * 100, + mean_difference, mean_abs_difference)) return False diff --git a/tensorflow/tools/test/upload_test_benchmarks.py b/tensorflow/tools/test/upload_test_benchmarks.py index 9c45359ee1..c030575109 100644 --- a/tensorflow/tools/test/upload_test_benchmarks.py +++ b/tensorflow/tools/test/upload_test_benchmarks.py @@ -89,7 +89,6 @@ import shutil from six import text_type from google.cloud import datastore -from six import text_type def is_real_file(dirpath, fname): diff --git a/tensorflow/workspace.bzl 
b/tensorflow/workspace.bzl index 161d1dbd06..b4fbbd6c23 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -50,31 +50,31 @@ def tf_workspace(path_prefix="", tf_repo_name=""): mkl_repository( name = "mkl_linux", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_lnx_2018.0.2.20180127.tgz", - "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_lnx_2018.0.2.20180127.tgz", + "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.14/mklml_lnx_2018.0.3.20180406.tgz", + "https://github.com/intel/mkl-dnn/releases/download/v0.14/mklml_lnx_2018.0.3.20180406.tgz" ], - sha256 = "74844bd77294742bf2396ff040369d1aa4cdd9e826fcd38cf8398ae83564d146", - strip_prefix = "mklml_lnx_2018.0.2.20180127", + sha256 = "d2305244fdc9b87db7426ed4496e87a4b3977ad3374d73b8000e8b7a5b7aa725", + strip_prefix = "mklml_lnx_2018.0.3.20180406", build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) mkl_repository( name = "mkl_windows", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_win_2018.0.2.20180127.zip", - "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_win_2018.0.2.20180127.zip" + "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.14/mklml_win_2018.0.3.20180406.zip", + "https://github.com/intel/mkl-dnn/releases/download/v0.14/mklml_win_2018.0.3.20180406.zip" ], - sha256 = "d8fbf0faa0684bffa3548005d05fe5cfe56ff9dbc0e15e7612d7ac01055a6ded", - strip_prefix = "mklml_win_2018.0.2.20180127", + sha256 = "a584a5bf1c8d2ad70b90d12b52652030e9a338217719064fdb84b7ad0d693694", + strip_prefix = "mklml_win_2018.0.3.20180406", build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) mkl_repository( name = "mkl_darwin", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.13/mklml_mac_2018.0.2.20180127.tgz", - "https://github.com/intel/mkl-dnn/releases/download/v0.13/mklml_mac_2018.0.2.20180127.tgz" + "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.14/mklml_mac_2018.0.3.20180406.tgz", + "https://github.com/intel/mkl-dnn/releases/download/v0.14/mklml_mac_2018.0.3.20180406.tgz" ], - sha256 = "aa740d71e14562bfea56e6829e6dc186e7487cbcf6748a88dec73826b7ec1943", - strip_prefix = "mklml_mac_2018.0.2.20180127", + sha256 = "094e3dfd61c816136dc8d12a45cc611ce26c5f4828176a3644cd0b0efa15a25b", + strip_prefix = "mklml_mac_2018.0.3.20180406", build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) @@ -85,11 +85,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "mkl_dnn", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/v0.13.tar.gz", - "https://github.com/intel/mkl-dnn/archive/v0.13.tar.gz", + "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/v0.14.tar.gz", + "https://github.com/intel/mkl-dnn/archive/v0.14.tar.gz", ], - sha256 = "d2cfd93a70cfe86ebe054477c530c9b5c1218b70f75856eb6d1956c68ee89e8f", - strip_prefix = "mkl-dnn-0.13", + sha256 = "efebc53882856afec86457a2da644693f5d59c68772d41d640d6b60a8efc4eb0", + strip_prefix = "mkl-dnn-0.14", build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"), ) @@ -187,11 +187,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "highwayhash", urls = [ - "https://mirror.bazel.build/github.com/google/highwayhash/archive/dfcb97ca4fe9277bf9dc1802dd979b071896453b.tar.gz", - "https://github.com/google/highwayhash/archive/dfcb97ca4fe9277bf9dc1802dd979b071896453b.tar.gz", + 
"http://mirror.bazel.build/github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz", + "https://github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz", ], - sha256 = "0f30a15b1566d93f146c8d149878a06e91d9bb7ec2cfd76906df62a82be4aac9", - strip_prefix = "highwayhash-dfcb97ca4fe9277bf9dc1802dd979b071896453b", + sha256 = "9c3e0e87d581feeb0c18d814d98f170ff23e62967a2bd6855847f0b2fe598a37", + strip_prefix = "highwayhash-fd3d9af80465e4383162e4a7c5e2f406e82dd968", build_file = clean_dep("//third_party:highwayhash.BUILD"), ) diff --git a/third_party/eigen.BUILD b/third_party/eigen.BUILD index 07bb6645eb..e54c1a4501 100644 --- a/third_party/eigen.BUILD +++ b/third_party/eigen.BUILD @@ -64,6 +64,7 @@ cc_library( # This define (mostly) guarantees we don't link any problematic # code. We use it, but we do not rely on it, as evidenced above. "EIGEN_MPL2_ONLY", + "EIGEN_MAX_ALIGN_BYTES=64", ], includes = ["."], visibility = ["//visibility:public"], diff --git a/third_party/highwayhash.BUILD b/third_party/highwayhash.BUILD index 1b8e40765e..08cb84ea2c 100644 --- a/third_party/highwayhash.BUILD +++ b/third_party/highwayhash.BUILD @@ -10,6 +10,7 @@ cc_library( srcs = ["highwayhash/sip_hash.cc"], hdrs = [ "highwayhash/sip_hash.h", + "highwayhash/endianess.h", "highwayhash/state_helpers.h", ], visibility = ["//visibility:public"], diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/jpeg.BUILD index 4418ac32fc..663a218733 100644 --- a/third_party/jpeg/jpeg.BUILD +++ b/third_party/jpeg/jpeg.BUILD @@ -291,8 +291,10 @@ cc_library( "jchuff.h", "jconfig.h", "jdct.h", + "jerror.h", "jinclude.h", "jmorecfg.h", + "jpegint.h", "jpeglib.h", "jsimd.h", "jsimddct.h", diff --git a/third_party/png.BUILD b/third_party/png.BUILD index 76ab32d69c..17c5449cc0 100644 --- a/third_party/png.BUILD +++ b/third_party/png.BUILD @@ -28,7 +28,14 @@ cc_library( "pngwrite.c", "pngwtran.c", "pngwutil.c", - ], + ] + select({ + "@org_tensorflow//tensorflow:linux_ppc64le": [ + "powerpc/powerpc_init.c", + "powerpc/filter_vsx_intrinsics.c", + ], + "//conditions:default": [ + ], + }), hdrs = [ "png.h", "pngconf.h", diff --git a/third_party/py/python_configure.bzl b/third_party/py/python_configure.bzl index 954f21f5f8..3c7e5c8469 100644 --- a/third_party/py/python_configure.bzl +++ b/third_party/py/python_configure.bzl @@ -6,6 +6,7 @@ * `PYTHON_LIB_PATH`: Location of python libraries. 
""" +_BAZEL_SH = "BAZEL_SH" _PYTHON_BIN_PATH = "PYTHON_BIN_PATH" _PYTHON_LIB_PATH = "PYTHON_LIB_PATH" _TF_PYTHON_CONFIG_REPO = "TF_PYTHON_CONFIG_REPO" @@ -152,6 +153,22 @@ def _get_python_bin(repository_ctx): _PYTHON_BIN_PATH, repository_ctx.os.environ.get("PATH", ""))) +def _get_bash_bin(repository_ctx): + """Gets the bash bin path.""" + bash_bin = repository_ctx.os.environ.get(_BAZEL_SH) + if bash_bin != None: + return bash_bin + else: + bash_bin_path = repository_ctx.which("bash") + if bash_bin_path != None: + return str(bash_bin_path) + else: + _fail("Cannot find bash in PATH, please make sure " + + "bash is installed and add its directory in PATH, or --define " + + "%s='/path/to/bash'.\nPATH=%s" % ( + _BAZEL_SH, repository_ctx.os.environ.get("PATH", ""))) + + def _get_python_lib(repository_ctx, python_bin): """Gets the python lib path.""" python_lib = repository_ctx.os.environ.get(_PYTHON_LIB_PATH) @@ -184,14 +201,14 @@ def _get_python_lib(repository_ctx, python_bin): " print(paths[0])\n" + "END") cmd = '%s - %s' % (python_bin, print_lib) - result = repository_ctx.execute(["bash", "-c", cmd]) + result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd]) return result.stdout.strip('\n') def _check_python_lib(repository_ctx, python_lib): """Checks the python lib path.""" cmd = 'test -d "%s" -a -x "%s"' % (python_lib, python_lib) - result = repository_ctx.execute(["bash", "-c", cmd]) + result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd]) if result.return_code == 1: _fail("Invalid python library path: %s" % python_lib) @@ -199,7 +216,7 @@ def _check_python_lib(repository_ctx, python_lib): def _check_python_bin(repository_ctx, python_bin): """Checks the python bin path.""" cmd = '[[ -x "%s" ]] && [[ ! -d "%s" ]]' % (python_bin, python_bin) - result = repository_ctx.execute(["bash", "-c", cmd]) + result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd]) if result.return_code == 1: _fail("--define %s='%s' is not executable. Is it the python binary?" % ( _PYTHON_BIN_PATH, python_bin)) @@ -294,6 +311,7 @@ def _python_autoconf_impl(repository_ctx): python_configure = repository_rule( implementation = _python_autoconf_impl, environ = [ + _BAZEL_SH, _PYTHON_BIN_PATH, _PYTHON_LIB_PATH, _TF_PYTHON_CONFIG_REPO, diff --git a/third_party/repo.bzl b/third_party/repo.bzl index 36f5aa5bde..cb67d3e961 100644 --- a/third_party/repo.bzl +++ b/third_party/repo.bzl @@ -17,7 +17,6 @@ _SINGLE_URL_WHITELIST = depset([ "arm_compiler", "ortools_archive", - "gemmlowp", ]) def _is_windows(ctx): @@ -88,7 +87,9 @@ def _tf_http_archive(ctx): if ctx.attr.patch_file != None: _apply_patch(ctx, ctx.attr.patch_file) if ctx.attr.build_file != None: - ctx.template("BUILD", ctx.attr.build_file, { + # Use BUILD.bazel to avoid conflict with third party projects with + # BUILD or build (directory) underneath. + ctx.template("BUILD.bazel", ctx.attr.build_file, { "%prefix%": ".." if _repos_are_siblings() else "external", }, False) -- cgit v1.2.3 From 4631936e61651101932073197c08b600006530a3 Mon Sep 17 00:00:00 2001 From: gracehoney <31743510+aaroey@users.noreply.github.com> Date: Thu, 21 Jun 2018 15:23:05 -0700 Subject: Fix internal build errors. 
--- configure.py | 2 +- tensorflow/contrib/tensorrt/BUILD | 1 + .../contrib/tensorrt/convert/convert_graph.cc | 94 +++++++++++++--------- .../contrib/tensorrt/convert/convert_nodes.cc | 7 +- .../contrib/tensorrt/convert/convert_nodes.h | 9 +-- tensorflow/contrib/tensorrt/convert/utils.h | 2 +- .../contrib/tensorrt/kernels/trt_engine_op.cc | 28 +++---- .../contrib/tensorrt/kernels/trt_engine_op.h | 10 +-- tensorflow/contrib/tensorrt/python/trt_convert.py | 12 ++- .../tensorrt/resources/trt_int8_calibrator.cc | 1 - .../contrib/tensorrt/resources/trt_resources.h | 12 +-- tensorflow/contrib/tensorrt/test/test_tftrt.py | 11 ++- 12 files changed, 101 insertions(+), 88 deletions(-) (limited to 'configure.py') diff --git a/configure.py b/configure.py index a14d006a73..ad585fa52e 100644 --- a/configure.py +++ b/configure.py @@ -944,7 +944,7 @@ def set_tf_cudnn_version(environ_cp): def is_cuda_compatible(lib, cuda_ver, cudnn_ver): - """Check the compatibility between given library and cudnn/cudart libraries.""" + """Check compatibility between given library and cudnn/cudart libraries.""" ldd_bin = which('ldd') or '/usr/bin/ldd' ldd_out = run_shell([ldd_bin, lib], True) ldd_out = ldd_out.split(os.linesep) diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index e7b3fe38e5..adda0b758b 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -207,6 +207,7 @@ tf_cuda_library( ], deps = [ ":trt_logging", + ":utils", "//tensorflow/core:framework_headers_lib", "//tensorflow/core:framework_lite", "//tensorflow/core:lib_proto_parsing", diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index ba7d3b5f86..1c4fd4a0ce 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -49,13 +49,14 @@ limitations under the License. #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/protobuf/config.pb.h" +#include "tensorflow/core/protobuf/config.pb.h" // NOLINT #include "tensorflow/core/protobuf/device_properties.pb.h" // NOLINT +#include "tensorflow/core/protobuf/rewriter_config.pb.h" // NOLINT #include "tensorflow/core/util/device_name_utils.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include +#include "cuda/include/cuda_runtime_api.h" #include "tensorrt/include/NvInfer.h" namespace tensorflow { namespace tensorrt { @@ -238,14 +239,14 @@ tensorflow::Status ConvertGraphDefToTensorRT( } // Function to get subsegment information structure. 
-EngineInfo GetEngineInfo( +tensorflow::Status GetEngineInfo( const tensorflow::Graph* g, const tensorflow::grappler::GraphProperties& graph_properties, const std::set& segment_nodes, const std::unordered_map& node_map, - const std::vector& reverse_topo_order) { + const std::vector& reverse_topo_order, + EngineInfo* info) { std::vector subgraph_node_ids; - EngineInfo info; std::set segment_devices; int input_port = 0; int output_port = 0; @@ -296,9 +297,9 @@ EngineInfo GetEngineInfo( created_edges.insert({s, port}); input_port++; } - info.connections.emplace_back(input_node->name(), input_node->id(), - edge->src_output(), node_name, node_id, - edge->dst_input(), true, port); + info->connections.emplace_back(input_node->name(), input_node->id(), + edge->src_output(), node_name, node_id, + edge->dst_input(), true, port); } } } @@ -316,28 +317,28 @@ EngineInfo GetEngineInfo( created_edges.insert({s, port}); output_port++; } - info.connections.emplace_back(output_node->name(), output_node->id(), - edge->dst_input(), node_name, node_id, - edge->src_output(), false, port); + info->connections.emplace_back(output_node->name(), output_node->id(), + edge->dst_input(), node_name, node_id, + edge->src_output(), false, port); } } } - ConvertSegmentToGraphDef(g, graph_properties, subgraph_node_ids, - &info.connections, &info.segment_graph_def, - &info.engine_name); + TF_RETURN_IF_ERROR(ConvertSegmentToGraphDef( + g, graph_properties, subgraph_node_ids, &info->connections, + &info->segment_graph_def, &info->engine_name)); // TODO(sami): This should not happen once segmenter is updated. if (segment_devices.size() == 1) { - info.device = *segment_devices.begin(); + info->device = *segment_devices.begin(); } else if (segment_devices.size() > 1) { LOG(WARNING) << "Detected multiple(" << segment_devices.size() << ") devices for the segment. Picking first one to continue " << "but this shouldn't have happened"; - info.device = *segment_devices.begin(); + info->device = *segment_devices.begin(); } else { VLOG(1) << "Segment devices size is 0"; } - return info; + return Status::OK(); } // Function to insert a TRT node into the graph. 
The graph is not modified if @@ -562,7 +563,9 @@ tensorflow::Status RegisterSegmentFunctionToFunctionLibrary( tensorflow::NodeDefBuilder node_builder( StrCat(name, "_Arg"), tensorflow::FunctionLibraryDefinition::kArgOp); VLOG(1) << "Adding " << StrCat(name, "_Arg"); - node_builder.Attr("T", node->output_type(0)).Attr("index", i).Finalize(&nd); + TF_RETURN_IF_ERROR(node_builder.Attr("T", node->output_type(0)) + .Attr("index", i) + .Finalize(&nd)); tensorflow::Status s; auto node_arg = sgraph.AddNode(nd, &s); if (!s.ok()) { @@ -593,7 +596,9 @@ tensorflow::Status RegisterSegmentFunctionToFunctionLibrary( VLOG(1) << " input " << nout.node << ":" << nout.index << " dtype=" << tensorflow::DataTypeString(nout.data_type); node_builder.Input({nout}); - node_builder.Attr("T", node->output_type(0)).Attr("index", i).Finalize(&nd); + TF_RETURN_IF_ERROR(node_builder.Attr("T", node->output_type(0)) + .Attr("index", i) + .Finalize(&nd)); if (VLOG_IS_ON(3)) { VLOG(3) << nd.DebugString(); } @@ -713,11 +718,12 @@ tensorflow::Status ConvertAfterShapes(ConversionParams& params) { segment_options.exclude_node_list.insert(node); } segment_options.minimum_segment_size = params.minimum_segment_size; - tensorflow::tensorrt::segment::SegmentNodesVector segments; + tensorflow::tensorrt::segment::SegmentNodesVector initial_segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( - &graph, IsTensorRTCandidate, segment_options, &segments)); - if (segments.size() > 1) { - VLOG(0) << "MULTIPLE tensorrt candidate conversion: " << segments.size(); + &graph, IsTensorRTCandidate, segment_options, &initial_segments)); + if (initial_segments.size() > 1) { + VLOG(0) << "MULTIPLE tensorrt candidate conversion: " + << initial_segments.size(); } // Get the EngineInfo for each segment. @@ -725,17 +731,24 @@ tensorflow::Status ConvertAfterShapes(ConversionParams& params) { TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); float total_num_nodes_in_segments = 0.; std::vector engine_segments; - engine_segments.reserve(segments.size()); + engine_segments.reserve(initial_segments.size()); std::vector reverse_topo_order; tensorflow::GetPostOrder(graph, &reverse_topo_order); size_t total_engine_bytes_size = 0; std::vector engine_bytes_size; - for (size_t t = 0; t < segments.size(); t++) { - auto& s = segments.at(t); - engine_segments.emplace_back(GetEngineInfo(&graph, *params.graph_properties, - s.first, node_map, - reverse_topo_order)); - auto& curr_engine = engine_segments.back(); + tensorflow::tensorrt::segment::SegmentNodesVector converted_segments; + converted_segments.reserve(initial_segments.size()); + for (size_t t = 0; t < initial_segments.size(); t++) { + auto& curr_segment = initial_segments.at(t); + EngineInfo curr_engine; + Status status = + GetEngineInfo(&graph, *params.graph_properties, curr_segment.first, + node_map, reverse_topo_order, &curr_engine); + if (!status.ok()) { + LOG(WARNING) << "Failed to get engine info for segment " << t << ": " + << status; + continue; + } curr_engine.precision_mode = params.precision_mode; curr_engine.engine_type = (params.is_dyn_op || params.precision_mode == INT8MODE @@ -744,12 +757,19 @@ tensorflow::Status ConvertAfterShapes(ConversionParams& params) { curr_engine.cached_engine_batches = params.cached_engine_batches; curr_engine.maximum_cached_engines = params.max_cached_engines; StrAppend(&curr_engine.engine_name, "my_trt_op_", t); - RegisterSegmentFunctionToFunctionLibrary( + status = RegisterSegmentFunctionToFunctionLibrary( &graph, curr_engine.segment_graph_def, 
curr_engine.engine_name); + if (!status.ok()) { + LOG(WARNING) << "Failed to register segment graphdef as a function " << t + << ": " << status; + continue; + } engine_bytes_size.push_back(curr_engine.segment_graph_def.ByteSizeLong()); total_engine_bytes_size += engine_bytes_size.back(); - total_num_nodes_in_segments += s.first.size(); + total_num_nodes_in_segments += curr_segment.first.size(); + engine_segments.push_back(std::move(curr_engine)); + converted_segments.push_back(std::move(curr_segment)); if (VLOG_IS_ON(8)) { string fname = curr_engine.engine_name; @@ -775,7 +795,7 @@ tensorflow::Status ConvertAfterShapes(ConversionParams& params) { engine.max_workspace_size_bytes = params.max_workspace_size_bytes * (engine_bytes_size.at(i) / total_engine_bytes_size + - segments.at(i).first.size() / total_num_nodes_in_segments) / + converted_segments.at(i).first.size() / total_num_nodes_in_segments) / 2.0; // The allocator is used to build the engine. The build and the built engine // will be destroyed after we get the serialized engine string, so it's fine @@ -793,17 +813,17 @@ tensorflow::Status ConvertAfterShapes(ConversionParams& params) { cudaSetDevice(cuda_device_id); auto status = CreateTRTNode(&graph, engine_segments, i, alloc.get(), params.max_batch_size); - // If status is ok, we successfuly added the node to the graph and can + // If status is ok, we successfully added the node to the graph and can // remove segment ops. Otherwise graph is not modified. if (status.ok()) { - for (auto node_name : segments.at(i).first) { + for (auto node_name : converted_segments.at(i).first) { graph.RemoveNode(node_map.at(node_name)); } } else { // Graph is not modified. LOG(WARNING) << "Engine creation for segment " << i << ", composed of " - << segments.at(i).first.size() << " nodes failed: " << status - << ". Skipping..."; + << converted_segments.at(i).first.size() << " nodes failed: " + << status << ". Skipping..."; } } cudaSetDevice(old_cuda_device); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index b5214b461a..146b9c7344 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -2130,13 +2130,10 @@ void Converter::register_op_converters() { } // namespace tensorflow::Status ConvertGraphDefToEngine( - const tensorflow::GraphDef& gdef, - int precision_mode, - int max_batch_size, + const tensorflow::GraphDef& gdef, int precision_mode, int max_batch_size, size_t max_workspace_size_bytes, const std::vector& input_shapes, - Logger* logger, - nvinfer1::IGpuAllocator* allocator, + Logger* logger, nvinfer1::IGpuAllocator* allocator, TRTInt8Calibrator* calibrator, TrtUniquePtrType* engine, bool* convert_successfully) { diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 2da4edf7f5..7684d8d4a2 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -78,7 +78,7 @@ struct EngineInfo { EngineInfo() : engine_type(EngineType::TRTStatic), max_workspace_size_bytes(0), - precision_mode(FP32MODE) {}; + precision_mode(FP32MODE) {} string engine_name; string device; @@ -120,13 +120,10 @@ tensorflow::Status ConvertSegmentToGraphDef( // is successful. This is different than successfully building the engine: // building can still fail afterwards. 
tensorflow::Status ConvertGraphDefToEngine( - const tensorflow::GraphDef& gdef, - int precision_mode, - int max_batch_size, + const tensorflow::GraphDef& gdef, int precision_mode, int max_batch_size, size_t max_workspace_size_bytes, const std::vector& input_shapes, - Logger* logger, - nvinfer1::IGpuAllocator* allocator, + Logger* logger, nvinfer1::IGpuAllocator* allocator, TRTInt8Calibrator* calibrator, TrtUniquePtrType* engine, bool* convert_successfully); diff --git a/tensorflow/contrib/tensorrt/convert/utils.h b/tensorflow/contrib/tensorrt/convert/utils.h index 021fdaf8c5..f601c06701 100644 --- a/tensorflow/contrib/tensorrt/convert/utils.h +++ b/tensorflow/contrib/tensorrt/convert/utils.h @@ -31,7 +31,7 @@ struct TrtDestroyer { template using TrtUniquePtrType = std::unique_ptr>; -} // namespace convert } // namespace tensorrt +} // namespace tensorflow #endif // TENSORFLOW_CONTRIB_TENSORRT_CONVERT_UTILS_H_ diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index d12f738ac5..75e32559bb 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -15,8 +15,8 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/kernels/trt_engine_op.h" #include -#include "tensorflow/contrib/tensorrt/convert/utils.h" #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h" +#include "tensorflow/contrib/tensorrt/convert/utils.h" #include "tensorflow/contrib/tensorrt/log/trt_logger.h" #include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" #include "tensorflow/contrib/tensorrt/resources/trt_resources.h" @@ -77,9 +77,8 @@ tensorflow::Status TRTEngineOp::ConstructFunctionHandle(OpKernelContext* ctx) { } auto fdef = lib->GetFunctionLibraryDefinition()->Find(funcdef_name_); if (fdef == nullptr) { - return tensorflow::errors::Internal( - "Native FunctionDef ", funcdef_name_, - " can't be found in function library"); + return tensorflow::errors::Internal("Native FunctionDef ", funcdef_name_, + " can't be found in function library"); } tensorflow::FunctionLibraryRuntime::InstantiateOptions inst_ops; inst_ops.overlay_lib = nullptr; @@ -128,8 +127,8 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) } else if (precision_string == "INT8") { precision_mode_ = convert::INT8MODE; } - calibration_mode_ = (precision_mode_ == convert::INT8MODE && - calibration_data.size() == 0); + calibration_mode_ = + (precision_mode_ == convert::INT8MODE && calibration_data.size() == 0); if (calibration_data.size()) { calibrator_.reset(new TRTInt8Calibrator(calibration_data)); calibration_data.resize(0); @@ -291,8 +290,8 @@ void TRTEngineOp::ComputeAsync(tensorflow::OpKernelContext* ctx, std::vector buffers(num_binding); for (int i = 0; i < ctx->num_inputs(); i++) { const string inp_name = StrCat(kInputPHName, i); - const size_t binding_index = trt_engine_ptr->getBindingIndex( - inp_name.c_str()); + const size_t binding_index = + trt_engine_ptr->getBindingIndex(inp_name.c_str()); const Tensor& input_tensor = ctx->input(i); const TensorShape& input_shape = input_tensor.shape(); @@ -320,7 +319,7 @@ void TRTEngineOp::ComputeAsync(tensorflow::OpKernelContext* ctx, default: LOG(ERROR) << "Unknown TRT data type: " << int(dtype); ctx->SetStatus(tensorflow::errors::InvalidArgument( - "Unknown ouput TRT data type! ", int(dtype))); + "Unknown ouput TRT data type! 
", static_cast(dtype))); return; } } @@ -343,8 +342,8 @@ void TRTEngineOp::ComputeAsync(tensorflow::OpKernelContext* ctx, &output_shape)); } else { LOG(ERROR) << "output node not found, at " << output_name; - ctx->SetStatus(tensorflow::errors::Internal( - "output ", output_name, " couldn't be found!")); + ctx->SetStatus(tensorflow::errors::Internal("output ", output_name, + " couldn't be found!")); return; } auto status = ctx->allocate_output(i, output_shape, &output_tensor); @@ -370,7 +369,7 @@ void TRTEngineOp::ComputeAsync(tensorflow::OpKernelContext* ctx, "INT8 outputs are not supported!")); return; default: - LOG(ERROR) << "Unknown TRT data type: " << int(dtype); + LOG(ERROR) << "Unknown TRT data type: " << static_cast(dtype); ctx->SetStatus(tensorflow::errors::InvalidArgument( "Unsupported output data type! ", int(dtype))); return; @@ -442,7 +441,7 @@ TRTEngineOp::EngineCtxPair& TRTEngineOp::GetEngine(int batch_size, if (allocator == nullptr) { // GetAllocator already set the Status. return null_pair; - }; + } infer->setGpuAllocator(allocator); #endif TrtUniquePtrType static_engine( @@ -506,8 +505,7 @@ TRTEngineOp::EngineCtxPair& TRTEngineOp::GetEngine(int batch_size, } tensorflow::Status TRTEngineOp::AllocateCalibrationResources( - tensorflow::OpKernelContext* ctx, - TRTCalibrationResource** cr) { + tensorflow::OpKernelContext* ctx, TRTCalibrationResource** cr) { auto cres = new TRTCalibrationResource(); *cr = cres; // Get the allocator. diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h index 0d2f9e8a9d..6fe318be6a 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h @@ -52,19 +52,17 @@ class TRTEngineOp : public AsyncOpKernel { private: // Execute calibration - void ExecuteCalibration(OpKernelContext* ctx, - AsyncHelper* helper); + void ExecuteCalibration(OpKernelContext* ctx, AsyncHelper* helper); // Construct a function handle for executing native funcdef graph Status ConstructFunctionHandle(OpKernelContext* ctx); // Execute replaced native segment as function Op. - void ExecuteNativeSegment(OpKernelContext* ctx, - AsyncHelper* helper); + void ExecuteNativeSegment(OpKernelContext* ctx, AsyncHelper* helper); // Allocate necessary resources for calibration - Status AllocateCalibrationResources( - OpKernelContext* ctx, TRTCalibrationResource** cr); + Status AllocateCalibrationResources(OpKernelContext* ctx, + TRTCalibrationResource** cr); // TODO(samikama): context should go to a resource manager! 
typedef std::pair, diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 490c74a701..79f512dbcf 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -21,9 +21,9 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long import six as _six from tensorflow.contrib.tensorrt.wrap_conversion import calib_convert -from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert -from tensorflow.contrib.tensorrt.wrap_conversion import get_loaded_tensorrt_version from tensorflow.contrib.tensorrt.wrap_conversion import get_linked_tensorrt_version +from tensorflow.contrib.tensorrt.wrap_conversion import get_loaded_tensorrt_version +from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert from tensorflow.core.framework import graph_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.framework import errors @@ -58,6 +58,10 @@ def create_inference_graph(input_graph_def, precision_mode: one of 'FP32', 'FP16' and 'INT8' minimum_segment_size: the minimum number of nodes required for a subgraph to be replaced by TRTEngineOp. + is_dynamic_op: whether to generate dynamic TRT ops which will build the TRT + network and engine at run time. + maximum_cached_engines: max number of cached TRT engines in dynamic TRT ops. + cached_engine_batches: batch sizes used to pre-create cached engines. Returns: New GraphDef with TRTEngineOps placed in graph replacing subgraphs. @@ -81,7 +85,7 @@ def create_inference_graph(input_graph_def, "TensorRT %s but library loaded from environment is TensorRT %s" % (".".join([str(x) for x in compiled_version]), ".".join([str(x) for x in loaded_version])) + - ". Please make sure that correct version of TensorRT "\ + ". Please make sure that correct version of TensorRT " + "is available in the system and added to ldconfig or LD_LIBRARY_PATH" ) raise RuntimeError("Incompatible TensorRT library version") @@ -178,7 +182,7 @@ def calib_graph_to_infer_graph(calibration_graph_def, is_dynamic_op=False): is_calib_graph = False for n in calibration_graph_def.node: if n.op == "TRTEngineOp": - is_calib_graph = is_calib_graph or len(n.attr["calibration_data"].s) == 0 + is_calib_graph = is_calib_graph or not n.attr["calibration_data"].s if not is_calib_graph: tf_logging.error( "Not a calib graph. Doesn't seem to contain any calibration nodes.") diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc index 59ae860bc0..32e81858b9 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc @@ -16,7 +16,6 @@ limitations under the License. 
#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" #include -#include #include #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/contrib/tensorrt/resources/trt_resources.h b/tensorflow/contrib/tensorrt/resources/trt_resources.h index 76863503bd..b7d5ffd674 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_resources.h +++ b/tensorflow/contrib/tensorrt/resources/trt_resources.h @@ -49,15 +49,15 @@ class TRTCalibrationResource : public tensorflow::ResourceBase { string DebugString() override { std::stringstream oss; - using std::hex; using std::dec; using std::endl; + using std::hex; oss << " Calibrator = " << hex << calibrator_.get() << dec << endl - << " Builder = " << hex << builder_.get() << dec << endl - << " Engine = " << hex << engine_.get() << dec << endl - << " Logger = " << hex << &logger_ << dec << endl - << " Allocator = " << hex << allocator_.get() << dec << endl - << " Thread = " << hex << thr_.get() << dec << endl; + << " Builder = " << hex << builder_.get() << dec << endl + << " Engine = " << hex << engine_.get() << dec << endl + << " Logger = " << hex << &logger_ << dec << endl + << " Allocator = " << hex << allocator_.get() << dec << endl + << " Thread = " << hex << thr_.get() << dec << endl; return oss.str(); } diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py index 5e74f9295d..090aa8bdb0 100644 --- a/tensorflow/contrib/tensorrt/test/test_tftrt.py +++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py @@ -76,7 +76,7 @@ def get_multi_engine_graph_def(mode="FP32"): g = ops.Graph() with g.as_default(): x = aops.placeholder(shape=[None, 3, 7, 5], name="input", dtype=dtype) - with g.name_scope("Global_scope") as scope: + with g.name_scope("Global_scope"): with g.name_scope("first_scope"): e = cop.constant( np.random.randn(3, 2, 3, 4), name="weights", dtype=dtype) @@ -92,15 +92,14 @@ def get_multi_engine_graph_def(mode="FP32"): b = cop.constant(np.random.randn(1, 4, 1, 1), name="bias2", dtype=dtype) q = conv / b - c = cop.constant(np.random.randn(1, 4, 1, 1), name="bias3", dtype=dtype) edge = mops.sin(q) edge1 = mops.cos(conv) with g.name_scope("test_scope"): de = edge + edge1 - t = t - edge1 - q = q * edge - t = t + q - t = t - de + t -= edge1 + q *= edge + t += q + t -= de k = aops.squeeze(t, name="output") print(k.dtype) return g.as_graph_def() -- cgit v1.2.3 From 6896a74984efb4b1b77fc36ea274703536ba649d Mon Sep 17 00:00:00 2001 From: Jon Triebenbach Date: Wed, 27 Jun 2018 13:29:53 -0500 Subject: Build OpenBLAS 0.3.0 on ppc64le for TF tests --- configure.py | 7 ++++++ tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le | 2 +- tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le | 2 +- .../ci_build/install/install_openblas_ppc64le.sh | 28 ++++++++++++++++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) create mode 100755 tensorflow/tools/ci_build/install/install_openblas_ppc64le.sh (limited to 'configure.py') diff --git a/configure.py b/configure.py index ad585fa52e..04ad1c9441 100644 --- a/configure.py +++ b/configure.py @@ -1465,6 +1465,13 @@ def main(): environ_cp['TF_NEED_JEMALLOC'] = '0' environ_cp['TF_NEED_TENSORRT'] = '0' + # The numpy package on ppc64le uses OpenBLAS which has multi-threading + # issues that lead to incorrect answers. Set OMP_NUM_THREADS=1 at + # runtime to allow the Tensorflow testcases which compare numpy + # results to Tensorflow results to succeed. 
+ if is_ppc64le(): + write_action_env_to_bazelrc("OMP_NUM_THREADS", 1) + set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc', 'with_jemalloc', True) set_build_var(environ_cp, 'TF_NEED_GCP', 'Google Cloud Platform', diff --git a/tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le b/tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le index e879c34bbd..ada2c63880 100644 --- a/tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le +++ b/tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le @@ -7,7 +7,7 @@ COPY install/*.sh /install/ RUN /install/install_bootstrap_deb_packages.sh RUN add-apt-repository -y ppa:openjdk-r/ppa RUN /install/install_deb_packages.sh -RUN apt-get update && apt-get install -y libopenblas-dev +RUN /install/install_openblas_ppc64le.sh RUN /install/install_hdf5_ppc64le.sh RUN /install/install_pip_packages.sh RUN /install/install_bazel_from_source.sh diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le b/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le index 8967138747..a404f129ab 100644 --- a/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le +++ b/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le @@ -13,7 +13,7 @@ ARG DEBIAN_FRONTEND=noninteractive RUN /install/install_bootstrap_deb_packages.sh RUN add-apt-repository -y ppa:openjdk-r/ppa RUN /install/install_deb_packages.sh -RUN apt-get update && apt-get install -y libopenblas-dev +RUN /install/install_openblas_ppc64le.sh RUN /install/install_hdf5_ppc64le.sh RUN /install/install_pip_packages.sh RUN /install/install_bazel_from_source.sh diff --git a/tensorflow/tools/ci_build/install/install_openblas_ppc64le.sh b/tensorflow/tools/ci_build/install/install_openblas_ppc64le.sh new file mode 100755 index 0000000000..9ace25a36f --- /dev/null +++ b/tensorflow/tools/ci_build/install/install_openblas_ppc64le.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +OPENBLAS_SRC_PATH=/tmp/openblas_src/ +POWER="POWER8" +USE_OPENMP="USE_OPENMP=1" +OPENBLAS_INSTALL_PATH="/usr" +apt-get install -y gfortran gfortran-5 +sudo rm -rf ${OPENBLAS_SRC_PATH} +git clone -b release-0.3.0 https://github.com/xianyi/OpenBLAS ${OPENBLAS_SRC_PATH} +cd ${OPENBLAS_SRC_PATH} +# Pick up fix for OpenBLAS issue 1571 +git cherry-pick -X theirs 961d25e9c7e4a1758adb1dbeaa15187de69dd052 +make TARGET=${POWER} ${USE_OPENMP} FC=gfortran +make PREFIX=${OPENBLAS_INSTALL_PATH} install -- cgit v1.2.3 From 1e7b0e4ad6d0f57f3241fe0b80a65f2c2a7f11b0 Mon Sep 17 00:00:00 2001 From: Mingxing Tan Date: Thu, 28 Jun 2018 19:13:20 -0700 Subject: Merge changes from github. 
PiperOrigin-RevId: 202585094 --- .gitignore | 1 + configure.py | 72 +- tensorflow/BUILD | 26 + tensorflow/c/c_api.cc | 6 +- tensorflow/compiler/aot/codegen.cc | 2 +- tensorflow/compiler/xla/rpc/BUILD | 6 +- tensorflow/compiler/xla/service/BUILD | 1 - tensorflow/compiler/xla/service/hlo_instruction.cc | 1 + .../compiler/xla/service/hlo_instruction_test.cc | 34 + tensorflow/contrib/autograph/converters/BUILD | 5 +- .../contrib/autograph/operators/control_flow.py | 2 +- .../contrib/autograph/pyct/static_analysis/cfg.py | 2 +- tensorflow/contrib/autograph/pyct/transformer.py | 4 +- tensorflow/contrib/cmake/CMakeLists.txt | 36 +- .../contrib/cmake/external/double_conversion.cmake | 6 +- tensorflow/contrib/cmake/external/mkl.cmake | 68 ++ tensorflow/contrib/cmake/external/mkldnn.cmake | 12 +- tensorflow/contrib/cmake/tf_python.cmake | 77 +- tensorflow/contrib/cmake/tf_shared_lib.cmake | 5 + .../contrib/constrained_optimization/README.md | 2 +- .../python/swap_regret_optimizer.py | 8 +- .../python/kernel_tests/slide_dataset_op_test.py | 42 +- tensorflow/contrib/data/python/ops/sliding.py | 2 +- .../nmt_with_attention/nmt_with_attention.ipynb | 909 +++++++++++++++++ .../gan/python/estimator/python/head_impl.py | 6 +- .../gan/python/estimator/python/head_test.py | 9 +- tensorflow/contrib/gdr/gdr_server_lib.cc | 2 +- .../optimized/depthwiseconv_uint8_3x3_filter.h | 2 +- .../interpreter_wrapper/interpreter_wrapper.h | 2 + tensorflow/contrib/opt/BUILD | 20 + tensorflow/contrib/opt/__init__.py | 11 +- .../opt/python/training/weight_decay_optimizers.py | 362 +++++++ .../training/weight_decay_optimizers_test.py | 188 ++++ .../contrib/solvers/python/ops/linear_equations.py | 1 - tensorflow/contrib/tensorrt/BUILD | 20 +- .../contrib/tensorrt/convert/convert_graph.cc | 1027 +++++++++++++------- .../contrib/tensorrt/convert/convert_graph.h | 61 +- .../contrib/tensorrt/convert/convert_nodes.cc | 801 +++++---------- .../contrib/tensorrt/convert/convert_nodes.h | 133 ++- .../tensorrt/convert/trt_optimization_pass.cc | 48 +- .../tensorrt/convert/trt_optimization_pass.h | 3 + tensorflow/contrib/tensorrt/convert/utils.h | 37 + .../contrib/tensorrt/kernels/trt_engine_op.cc | 588 +++++++++-- .../contrib/tensorrt/kernels/trt_engine_op.h | 98 +- tensorflow/contrib/tensorrt/ops/trt_engine_op.cc | 18 +- tensorflow/contrib/tensorrt/python/trt_convert.py | 55 +- .../contrib/tensorrt/resources/trt_allocator.cc | 2 +- .../contrib/tensorrt/resources/trt_allocator.h | 5 +- .../tensorrt/resources/trt_int8_calibrator.cc | 34 +- .../tensorrt/resources/trt_int8_calibrator.h | 35 +- .../contrib/tensorrt/resources/trt_resources.h | 49 +- tensorflow/contrib/tensorrt/segment/segment.h | 7 +- tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc | 76 +- tensorflow/contrib/tensorrt/test/test_tftrt.py | 138 ++- tensorflow/contrib/tensorrt/trt_conversion.i | 98 +- tensorflow/contrib/tpu/profiler/BUILD | 2 +- tensorflow/contrib/verbs/BUILD | 4 +- tensorflow/core/api_def/BUILD | 7 + .../api_def_SampleDistortedBoundingBox.pbtxt | 2 +- .../api_def_SampleDistortedBoundingBoxV2.pbtxt | 2 +- .../api_def/base_api/api_def_SlideDataset.pbtxt | 2 +- .../core/api_def/java_api/api_def_Assert.pbtxt | 4 + .../core/api_def/java_api/api_def_Const.pbtxt | 4 + .../core/api_def/java_api/api_def_Switch.pbtxt | 4 + .../direct_session_with_tracking_alloc_test.cc | 18 +- .../core/common_runtime/mkl_cpu_allocator.cc | 7 + tensorflow/core/debug/BUILD | 4 +- tensorflow/core/distributed_runtime/BUILD | 4 +- tensorflow/core/distributed_runtime/eager/BUILD | 4 +- 
tensorflow/core/distributed_runtime/rpc/BUILD | 36 +- .../core/distributed_runtime/rpc/eager/BUILD | 7 +- .../distributed_runtime/rpc/grpc_server_lib.cc | 6 + .../core/distributed_runtime/rpc/grpc_server_lib.h | 3 + tensorflow/core/graph/mkl_layout_pass_test.cc | 21 +- tensorflow/core/kernels/data/slide_dataset_op.cc | 51 +- tensorflow/core/kernels/mkl_conv_ops.cc | 332 ++++--- tensorflow/core/kernels/reduction_gpu_kernels.cu.h | 2 +- tensorflow/core/kernels/segment_reduction_ops.h | 6 + tensorflow/core/ops/math_ops.cc | 8 +- tensorflow/core/platform/cloud/oauth_client.cc | 4 +- tensorflow/core/platform/default/build_config.bzl | 5 +- tensorflow/core/platform/windows/port.cc | 5 + .../core/profiler/internal/tfprof_timeline.cc | 16 +- tensorflow/core/util/mkl_util.h | 32 +- tensorflow/docs_src/get_started/index.md | 29 + tensorflow/docs_src/guide/debugger.md | 2 +- tensorflow/go/attrs.go | 245 +++++ tensorflow/go/attrs_test.go | 193 ++++ tensorflow/go/op/wrappers.go | 9 +- tensorflow/go/operation.go | 66 ++ tensorflow/go/operation_test.go | 62 ++ tensorflow/java/BUILD | 5 + tensorflow/java/maven/.gitignore | 6 + tensorflow/java/maven/README.md | 6 + tensorflow/java/maven/hadoop/pom.xml | 24 + tensorflow/java/maven/pom.xml | 2 + tensorflow/java/maven/run_inside_container.sh | 47 +- tensorflow/java/maven/spark-connector/pom.xml | 24 + tensorflow/java/src/gen/cc/op_generator.cc | 11 +- tensorflow/java/src/gen/cc/op_specs.h | 2 + .../tensorflow/processor/OperatorProcessor.java | 348 ++++++- tensorflow/python/estimator/canned/baseline.py | 4 +- tensorflow/python/estimator/export/export.py | 6 +- tensorflow/python/keras/datasets/boston_housing.py | 7 +- tensorflow/python/keras/datasets/mnist.py | 10 +- tensorflow/python/keras/datasets/reuters.py | 6 +- tensorflow/python/keras/layers/__init__.py | 2 + tensorflow/python/keras/layers/merge.py | 4 + .../python/kernel_tests/dynamic_stitch_op_test.py | 1 - tensorflow/python/lib/core/numpy.h | 2 + tensorflow/python/lib/core/py_util.cc | 2 + tensorflow/python/ops/image_ops_impl.py | 103 +- tensorflow/python/ops/image_ops_test.py | 96 ++ tensorflow/python/ops/math_ops_test.py | 9 + tensorflow/python/ops/special_math_ops.py | 2 + tensorflow/python/ops/special_math_ops_test.py | 10 +- tensorflow/python/ops/state_ops.py | 4 +- tensorflow/python/training/checkpoint_utils.py | 2 +- tensorflow/tf_framework_version_script.lds | 11 + tensorflow/tools/api/golden/tensorflow.image.pbtxt | 4 + .../golden/tensorflow.keras.layers.-minimum.pbtxt | 176 ++++ .../golden/tensorflow.keras.layers.-subtract.pbtxt | 176 ++++ .../tools/api/golden/tensorflow.keras.layers.pbtxt | 16 + tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le | 19 + tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le | 27 + tensorflow/tools/ci_build/ci_build.sh | 4 +- .../tools/ci_build/ci_parameterized_build.sh | 8 +- .../ci_build/install/install_bazel_from_source.sh | 40 + .../install/install_buildifier_from_source.sh | 30 + .../ci_build/install/install_golang_ppc64le.sh | 22 + .../tools/ci_build/install/install_pip_packages.sh | 4 + .../install/install_python3.5_pip_packages.sh | 3 + .../install/install_python3.6_pip_packages.sh | 6 +- tensorflow/tools/ci_build/linux/gpu/run_mkl.sh | 47 + .../tools/ci_build/linux/mkl/basic-mkl-gpu-test.sh | 29 + tensorflow/tools/git/gen_git_source.py | 11 +- tensorflow/tools/lib_package/BUILD | 4 +- tensorflow/tools/pip_package/BUILD | 2 +- tensorflow/tools/pip_package/build_pip_package.sh | 21 +- tensorflow/tools/pip_package/setup.py | 4 +- tensorflow/workspace.bzl | 80 +- 
third_party/curl.BUILD | 22 +- third_party/flatbuffers/flatbuffers.BUILD | 2 + third_party/jsoncpp.BUILD | 7 +- third_party/libxsmm.BUILD | 2 +- 145 files changed, 6294 insertions(+), 1701 deletions(-) create mode 100644 tensorflow/contrib/cmake/external/mkl.cmake create mode 100644 tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb create mode 100644 tensorflow/contrib/opt/python/training/weight_decay_optimizers.py create mode 100644 tensorflow/contrib/opt/python/training/weight_decay_optimizers_test.py create mode 100644 tensorflow/contrib/tensorrt/convert/utils.h create mode 100644 tensorflow/core/api_def/java_api/api_def_Assert.pbtxt create mode 100644 tensorflow/core/api_def/java_api/api_def_Const.pbtxt create mode 100644 tensorflow/core/api_def/java_api/api_def_Switch.pbtxt create mode 100644 tensorflow/docs_src/get_started/index.md create mode 100644 tensorflow/go/attrs.go create mode 100644 tensorflow/go/attrs_test.go create mode 100644 tensorflow/java/maven/hadoop/pom.xml create mode 100644 tensorflow/java/maven/spark-connector/pom.xml create mode 100644 tensorflow/tf_framework_version_script.lds create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-minimum.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-subtract.pbtxt create mode 100644 tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le create mode 100644 tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le create mode 100755 tensorflow/tools/ci_build/install/install_bazel_from_source.sh create mode 100755 tensorflow/tools/ci_build/install/install_buildifier_from_source.sh create mode 100755 tensorflow/tools/ci_build/install/install_golang_ppc64le.sh create mode 100755 tensorflow/tools/ci_build/linux/gpu/run_mkl.sh create mode 100755 tensorflow/tools/ci_build/linux/mkl/basic-mkl-gpu-test.sh (limited to 'configure.py') diff --git a/.gitignore b/.gitignore index 828bbe9bd3..b5306b8b79 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ __pycache__ cmake_build/ .idea/** /build/ +[Bb]uild/ /tensorflow/core/util/version_info.cc /tensorflow/python/framework/fast_tensor_util.cpp Pods diff --git a/configure.py b/configure.py index ada342a50a..ad585fa52e 100644 --- a/configure.py +++ b/configure.py @@ -943,6 +943,35 @@ def set_tf_cudnn_version(environ_cp): write_action_env_to_bazelrc('TF_CUDNN_VERSION', tf_cudnn_version) +def is_cuda_compatible(lib, cuda_ver, cudnn_ver): + """Check compatibility between given library and cudnn/cudart libraries.""" + ldd_bin = which('ldd') or '/usr/bin/ldd' + ldd_out = run_shell([ldd_bin, lib], True) + ldd_out = ldd_out.split(os.linesep) + cudnn_pattern = re.compile('.*libcudnn.so\\.?(.*) =>.*$') + cuda_pattern = re.compile('.*libcudart.so\\.?(.*) =>.*$') + cudnn = None + cudart = None + cudnn_ok = True # assume no cudnn dependency by default + cuda_ok = True # assume no cuda dependency by default + for line in ldd_out: + if 'libcudnn.so' in line: + cudnn = cudnn_pattern.search(line) + cudnn_ok = False + elif 'libcudart.so' in line: + cudart = cuda_pattern.search(line) + cuda_ok = False + if cudnn and len(cudnn.group(1)): + cudnn = convert_version_to_int(cudnn.group(1)) + if cudart and len(cudart.group(1)): + cudart = convert_version_to_int(cudart.group(1)) + if cudnn is not None: + cudnn_ok = (cudnn == cudnn_ver) + if cudart is not None: + cuda_ok = (cudart == cuda_ver) + return cudnn_ok and cuda_ok + + def set_tf_tensorrt_install_path(environ_cp): """Set TENSORRT_INSTALL_PATH and TF_TENSORRT_VERSION. 
@@ -959,8 +988,8 @@ def set_tf_tensorrt_install_path(environ_cp): raise ValueError('Currently TensorRT is only supported on Linux platform.') # Ask user whether to add TensorRT support. - if str(int(get_var( - environ_cp, 'TF_NEED_TENSORRT', 'TensorRT', False))) != '1': + if str(int(get_var(environ_cp, 'TF_NEED_TENSORRT', 'TensorRT', + False))) != '1': return for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): @@ -973,47 +1002,29 @@ def set_tf_tensorrt_install_path(environ_cp): # Result returned from "read" will be used unexpanded. That make "~" # unusable. Going through one more level of expansion to handle that. - trt_install_path = os.path.realpath( - os.path.expanduser(trt_install_path)) + trt_install_path = os.path.realpath(os.path.expanduser(trt_install_path)) def find_libs(search_path): """Search for libnvinfer.so in "search_path".""" fl = set() if os.path.exists(search_path) and os.path.isdir(search_path): - fl.update([os.path.realpath(os.path.join(search_path, x)) - for x in os.listdir(search_path) if 'libnvinfer.so' in x]) + fl.update([ + os.path.realpath(os.path.join(search_path, x)) + for x in os.listdir(search_path) + if 'libnvinfer.so' in x + ]) return fl possible_files = find_libs(trt_install_path) possible_files.update(find_libs(os.path.join(trt_install_path, 'lib'))) possible_files.update(find_libs(os.path.join(trt_install_path, 'lib64'))) - - def is_compatible(tensorrt_lib, cuda_ver, cudnn_ver): - """Check the compatibility between tensorrt and cudnn/cudart libraries.""" - ldd_bin = which('ldd') or '/usr/bin/ldd' - ldd_out = run_shell([ldd_bin, tensorrt_lib]).split(os.linesep) - cudnn_pattern = re.compile('.*libcudnn.so\\.?(.*) =>.*$') - cuda_pattern = re.compile('.*libcudart.so\\.?(.*) =>.*$') - cudnn = None - cudart = None - for line in ldd_out: - if 'libcudnn.so' in line: - cudnn = cudnn_pattern.search(line) - elif 'libcudart.so' in line: - cudart = cuda_pattern.search(line) - if cudnn and len(cudnn.group(1)): - cudnn = convert_version_to_int(cudnn.group(1)) - if cudart and len(cudart.group(1)): - cudart = convert_version_to_int(cudart.group(1)) - return (cudnn == cudnn_ver) and (cudart == cuda_ver) - cuda_ver = convert_version_to_int(environ_cp['TF_CUDA_VERSION']) cudnn_ver = convert_version_to_int(environ_cp['TF_CUDNN_VERSION']) nvinfer_pattern = re.compile('.*libnvinfer.so.?(.*)$') highest_ver = [0, None, None] for lib_file in possible_files: - if is_compatible(lib_file, cuda_ver, cudnn_ver): + if is_cuda_compatible(lib_file, cuda_ver, cudnn_ver): matches = nvinfer_pattern.search(lib_file) if len(matches.groups()) == 0: continue @@ -1029,12 +1040,13 @@ def set_tf_tensorrt_install_path(environ_cp): # Try another alternative from ldconfig. 
ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' ldconfig_output = run_shell([ldconfig_bin, '-p']) - search_result = re.search( - '.*libnvinfer.so\\.?([0-9.]*).* => (.*)', ldconfig_output) + search_result = re.search('.*libnvinfer.so\\.?([0-9.]*).* => (.*)', + ldconfig_output) if search_result: libnvinfer_path_from_ldconfig = search_result.group(2) if os.path.exists(libnvinfer_path_from_ldconfig): - if is_compatible(libnvinfer_path_from_ldconfig, cuda_ver, cudnn_ver): + if is_cuda_compatible(libnvinfer_path_from_ldconfig, cuda_ver, + cudnn_ver): trt_install_path = os.path.dirname(libnvinfer_path_from_ldconfig) tf_tensorrt_version = search_result.group(1) break diff --git a/tensorflow/BUILD b/tensorflow/BUILD index e4530a5962..233fe21fbf 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -154,6 +154,12 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "linux_s390x", + values = {"cpu": "s390x"}, + visibility = ["//visibility:public"], +) + config_setting( name = "debug", values = { @@ -459,6 +465,15 @@ filegroup( tf_cc_shared_object( name = "libtensorflow_framework.so", framework_so = [], + linkopts = select({ + "//tensorflow:darwin": [], + "//tensorflow:windows": [], + "//tensorflow:windows_msvc": [], + "//conditions:default": [ + "-Wl,--version-script", # This line must be directly followed by the version_script.lds file + "$(location //tensorflow:tf_framework_version_script.lds)", + ], + }), linkstatic = 1, visibility = ["//visibility:public"], deps = [ @@ -468,6 +483,7 @@ tf_cc_shared_object( "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry_impl", "//tensorflow/core:lib_internal_impl", "//tensorflow/stream_executor:stream_executor_impl", + "//tensorflow:tf_framework_version_script.lds", ] + tf_additional_binary_deps(), ) @@ -571,3 +587,13 @@ py_library( visibility = ["//visibility:public"], deps = ["//tensorflow/python:no_contrib"], ) + +cc_library( + name = "grpc", + deps = ["@grpc"], +) + +cc_library( + name = "grpc++", + deps = ["@grpc//:grpc++"], +) diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index 37c8302e08..5c218d3f25 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -2068,7 +2068,8 @@ TF_ImportGraphDefResults* TF_GraphImportGraphDefWithResults( TF_Graph* graph, const TF_Buffer* graph_def, const TF_ImportGraphDefOptions* options, TF_Status* status) { GraphDef def; - if (!def.ParseFromArray(graph_def->data, graph_def->length)) { + if (!tensorflow::ParseProtoUnlimited(&def, graph_def->data, + graph_def->length)) { status->status = InvalidArgument("Invalid GraphDef"); return nullptr; } @@ -2098,7 +2099,8 @@ void TF_GraphImportGraphDefWithReturnOutputs( return; } GraphDef def; - if (!def.ParseFromArray(graph_def->data, graph_def->length)) { + if (!tensorflow::ParseProtoUnlimited(&def, graph_def->data, + graph_def->length)) { status->status = InvalidArgument("Invalid GraphDef"); return; } diff --git a/tensorflow/compiler/aot/codegen.cc b/tensorflow/compiler/aot/codegen.cc index 0025842aea..28070d60db 100644 --- a/tensorflow/compiler/aot/codegen.cc +++ b/tensorflow/compiler/aot/codegen.cc @@ -287,7 +287,7 @@ Status GenerateHeader(const CodegenOpts& opts, const tf2xla::Config& config, TF_RETURN_IF_ERROR(ValidateFeedFetchCppNames(config)); const int64 result_index = compile_result.aot->result_buffer_index(); const xla::BufferSizes& temp_sizes = compile_result.aot->buffer_sizes(); - if (result_index < 0 || result_index > temp_sizes.size()) { + if (result_index < 0 || result_index >= 
temp_sizes.size()) { return errors::InvalidArgument("result index: ", result_index, " is outside the range of temp sizes: [0,", temp_sizes.size(), ")"); diff --git a/tensorflow/compiler/xla/rpc/BUILD b/tensorflow/compiler/xla/rpc/BUILD index 1775666652..0b1cec1925 100644 --- a/tensorflow/compiler/xla/rpc/BUILD +++ b/tensorflow/compiler/xla/rpc/BUILD @@ -39,10 +39,10 @@ tf_cc_binary( srcs = ["grpc_service_main.cc"], deps = [ ":grpc_service", + "//tensorflow:grpc++", "//tensorflow/compiler/xla/service:cpu_plugin", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", - "@grpc//:grpc++", ], ) @@ -54,6 +54,7 @@ tf_cc_test( ], deps = [ ":grpc_stub", + "//tensorflow:grpc++", "//tensorflow/compiler/xla/client", "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/tests:literal_test_util", @@ -61,7 +62,6 @@ tf_cc_test( "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", - "@grpc//:grpc++", ], ) @@ -71,9 +71,9 @@ cc_library( hdrs = ["grpc_service.h"], deps = [ ":xla_service_proto", + "//tensorflow:grpc++", "//tensorflow/compiler/xla/service", "//tensorflow/compiler/xla/service:platform_util", "//tensorflow/core/distributed_runtime/rpc:grpc_util", - "@grpc//:grpc++", ], ) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index ae0749edb9..fe99f700d2 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -2550,7 +2550,6 @@ cc_library( name = "hlo_tfgraph_builder", srcs = ["hlo_tfgraph_builder.cc"], hdrs = ["hlo_tfgraph_builder.h"], - visibility = ["//tensorflow/compiler/xla/tools:__pkg__"], deps = [ ":hlo", "//tensorflow/compiler/xla:literal_util", diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 088c97fbe3..5aaeec802f 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1515,6 +1515,7 @@ bool HloInstruction::IdenticalSlowPath( // Remaining instructions with special values. 
case HloOpcode::kCall: + return eq_computations(to_apply(), other.to_apply()); case HloOpcode::kConditional: return eq_computations(true_computation(), other.true_computation()) && eq_computations(false_computation(), other.false_computation()); diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index e1c5123774..d8ca99dfd1 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -924,6 +924,40 @@ TEST_F(HloInstructionTest, IdenticalInstructions) { *HloInstruction::CreateBinary(shape, HloOpcode::kDivide, op1, op2))); } +TEST_F(HloInstructionTest, IdenticalCallInstructions) { + const char* const hlo_string = R"( +HloModule Module + +subcomp1 (x: f32[]) -> f32[] { + x = f32[] parameter(0) + ROOT n = f32[] sine(x) +} + +subcomp2 (x: f32[]) -> f32[] { + x = f32[] parameter(0) + ROOT n = f32[] cosine(x) +} + +ENTRY entry (param: f32[]) -> (f32[], f32[], f32[]) { + p = f32[] parameter(0) + t1 = f32[] call(p), to_apply=subcomp1 + t2 = f32[] call(p), to_apply=subcomp1 + t3 = f32[] call(p), to_apply=subcomp2 + ROOT t = (f32[], f32[], f32[]) tuple(t1, t2, t3) + } +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseHloString(hlo_string)); + + auto* root = module->entry_computation()->root_instruction(); + auto* t1 = root->operand(0); + auto* t2 = root->operand(1); + auto* t3 = root->operand(2); + + EXPECT_TRUE(StructuralEqual(*t1, *t2)); + EXPECT_FALSE(StructuralEqual(*t1, *t3)); +} + TEST_F(HloInstructionTest, FunctionVisitor) { // Verify the function visitor HloInstruction::Accept visits all instructions // from a root properly given the following graph: diff --git a/tensorflow/contrib/autograph/converters/BUILD b/tensorflow/contrib/autograph/converters/BUILD index 931ff62064..b2e2e27673 100644 --- a/tensorflow/contrib/autograph/converters/BUILD +++ b/tensorflow/contrib/autograph/converters/BUILD @@ -120,7 +120,10 @@ py_test( name = "decorators_test", srcs = ["decorators_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], + tags = [ + "no_pip", + "no_windows", + ], deps = [ ":converters", "//tensorflow/contrib/autograph/core:test_lib", diff --git a/tensorflow/contrib/autograph/operators/control_flow.py b/tensorflow/contrib/autograph/operators/control_flow.py index 671c9ccc13..988df70157 100644 --- a/tensorflow/contrib/autograph/operators/control_flow.py +++ b/tensorflow/contrib/autograph/operators/control_flow.py @@ -51,7 +51,7 @@ def for_stmt(iter_, extra_test, body, init_state): Args: iter_: The entity being iterated over. extra_test: Callable with the state as arguments, and boolean return type. - An additionnal loop condition. + An additional loop condition. body: Callable with the iterate and the state as arguments, and state as return type. The actual loop body. init_state: Tuple containing the initial state. diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/cfg.py b/tensorflow/contrib/autograph/pyct/static_analysis/cfg.py index 358d56ce20..4acc4ed66a 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/cfg.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/cfg.py @@ -286,7 +286,7 @@ class Forward(object): # TODO(alexbw): see if we can simplify by visiting breadth-first def visit(self, node): - """Depth-first walking the CFG, applying dataflow information propagtion.""" + """Depth-first walking the CFG, applying dataflow info propagation.""" # node.value is None only for the exit CfgNode. 
if not node.value: return diff --git a/tensorflow/contrib/autograph/pyct/transformer.py b/tensorflow/contrib/autograph/pyct/transformer.py index 3328dde7aa..7655811830 100644 --- a/tensorflow/contrib/autograph/pyct/transformer.py +++ b/tensorflow/contrib/autograph/pyct/transformer.py @@ -218,7 +218,7 @@ class Base(gast.NodeTransformer): # TODO(mdan): Once we have error tracing, we may be able to just go to SSA. def apply_to_single_assignments(self, targets, values, apply_fn): - """Applies a fuction to each individual assignment. + """Applies a function to each individual assignment. This function can process a possibly-unpacked (e.g. a, b = c, d) assignment. It tries to break down the unpacking if possible. In effect, it has the same @@ -246,7 +246,7 @@ class Base(gast.NodeTransformer): targets field of an ast.Assign node. values: an AST node. apply_fn: a function of a single argument, which will be called with the - respective nodes of each single assignment. The signaure is + respective nodes of each single assignment. The signature is apply_fn(target, value), no return value. """ if not isinstance(targets, (list, tuple)): diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index e524e9e743..4ca7a1b28c 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -336,40 +336,14 @@ endif() # MKL Support if (tensorflow_ENABLE_MKL_SUPPORT) add_definitions(-DINTEL_MKL -DEIGEN_USE_VML) - if (WIN32) - find_path(MKL_HOME_PLATFORM mkl - PATHS ${MKL_HOME} ${MKL_HOME}/../ ${MKL_HOME}/../../ - $ENV{MKLROOT} $ENV{MKLROOT}/../ $ENV{MKLROOT}/../../ - PATH_SUFFIXES windows) - set(MKL_INCLUDE_DIRS ${MKL_HOME_PLATFORM}/mkl/include) - set(MKL_LINK_DIRS - ${MKL_HOME_PLATFORM}/mkl/lib/intel64 - ${MKL_HOME_PLATFORM}/tbb/lib/intel64/vc_mt - ${MKL_HOME_PLATFORM}/compiler/lib/intel64 - ${MKL_HOME_PLATFORM}/mkl/tools/builder/lib) - set(MKL_REDIST_DLL_DIRS - ${MKL_HOME_PLATFORM}/redist/intel64/mkl - ${MKL_HOME_PLATFORM}/redist/intel64/tbb/vc_mt - ${MKL_HOME_PLATFORM}/redist/intel64/compiler) - list(APPEND tensorflow_EXTERNAL_LIBRARIES - mkl_intel_lp64_dll mkl_sequential_dll mkl_core_dll mkl_rt mkl_cdll_intel64) - endif() - if (UNIX) - # Fix me: complete the path on linux - find_path(MKL_HOME_PLATFORM mkl - HINTS ${MKL_HOME} ${MKL_HOME}/../ ${MKL_HOME}/../../ - $ENV{MKLROOT} $ENV{MKLROOT}/../ $ENV{MKLROOT}/../../ - PATH_SUFFIXES linux) - set(MKL_INCLUDE_DIRS ${MKL_HOME_PLATFORM}/mkl/include) - set(MKL_LINK_DIRS) # incompleted - set(MKL_REDIST_SO_DIRS) # incompleted - endif() - include_directories(${MKL_INCLUDE_DIRS}) - link_directories(${MKL_LINK_DIRS}) + include(mkl) + list(APPEND tensorflow_EXTERNAL_LIBRARIES ${mkl_STATIC_LIBRARIES}) + list(APPEND tensorflow_EXTERNAL_DEPENDENCIES mkl_copy_shared_to_destination) + include_directories(${mkl_INCLUDE_DIRS}) if (tensorflow_ENABLE_MKLDNN_SUPPORT) include(mkldnn) list(APPEND tensorflow_EXTERNAL_LIBRARIES ${mkldnn_STATIC_LIBRARIES}) - list(APPEND tensorflow_EXTERNAL_DEPENDENCIES mkldnn) + list(APPEND tensorflow_EXTERNAL_DEPENDENCIES mkldnn_copy_shared_to_destination) include_directories(${mkldnn_INCLUDE_DIRS}) else (tensorflow_ENABLE_MKLDNN_SUPPORT) add_definitions(-DINTEL_MKL_ML) diff --git a/tensorflow/contrib/cmake/external/double_conversion.cmake b/tensorflow/contrib/cmake/external/double_conversion.cmake index 527ccdc8d8..5c5adaf579 100644 --- a/tensorflow/contrib/cmake/external/double_conversion.cmake +++ b/tensorflow/contrib/cmake/external/double_conversion.cmake @@ -16,15 +16,15 
@@ include (ExternalProject) set(double_conversion_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/double_conversion/src/double_conversion) set(double_conversion_URL https://github.com/google/double-conversion.git) -set(double_conversion_TAG 5664746) +set(double_conversion_TAG 3992066a95b823efc8ccc1baf82a1cfc73f6e9b8) set(double_conversion_BUILD ${double_conversion_INCLUDE_DIR}) set(double_conversion_LIBRARIES ${double_conversion_BUILD}/double-conversion/libdouble-conversion.so) set(double_conversion_INCLUDES ${double_conversion_BUILD}) if(WIN32) - set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/double-conversion/$(Configuration)/double-conversion.lib) + set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/$(Configuration)/double-conversion.lib) else() - set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/double-conversion/libdouble-conversion.a) + set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/libdouble-conversion.a) endif() set(double_conversion_HEADERS diff --git a/tensorflow/contrib/cmake/external/mkl.cmake b/tensorflow/contrib/cmake/external/mkl.cmake new file mode 100644 index 0000000000..a172e3a41a --- /dev/null +++ b/tensorflow/contrib/cmake/external/mkl.cmake @@ -0,0 +1,68 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +include (ExternalProject) + +# NOTE: Different from mkldnn.cmake, this file is meant to download mkl libraries +set(mkl_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/mkl/src/mkl/include) +set(mkl_BIN_DIRS ${CMAKE_CURRENT_BINARY_DIR}/mkl/bin) +set(mkl_WIN mklml_win_2018.0.3.20180406.zip) # match for v0.14 +set(mkl_MAC mklml_mac_2018.0.3.20180406.tgz) +set(mkl_LNX mklml_lnx_2018.0.3.20180406.tgz) +set(mkl_TAG v0.14) +set(mkl_URL https://github.com/intel/mkl-dnn/releases) + +if (WIN32) + set(mkl_DOWNLOAD_URL ${mkl_URL}/download/${mkl_TAG}/${mkl_WIN}) + list(APPEND mkl_STATIC_LIBRARIES + ${CMAKE_CURRENT_BINARY_DIR}/mkl/src/mkl/lib/mklml.lib) + list(APPEND mkl_STATIC_LIBRARIES + ${CMAKE_CURRENT_BINARY_DIR}/mkl/src/mkl/lib/libiomp5md.lib) + list(APPEND mkl_SHARED_LIBRARIES + ${CMAKE_CURRENT_BINARY_DIR}/mkl/src/mkl/lib/mklml.dll) + list(APPEND mkl_SHARED_LIBRARIES + ${CMAKE_CURRENT_BINARY_DIR}/mkl/src/mkl/lib/libiomp5md.dll) +elseif (UNIX) + set(mkl_DOWNLOAD_URL ${mkl_URL}/download/${mkl_TAG}/${mkl_LNX}) + list(APPEND mkl_SHARED_LIBRARIES + ${CMAKE_CURRENT_BINARY_DIR}/mkl/src/mkl/lib/libiomp5.so) + list(APPEND mkl_SHARED_LIBRARIES + ${CMAKE_CURRENT_BINARY_DIR}/mkl/src/mkl/lib/libmklml_gnu.so) + list(APPEND mkl_SHARED_LIBRARIES + ${CMAKE_CURRENT_BINARY_DIR}/mkl/src/mkl/lib/libmklml_intel.so) +elseif (APPLE) + set(mkl_DOWNLOAD_URL ${mkl_URL}/download/${mkl_TAG}/${mkl_MAC}) + #TODO need more information +endif () + +ExternalProject_Add(mkl + PREFIX mkl + URL ${mkl_DOWNLOAD_URL} + DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "") + +# put mkl dynamic libraries in one bin directory +add_custom_target(mkl_create_destination_dir + COMMAND ${CMAKE_COMMAND} -E make_directory ${mkl_BIN_DIRS} + DEPENDS mkl) + +add_custom_target(mkl_copy_shared_to_destination DEPENDS mkl_create_destination_dir) + +foreach(dll_file ${mkl_SHARED_LIBRARIES}) + add_custom_command(TARGET mkl_copy_shared_to_destination PRE_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${dll_file} ${mkl_BIN_DIRS}) +endforeach() diff --git a/tensorflow/contrib/cmake/external/mkldnn.cmake b/tensorflow/contrib/cmake/external/mkldnn.cmake index a639fdee36..8123ee1f39 100644 --- a/tensorflow/contrib/cmake/external/mkldnn.cmake +++ b/tensorflow/contrib/cmake/external/mkldnn.cmake @@ -22,8 +22,11 @@ set(mkldnn_TAG 3063b2e4c943983f6bf5f2fb9a490d4a998cd291) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") set(mkldnn_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/src/Release/mkldnn.lib) + set(mkldnn_SHARED_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/src/Release/mkldnn.dll) + set(mkldnn_BUILD ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/src/Release) else() set(mkldnn_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/src/mkldnn.lib) + set(mkldnn_SHARED_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/src/mkldnn.dll) endif() else() set(mkldnn_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/src/libmkldnn.a) @@ -31,6 +34,7 @@ endif() ExternalProject_Add(mkldnn PREFIX mkldnn + DEPENDS mkl GIT_REPOSITORY ${mkldnn_URL} GIT_TAG ${mkldnn_TAG} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" @@ -40,5 +44,11 @@ ExternalProject_Add(mkldnn CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF - -DMKLINC:STRING=${MKL_INCLUDE_DIRS} + -DMKLINC:STRING=${mkl_INCLUDE_DIRS} ) + +# since mkldnn depends on mkl, copy the mkldnn.dll 
together with mklml.dll to mkl_bin_dirs
+add_custom_target(mkldnn_copy_shared_to_destination DEPENDS mkldnn)
+
+add_custom_command(TARGET mkldnn_copy_shared_to_destination PRE_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${mkldnn_SHARED_LIBRARIES} ${mkl_BIN_DIRS})
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index df6702a42c..e3b59001bc 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -755,26 +755,65 @@ set(api_init_list_file "${tensorflow_source_dir}/api_init_files_list.txt")
 file(WRITE "${api_init_list_file}" "${api_init_files}")
 # Run create_python_api.py to generate __init__.py files.
-add_custom_command(
- OUTPUT ${api_init_files}
- DEPENDS tf_python_ops tf_python_copy_scripts_to_destination pywrap_tensorflow_internal tf_python_touchup_modules tf_extension_ops
-
- # tensorflow/__init__.py depends on files generated in this step. So, remove it while
- # this step is running since the files aren't there yet.
- COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/__init__.py
-
- # Run create_python_api.py to generate API init files.
- COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}/tf_python ${PYTHON_EXECUTABLE}
- "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/tools/api/generator/create_python_api.py"
- "--root_init_template=${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/api_template.__init__.py"
- "--apidir=${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow"
- "--package=tensorflow.python"
- "--apiname=tensorflow"
- "${api_init_list_file}"
- COMMENT "Generating __init__.py files for Python API."
- WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/tf_python"
-)
+### TODO
+# In order to download and compile MKL/MKL-DNN automatically in the cmake script, the mkl-built libraries should be added to the
+# system path so that the python executor can load them. However `add_custom_command` has an issue with `COMMAND ${CMAKE_COMMAND} -E env PATH=`:
+# an argument containing multiple paths (such as D:/;D:/mkl) is split at the semicolon into separate strings, so the command fails to
+# recognize the paths. Since CUDA is not built with MKL, passing only the MKL build directory to this command works around the issue.
+# To avoid overriding the CUDA and system path in other circumstances, an `if-else` branch is used here; it should be
+# removed once the path issue is resolved.
+###
+
+if (tensorflow_ENABLE_MKL_SUPPORT)
+ # Add the mkl dist dlls to the system path for python.
+ # TODO: In the current cmake version, PY_RUNTIME_ENV behaves strangely with multiple paths,
+ # so we have to specify only one path in it to work around the issue. We need this if/else
+ # to avoid overwriting the CUDA environment.
+ set(PY_RUNTIME_ENV ${mkl_BIN_DIRS})
+ add_custom_command(
+ OUTPUT ${api_init_files}
+ DEPENDS tf_python_ops tf_python_copy_scripts_to_destination pywrap_tensorflow_internal tf_python_touchup_modules tf_extension_ops
+
+ # tensorflow/__init__.py depends on files generated in this step. So, remove it while
+ # this step is running since the files aren't there yet.
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/__init__.py
+
+ # Run create_python_api.py to generate API init files.
+ COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}/tf_python PATH=${PY_RUNTIME_ENV} ${PYTHON_EXECUTABLE} + "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/tools/api/generator/create_python_api.py" + "--root_init_template=${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/api_template.__init__.py" + "--apidir=${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow" + "--package=tensorflow.python" + "--apiname=tensorflow" + "${api_init_list_file}" + + COMMENT "Generating __init__.py files for Python API." + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/tf_python" + VERBATIM + ) +else (tensorflow_ENABLE_MKL_SUPPORT) + add_custom_command( + OUTPUT ${api_init_files} + DEPENDS tf_python_ops tf_python_copy_scripts_to_destination pywrap_tensorflow_internal tf_python_touchup_modules tf_extension_ops + + # tensorflow/__init__.py depends on files generated in this step. So, remove it while + # this step is running since the files aren't there yet. + COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/__init__.py + + # Run create_python_api.py to generate API init files. + COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}/tf_python ${PYTHON_EXECUTABLE} + "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/tools/api/generator/create_python_api.py" + "--root_init_template=${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/api_template.__init__.py" + "--apidir=${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow" + "--package=tensorflow.python" + "--apiname=tensorflow" + "${api_init_list_file}" + + COMMENT "Generating __init__.py files for Python API." + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/tf_python" + ) +endif (tensorflow_ENABLE_MKL_SUPPORT) add_custom_target(tf_python_api SOURCES ${api_init_files}) add_dependencies(tf_python_api tf_python_ops) diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake index 38f40452b5..fdf522f1fd 100644 --- a/tensorflow/contrib/cmake/tf_shared_lib.cmake +++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake @@ -145,3 +145,8 @@ install(DIRECTORY ${tensorflow_source_dir}/third_party/eigen3/ # unsupported Eigen directory install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/unsupported/Eigen/ DESTINATION include/unsupported/Eigen) +# mkl +if (tensorflow_ENABLE_MKL_SUPPORT) + install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/mkl/src/mkl/include/ + DESTINATION include/mkl) +endif (tensorflow_ENABLE_MKL_SUPPORT) diff --git a/tensorflow/contrib/constrained_optimization/README.md b/tensorflow/contrib/constrained_optimization/README.md index c65a150464..cb1dd7d836 100644 --- a/tensorflow/contrib/constrained_optimization/README.md +++ b/tensorflow/contrib/constrained_optimization/README.md @@ -46,7 +46,7 @@ document. Imagine that we want to constrain the recall of a binary classifier to be at least 90%. Since the recall is proportional to the number of true positive classifications, which itself is a sum of indicator functions, this constraint -is non-differentible, and therefore cannot be used in a problem that will be +is non-differentiable, and therefore cannot be used in a problem that will be optimized using a (stochastic) gradient-based algorithm. 
For this and similar problems, TFCO supports so-called *proxy constraints*, diff --git a/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py b/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py index 04014ab4ae..3791dae8d7 100644 --- a/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py +++ b/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py @@ -169,8 +169,8 @@ def _project_stochastic_matrix_wrt_euclidean_norm(matrix): del old_inactive # Needed by the condition, but not the body. iteration += 1 scale = (1.0 - standard_ops.reduce_sum( - matrix, axis=0, keep_dims=True)) / standard_ops.maximum( - 1.0, standard_ops.reduce_sum(inactive, axis=0, keep_dims=True)) + matrix, axis=0, keepdims=True)) / standard_ops.maximum( + 1.0, standard_ops.reduce_sum(inactive, axis=0, keepdims=True)) matrix += scale * inactive new_inactive = standard_ops.to_float(matrix > 0) matrix *= new_inactive @@ -206,10 +206,10 @@ def _project_log_stochastic_matrix_wrt_kl_divergence(log_matrix): # For numerical reasons, make sure that the largest matrix element is zero # before exponentiating. - log_matrix -= standard_ops.reduce_max(log_matrix, axis=0, keep_dims=True) + log_matrix -= standard_ops.reduce_max(log_matrix, axis=0, keepdims=True) log_matrix -= standard_ops.log( standard_ops.reduce_sum( - standard_ops.exp(log_matrix), axis=0, keep_dims=True)) + standard_ops.exp(log_matrix), axis=0, keepdims=True)) return log_matrix diff --git a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py index 33c48e20be..5590a4bf78 100644 --- a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py @@ -58,6 +58,7 @@ class SlideDatasetTest(test.TestCase): [t.shape.as_list() for t in get_next]) with self.test_session() as sess: + # stride < window_size. # Slide over a finite input, where the window_size divides the # total number of elements. sess.run(init_op, feed_dict={count: 20, window_size: 14, stride: 7}) @@ -71,11 +72,9 @@ class SlideDatasetTest(test.TestCase): result_component[j]) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) - # Slide over a finite input, where the window_size does not # divide the total number of elements. sess.run(init_op, feed_dict={count: 20, window_size: 17, stride: 9}) - num_batches = (20 * 7 - 17) // 9 + 1 for i in range(num_batches): result = sess.run(get_next) @@ -86,6 +85,41 @@ class SlideDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + # stride == window_size. + sess.run(init_op, feed_dict={count: 20, window_size: 14, stride: 14}) + num_batches = 20 * 7 // 14 + for i in range(num_batches): + result = sess.run(get_next) + for component, result_component in zip(components, result): + for j in range(14): + self.assertAllEqual(component[(i*14 + j) % 7]**2, + result_component[j]) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + # stride > window_size. 
+ sess.run(init_op, feed_dict={count: 20, window_size: 10, stride: 14}) + num_batches = 20 * 7 // 14 + for i in range(num_batches): + result = sess.run(get_next) + for component, result_component in zip(components, result): + for j in range(10): + self.assertAllEqual(component[(i*14 + j) % 7]**2, + result_component[j]) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + # Drop the last batch which is smaller than window_size. + sess.run(init_op, feed_dict={count: 20, window_size: 14, stride: 19}) + num_batches = (20 * 7 - 7) // 19 # = 19 * 7 // 19 + for i in range(num_batches): + result = sess.run(get_next) + for component, result_component in zip(components, result): + for j in range(14): + self.assertAllEqual(component[(i*19 + j) % 7]**2, + result_component[j]) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + # Slide over a finite input, which is less than window_size, # should fail straight away. sess.run(init_op, feed_dict={count: 1, window_size: 10, stride: 4}) @@ -108,10 +142,6 @@ class SlideDatasetTest(test.TestCase): # Invalid stride should be an initialization time error. with self.assertRaises(errors.InvalidArgumentError): sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 0}) - with self.assertRaises(errors.InvalidArgumentError): - sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 3}) - with self.assertRaises(errors.InvalidArgumentError): - sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 5}) def assertSparseValuesEqual(self, a, b): self.assertAllEqual(a.indices, b.indices) diff --git a/tensorflow/contrib/data/python/ops/sliding.py b/tensorflow/contrib/data/python/ops/sliding.py index f935beb1a9..3f3c5ca17c 100644 --- a/tensorflow/contrib/data/python/ops/sliding.py +++ b/tensorflow/contrib/data/python/ops/sliding.py @@ -86,7 +86,7 @@ def sliding_window_batch(window_size, stride=1): elements in the sliding window. stride: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the steps moving the sliding window forward for one iteration. The default - is `1`. It must be in `[1, window_size)`. + is `1`. It must be positive. Returns: A `Dataset` transformation function, which can be passed to diff --git a/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb b/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb new file mode 100644 index 0000000000..54ebcad8e9 --- /dev/null +++ b/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb @@ -0,0 +1,909 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "nmt_with_attention.ipynb", + "version": "0.3.2", + "views": {}, + "default_view": {}, + "provenance": [ + { + "file_id": "1C4fpM7_7IL8ZzF7Gc5abywqQjeQNS2-U", + "timestamp": 1527858391290 + }, + { + "file_id": "1pExo6aUuw0S6MISFWoinfJv0Ftm9V4qv", + "timestamp": 1527776041613 + } + ], + "private_outputs": true, + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "metadata": { + "id": "AOpGoE2T-YXS", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "##### Copyright 2018 The TensorFlow Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\").\n", + "\n", + "# Neural Machine Translation with Attention\n", + "\n", + "
\n", + "\n", + " Run in Google Colab \n", + "\n", + "View source on Github
" + ] + }, + { + "metadata": { + "id": "CiwtNgENbx2g", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "This notebook trains a sequence to sequence (seq2seq) model for Spanish to English translation using [tf.keras](https://www.tensorflow.org/programmers_guide/keras) and [eager execution](https://www.tensorflow.org/programmers_guide/eager). This is an advanced example that assumes some knowledge of sequence to sequence models.\n", + "\n", + "After training the model in this notebook, you will be able to input a Spanish sentence, such as *\"¿todavia estan en casa?\"*, and return the English translation: *\"are you still at home?\"*\n", + "\n", + "The translation quality is reasonable for a toy example, but the generated attention plot is perhaps more interesting. This shows which parts of the input sentence has the model's attention while translating:\n", + "\n", + "\"spanish-english\n", + "\n", + "Note: This example takes approximately 10 mintues to run on a single P100 GPU." + ] + }, + { + "metadata": { + "id": "tnxXKDjq3jEL", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "from __future__ import absolute_import, division, print_function\n", + "\n", + "# Import TensorFlow >= 1.9 and enable eager execution\n", + "import tensorflow as tf\n", + "\n", + "tf.enable_eager_execution()\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "import unicodedata\n", + "import re\n", + "import numpy as np\n", + "import os\n", + "import time\n", + "\n", + "print(tf.__version__)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "wfodePkj3jEa", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "## Download and prepare the dataset\n", + "\n", + "We'll use a language dataset provided by http://www.manythings.org/anki/. This dataset contains language translation pairs in the format:\n", + "\n", + "```\n", + "May I borrow this book?\t¿Puedo tomar prestado este libro?\n", + "```\n", + "\n", + "There are a variety of languages available, but we'll use the English-Spanish dataset. For convenience, we've hosted a copy of this dataset on Google Cloud, but you can also download your own copy. After downloading the dataset, here are the steps we'll take to prepare the data:\n", + "\n", + "1. Add a *start* and *end* token to each sentence.\n", + "2. Clean the sentences by removing special characters.\n", + "3. Create a word index and reverse word index (dictionaries mapping from word → id and id → word).\n", + "4. Pad each sentence to a maximum length." 
+ ] + }, + { + "metadata": { + "id": "kRVATYOgJs1b", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "# Download the file\n", + "path_to_zip = tf.keras.utils.get_file(\n", + " 'spa-eng.zip', origin='http://download.tensorflow.org/data/spa-eng.zip', \n", + " extract=True)\n", + "\n", + "path_to_file = os.path.dirname(path_to_zip)+\"/spa-eng/spa.txt\"" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "rd0jw-eC3jEh", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "# Converts the unicode file to ascii\n", + "def unicode_to_ascii(s):\n", + " return ''.join(c for c in unicodedata.normalize('NFD', s)\n", + " if unicodedata.category(c) != 'Mn')\n", + "\n", + "\n", + "def preprocess_sentence(w):\n", + " w = unicode_to_ascii(w.lower().strip())\n", + " \n", + " # creating a space between a word and the punctuation following it\n", + " # eg: \"he is a boy.\" => \"he is a boy .\" \n", + " # Reference:- https://stackoverflow.com/questions/3645931/python-padding-punctuation-with-white-spaces-keeping-punctuation\n", + " w = re.sub(r\"([?.!,¿])\", r\" \\1 \", w)\n", + " w = re.sub(r'[\" \"]+', \" \", w)\n", + " \n", + " # replacing everything with space except (a-z, A-Z, \".\", \"?\", \"!\", \",\")\n", + " w = re.sub(r\"[^a-zA-Z?.!,¿]+\", \" \", w)\n", + " \n", + " w = w.rstrip().strip()\n", + " \n", + " # adding a start and an end token to the sentence\n", + " # so that the model know when to start and stop predicting.\n", + " w = ' ' + w + ' '\n", + " return w" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "OHn4Dct23jEm", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "# 1. Remove the accents\n", + "# 2. Clean the sentences\n", + "# 3. Return word pairs in the format: [ENGLISH, SPANISH]\n", + "def create_dataset(path, num_examples):\n", + " lines = open(path, encoding='UTF-8').read().strip().split('\\n')\n", + " \n", + " word_pairs = [[preprocess_sentence(w) for w in l.split('\\t')] for l in lines[:num_examples]]\n", + " \n", + " return word_pairs" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "9xbqO7Iie9bb", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "# This class creates a word -> index mapping (e.g,. 
\"dad\" -> 5) and vice-versa \n", + "# (e.g., 5 -> \"dad\") for each language,\n", + "class LanguageIndex():\n", + " def __init__(self, lang):\n", + " self.lang = lang\n", + " self.word2idx = {}\n", + " self.idx2word = {}\n", + " self.vocab = set()\n", + " \n", + " self.create_index()\n", + " \n", + " def create_index(self):\n", + " for phrase in self.lang:\n", + " self.vocab.update(phrase.split(' '))\n", + " \n", + " self.vocab = sorted(self.vocab)\n", + " \n", + " self.word2idx[''] = 0\n", + " for index, word in enumerate(self.vocab):\n", + " self.word2idx[word] = index + 1\n", + " \n", + " for word, index in self.word2idx.items():\n", + " self.idx2word[index] = word" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "eAY9k49G3jE_", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "def max_length(tensor):\n", + " return max(len(t) for t in tensor)\n", + "\n", + "\n", + "def load_dataset(path, num_examples):\n", + " # creating cleaned input, output pairs\n", + " pairs = create_dataset(path, num_examples)\n", + "\n", + " # index language using the class defined above \n", + " inp_lang = LanguageIndex(sp for en, sp in pairs)\n", + " targ_lang = LanguageIndex(en for en, sp in pairs)\n", + " \n", + " # Vectorize the input and target languages\n", + " \n", + " # Spanish sentences\n", + " input_tensor = [[inp_lang.word2idx[s] for s in sp.split(' ')] for en, sp in pairs]\n", + " \n", + " # English sentences\n", + " target_tensor = [[targ_lang.word2idx[s] for s in en.split(' ')] for en, sp in pairs]\n", + " \n", + " # Calculate max_length of input and output tensor\n", + " # Here, we'll set those to the longest sentence in the dataset\n", + " max_length_inp, max_length_tar = max_length(input_tensor), max_length(target_tensor)\n", + " \n", + " # Padding the input and output tensor to the maximum length\n", + " input_tensor = tf.keras.preprocessing.sequence.pad_sequences(input_tensor, \n", + " maxlen=max_length_inp,\n", + " padding='post')\n", + " \n", + " target_tensor = tf.keras.preprocessing.sequence.pad_sequences(target_tensor, \n", + " maxlen=max_length_tar, \n", + " padding='post')\n", + " \n", + " return input_tensor, target_tensor, inp_lang, targ_lang, max_length_inp, max_length_tar" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "GOi42V79Ydlr", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Limit the size of the dataset to experiment faster (optional)\n", + "\n", + "Training on the complete dataset of >100,000 sentences will take a long time. 
To train faster, we can limit the size of the dataset to 30,000 sentences (of course, translation quality degrades with less data):" + ] + }, + { + "metadata": { + "id": "cnxC7q-j3jFD", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "# Try experimenting with the size of that dataset\n", + "num_examples = 30000\n", + "input_tensor, target_tensor, inp_lang, targ_lang, max_length_inp, max_length_targ = load_dataset(path_to_file, num_examples)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "4QILQkOs3jFG", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "# Creating training and validation sets using an 80-20 split\n", + "input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.2)\n", + "\n", + "# Show length\n", + "len(input_tensor_train), len(target_tensor_train), len(input_tensor_val), len(target_tensor_val)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "rgCLkfv5uO3d", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### Create a tf.data dataset" + ] + }, + { + "metadata": { + "id": "TqHsArVZ3jFS", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "BUFFER_SIZE = len(input_tensor_train)\n", + "BATCH_SIZE = 64\n", + "embedding_dim = 256\n", + "units = 1024\n", + "vocab_inp_size = len(inp_lang.word2idx)\n", + "vocab_tar_size = len(targ_lang.word2idx)\n", + "\n", + "dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)\n", + "dataset = dataset.apply(tf.contrib.data.batch_and_drop_remainder(BATCH_SIZE))" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "TNfHIF71ulLu", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "## Write the encoder and decoder model\n", + "\n", + "Here, we'll implement an encoder-decoder model with attention which you can read about in the TensorFlow [Neural Machine Translation (seq2seq) tutorial](https://www.tensorflow.org/tutorials/seq2seq). This example uses a more recent set of APIs. This notebook implements the [attention equations](https://www.tensorflow.org/tutorials/seq2seq#background_on_the_attention_mechanism) from the seq2seq tutorial. The following diagram shows that each input words is assigned a weight by the attention mechanism which is then used by the decoder to predict the next word in the sentence.\n", + "\n", + "\"attention\n", + "\n", + "The input is put through an encoder model which gives us the encoder output of shape *(batch_size, max_length, hidden_size)* and the encoder hidden state of shape *(batch_size, hidden_size)*. \n", + "\n", + "Here are the equations that are implemented:\n", + "\n", + "\"attention\n", + "\"attention\n", + "\n", + "We're using *Bahdanau attention*. Lets decide on notation before writing the simplified form:\n", + "\n", + "* FC = Fully connected (dense) layer\n", + "* EO = Encoder output\n", + "* H = hidden state\n", + "* X = input to the decoder\n", + "\n", + "And the pseudo-code:\n", + "\n", + "* `score = FC(tanh(FC(EO) + FC(H)))`\n", + "* `attention weights = softmax(score, axis = 1)`. 
Softmax by default is applied on the last axis but here we want to apply it on the *1st axis*, since the shape of score is *(batch_size, max_length, hidden_size)*. `Max_length` is the length of our input. Since we are trying to assign a weight to each input, softmax should be applied on that axis.\n", + "* `context vector = sum(attention weights * EO, axis = 1)`. Same reason as above for choosing axis as 1.\n", + "* `embedding output` = The input to the decoder X is passed through an embedding layer.\n", + "* `merged vector = concat(embedding output, context vector)`\n", + "* This merged vector is then given to the GRU\n", + " \n", + "The shapes of all the vectors at each step have been specified in the comments in the code:" + ] + }, + { + "metadata": { + "id": "avyJ_4VIUoHb", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "def gru(units):\n", + " # If you have a GPU, we recommend using CuDNNGRU(provides a 3x speedup than GRU)\n", + " # the code automatically does that.\n", + " if tf.test.is_gpu_available():\n", + " return tf.keras.layers.CuDNNGRU(units, \n", + " return_sequences=True, \n", + " return_state=True, \n", + " recurrent_initializer='glorot_uniform')\n", + " else:\n", + " return tf.keras.layers.GRU(units, \n", + " return_sequences=True, \n", + " return_state=True, \n", + " recurrent_activation='sigmoid', \n", + " recurrent_initializer='glorot_uniform')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "nZ2rI24i3jFg", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "class Encoder(tf.keras.Model):\n", + " def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):\n", + " super(Encoder, self).__init__()\n", + " self.batch_sz = batch_sz\n", + " self.enc_units = enc_units\n", + " self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)\n", + " self.gru = gru(self.enc_units)\n", + " \n", + " def call(self, x, hidden):\n", + " x = self.embedding(x)\n", + " output, state = self.gru(x, initial_state = hidden) \n", + " return output, state\n", + " \n", + " def initialize_hidden_state(self):\n", + " return tf.zeros((self.batch_sz, self.enc_units))" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "yJ_B3mhW3jFk", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "class Decoder(tf.keras.Model):\n", + " def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):\n", + " super(Decoder, self).__init__()\n", + " self.batch_sz = batch_sz\n", + " self.dec_units = dec_units\n", + " self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)\n", + " self.gru = gru(self.dec_units)\n", + " self.fc = tf.keras.layers.Dense(vocab_size)\n", + " \n", + " # used for attention\n", + " self.W1 = tf.keras.layers.Dense(self.dec_units)\n", + " self.W2 = tf.keras.layers.Dense(self.dec_units)\n", + " self.V = tf.keras.layers.Dense(1)\n", + " \n", + " def call(self, x, hidden, enc_output):\n", + " # enc_output shape == (batch_size, max_length, hidden_size)\n", + " \n", + " # hidden shape == (batch_size, hidden size)\n", + " # hidden_with_time_axis shape == (batch_size, 1, hidden size)\n", + " # we are doing this to perform addition to calculate the score\n", + " hidden_with_time_axis = tf.expand_dims(hidden, 1)\n", + " \n", + " # 
score shape == (batch_size, max_length, hidden_size)\n", + " score = tf.nn.tanh(self.W1(enc_output) + self.W2(hidden_with_time_axis))\n", + " \n", + " # attention_weights shape == (batch_size, max_length, 1)\n", + " # we get 1 at the last axis because we are applying score to self.V\n", + " attention_weights = tf.nn.softmax(self.V(score), axis=1)\n", + " \n", + " # context_vector shape after sum == (batch_size, hidden_size)\n", + " context_vector = attention_weights * enc_output\n", + " context_vector = tf.reduce_sum(context_vector, axis=1)\n", + " \n", + " # x shape after passing through embedding == (batch_size, 1, embedding_dim)\n", + " x = self.embedding(x)\n", + " \n", + " # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)\n", + " x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)\n", + " \n", + " # passing the concatenated vector to the GRU\n", + " output, state = self.gru(x)\n", + " \n", + " # output shape == (batch_size * max_length, hidden_size)\n", + " output = tf.reshape(output, (-1, output.shape[2]))\n", + " \n", + " # output shape == (batch_size * max_length, vocab)\n", + " x = self.fc(output)\n", + " \n", + " return x, state, attention_weights\n", + " \n", + " def initialize_hidden_state(self):\n", + " return tf.zeros((self.batch_sz, self.dec_units))" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "P5UY8wko3jFp", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)\n", + "decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "_ch_71VbIRfK", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "## Define the optimizer and the loss function" + ] + }, + { + "metadata": { + "id": "WmTHr5iV3jFr", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "optimizer = tf.train.AdamOptimizer()\n", + "\n", + "\n", + "def loss_function(real, pred):\n", + " mask = 1 - np.equal(real, 0)\n", + " loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=real, logits=pred) * mask\n", + " return tf.reduce_mean(loss_)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "hpObfY22IddU", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "## Training\n", + "\n", + "1. Pass the *input* through the *encoder* which return *encoder output* and the *encoder hidden state*.\n", + "2. The encoder output, encoder hidden state and the decoder input (which is the *start token*) is passed to the decoder.\n", + "3. The decoder returns the *predictions* and the *decoder hidden state*.\n", + "4. The decoder hidden state is then passed back into the model and the predictions are used to calculate the loss.\n", + "5. Use *teacher forcing* to decide the next input to the decoder.\n", + "6. *Teacher forcing* is the technique where the *target word* is passed as the *next input* to the decoder.\n", + "7. The final step is to calculate the gradients and apply it to the optimizer and backpropagate." 
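Steps 5 and 6 (teacher forcing) are easy to miss inside the full training loop below, so here is a toy, framework-free sketch of just that part. `fake_decoder` and the integer "tokens" are stand-ins, not the notebook's model.

```python
# Illustrative teacher forcing: at every step the *ground-truth* token is fed
# to the decoder, regardless of what the decoder just predicted.
def fake_decoder(token, hidden):
    # stand-in decoder: "predicts" token + 1 and leaves the state unchanged
    return token + 1, hidden

targ = [2, 7, 9, 4]                     # ground-truth target ids for one sentence
dec_input, hidden, loss = targ[0], 0, 0
for t in range(1, len(targ)):
    prediction, hidden = fake_decoder(dec_input, hidden)
    loss += abs(prediction - targ[t])   # stand-in for the real per-step loss
    dec_input = targ[t]                 # teacher forcing: feed the true token
print(loss)
```

Without teacher forcing, `dec_input` would instead be set to `prediction`, which is exactly what the evaluation loop further down does.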
+ ] + }, + { + "metadata": { + "id": "ddefjBMa3jF0", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "EPOCHS = 10\n", + "\n", + "for epoch in range(EPOCHS):\n", + " start = time.time()\n", + " \n", + " hidden = encoder.initialize_hidden_state()\n", + " total_loss = 0\n", + " \n", + " for (batch, (inp, targ)) in enumerate(dataset):\n", + " loss = 0\n", + " \n", + " with tf.GradientTape() as tape:\n", + " enc_output, enc_hidden = encoder(inp, hidden)\n", + " \n", + " dec_hidden = enc_hidden\n", + " \n", + " dec_input = tf.expand_dims([targ_lang.word2idx['']] * BATCH_SIZE, 1) \n", + " \n", + " # Teacher forcing - feeding the target as the next input\n", + " for t in range(1, targ.shape[1]):\n", + " # passing enc_output to the decoder\n", + " predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)\n", + " \n", + " loss += loss_function(targ[:, t], predictions)\n", + " \n", + " # using teacher forcing\n", + " dec_input = tf.expand_dims(targ[:, t], 1)\n", + " \n", + " total_loss += (loss / int(targ.shape[1]))\n", + " \n", + " variables = encoder.variables + decoder.variables\n", + " \n", + " gradients = tape.gradient(loss, variables)\n", + " \n", + " optimizer.apply_gradients(zip(gradients, variables), tf.train.get_or_create_global_step())\n", + "\n", + " if batch % 100 == 0:\n", + " print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,\n", + " batch,\n", + " loss.numpy() / int(targ.shape[1])))\n", + " \n", + " print('Epoch {} Loss {:.4f}'.format(epoch + 1,\n", + " total_loss/len(input_tensor)))\n", + " print('Time taken for 1 epoch {} sec\\n'.format(time.time() - start))" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "mU3Ce8M6I3rz", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "## Translate\n", + "\n", + "* The evaluate function is similar to the training loop, except we don't use *teacher forcing* here. The input to the decoder at each time step is its previous predictions along with the hidden state and the encoder output.\n", + "* Stop predicting when the model predicts the *end token*.\n", + "* And store the *attention weights for every time step*.\n", + "\n", + "Note: The encoder output is calculated only once for one input." 
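Before the real `evaluate` function below, a toy sketch of the inference loop just described: the model's own prediction is fed back in, and decoding stops at the end token. The id values and `fake_decoder` are stand-ins for illustration only.

```python
# Illustrative inference loop: feed back the previous prediction and stop
# when the (pretend) end-token id is produced.
END_ID = 3

def fake_decoder(token, hidden, enc_out):
    # stand-in decoder: cycles through ids 0..3, so END_ID always shows up
    return (token + 1) % 4, hidden

def greedy_decode(start_id, enc_out, max_steps=10):
    hidden, token, result = 0, start_id, []
    for _ in range(max_steps):          # cap the length, like max_length_targ
        token, hidden = fake_decoder(token, hidden, enc_out)
        if token == END_ID:             # stop predicting at the end token
            break
        result.append(token)            # keep the predicted id
    return result

print(greedy_decode(start_id=0, enc_out=None))   # -> [1, 2]
```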
+ ] + }, + { + "metadata": { + "id": "EbQpyYs13jF_", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "def evaluate(sentence, encoder, decoder, inp_lang, targ_lang, max_length_inp, max_length_targ):\n", + " attention_plot = np.zeros((max_length_targ, max_length_inp))\n", + " \n", + " sentence = preprocess_sentence(sentence)\n", + "\n", + " inputs = [inp_lang.word2idx[i] for i in sentence.split(' ')]\n", + " inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs], maxlen=max_length_inp, padding='post')\n", + " inputs = tf.convert_to_tensor(inputs)\n", + " \n", + " result = ''\n", + "\n", + " hidden = [tf.zeros((1, units))]\n", + " enc_out, enc_hidden = encoder(inputs, hidden)\n", + "\n", + " dec_hidden = enc_hidden\n", + " dec_input = tf.expand_dims([targ_lang.word2idx['']], 0)\n", + "\n", + " for t in range(max_length_targ):\n", + " predictions, dec_hidden, attention_weights = decoder(dec_input, dec_hidden, enc_out)\n", + " \n", + " # storing the attention weigths to plot later on\n", + " attention_weights = tf.reshape(attention_weights, (-1, ))\n", + " attention_plot[t] = attention_weights.numpy()\n", + "\n", + " predicted_id = tf.multinomial(tf.exp(predictions), num_samples=1)[0][0].numpy()\n", + "\n", + " result += targ_lang.idx2word[predicted_id] + ' '\n", + "\n", + " if targ_lang.idx2word[predicted_id] == '':\n", + " return result, sentence, attention_plot\n", + " \n", + " # the predicted ID is fed back into the model\n", + " dec_input = tf.expand_dims([predicted_id], 0)\n", + "\n", + " return result, sentence, attention_plot" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "s5hQWlbN3jGF", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "# function for plotting the attention weights\n", + "def plot_attention(attention, sentence, predicted_sentence):\n", + " fig = plt.figure(figsize=(10,10))\n", + " ax = fig.add_subplot(1, 1, 1)\n", + " ax.matshow(attention, cmap='viridis')\n", + " \n", + " fontdict = {'fontsize': 14}\n", + " \n", + " ax.set_xticklabels([''] + sentence, fontdict=fontdict, rotation=90)\n", + " ax.set_yticklabels([''] + predicted_sentence, fontdict=fontdict)\n", + "\n", + " plt.show()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "sl9zUHzg3jGI", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "def translate(sentence, encoder, decoder, inp_lang, targ_lang, max_length_inp, max_length_targ):\n", + " result, sentence, attention_plot = evaluate(sentence, encoder, decoder, inp_lang, targ_lang, max_length_inp, max_length_targ)\n", + " \n", + " print('Input: {}'.format(sentence))\n", + " print('Predicted translation: {}'.format(result))\n", + " \n", + " attention_plot = attention_plot[:len(result.split(' ')), :len(sentence.split(' '))]\n", + " plot_attention(attention_plot, sentence.split(' '), result.split(' '))" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "WrAM0FDomq3E", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "translate('hace mucho frio aqui.', encoder, decoder, inp_lang, targ_lang, max_length_inp, max_length_targ)" + ], + "execution_count": 0, + "outputs": [] + }, + { + 
"metadata": { + "id": "zSx2iM36EZQZ", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "translate('esta es mi vida.', encoder, decoder, inp_lang, targ_lang, max_length_inp, max_length_targ)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "A3LLCx3ZE0Ls", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "translate('¿todavia estan en casa?', encoder, decoder, inp_lang, targ_lang, max_length_inp, max_length_targ)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "DUQVLVqUE1YW", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "# wrong translation\n", + "translate('trata de averiguarlo.', encoder, decoder, inp_lang, targ_lang, max_length_inp, max_length_targ)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "RTe5P5ioMJwN", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "## Next steps\n", + "\n", + "* [Download a different dataset](http://www.manythings.org/anki/) to experiment with translations, for example, English to German, or English to French.\n", + "* Experiment with training on a larger dataset, or using more epochs\n" + ] + } + ] +} \ No newline at end of file diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py index ff903a78cc..5b5557bd8f 100644 --- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py @@ -24,6 +24,7 @@ from tensorflow.contrib.gan.python import namedtuples as tfgan_tuples from tensorflow.contrib.gan.python import train as tfgan_train from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.estimator.canned import head +from tensorflow.python.estimator.export import export_output from tensorflow.python.framework import ops from tensorflow.python.ops import metrics as metrics_lib @@ -182,7 +183,10 @@ class GANHead(head._Head): # pylint: disable=protected-access if mode == model_fn_lib.ModeKeys.PREDICT: return model_fn_lib.EstimatorSpec( mode=model_fn_lib.ModeKeys.PREDICT, - predictions=gan_model.generated_data) + predictions=gan_model.generated_data, + export_outputs={ + 'predict': export_output.PredictOutput(gan_model.generated_data) + }) elif mode == model_fn_lib.ModeKeys.EVAL: gan_loss = self.create_loss( features=None, mode=mode, logits=gan_model, labels=None) diff --git a/tensorflow/contrib/gan/python/estimator/python/head_test.py b/tensorflow/contrib/gan/python/estimator/python/head_test.py index 6587f1fc60..5309d87765 100644 --- a/tensorflow/contrib/gan/python/estimator/python/head_test.py +++ b/tensorflow/contrib/gan/python/estimator/python/head_test.py @@ -26,8 +26,11 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variable_scope from tensorflow.python.platform import test +from tensorflow.python.saved_model import signature_constants from tensorflow.python.training import training +_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY + def dummy_loss(gan_model, add_summaries=True): # pylint:disable=unused-argument return math_ops.reduce_sum(gan_model.discriminator_real_outputs - @@ 
-71,13 +74,15 @@ class GANHeadTest(test.TestCase): return {} def _test_modes_helper(self, mode): - self.gan_head.create_estimator_spec( + return self.gan_head.create_estimator_spec( features=None, mode=mode, logits=get_gan_model()) def test_modes_predict(self): - self._test_modes_helper(model_fn_lib.ModeKeys.PREDICT) + spec = self._test_modes_helper(model_fn_lib.ModeKeys.PREDICT) + self.assertItemsEqual((_DEFAULT_SERVING_KEY, 'predict'), + spec.export_outputs.keys()) def test_modes_eval(self): self._test_modes_helper(model_fn_lib.ModeKeys.EVAL) diff --git a/tensorflow/contrib/gdr/gdr_server_lib.cc b/tensorflow/contrib/gdr/gdr_server_lib.cc index 1f9dd0decb..9025c992a4 100644 --- a/tensorflow/contrib/gdr/gdr_server_lib.cc +++ b/tensorflow/contrib/gdr/gdr_server_lib.cc @@ -57,7 +57,7 @@ Status GdrServer::Init() { new GdrWorker(env, remote_memory_manager_.get())); }; TF_RETURN_IF_ERROR( - GrpcServer::Init(nullptr, rendezvous_mgr_func, worker_func)); + GrpcServer::Init(nullptr, rendezvous_mgr_func, nullptr, worker_func)); return remote_memory_manager_->Init(); } diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h index a7b0d805a3..4cfaa0f36d 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h @@ -26,7 +26,7 @@ namespace optimized_ops { // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on // Jetson TX-2. This compiler does not support the offsetof() macro. #if defined(__aarch64__) && !defined(GOOGLE_L4T) - +#include // clang-format gets confused with this file and ends up formatting lines to // be larger than 80 characters. Turn off here and back on at the end of the // file. diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h index cbeb53bee7..681448be20 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h @@ -19,7 +19,9 @@ limitations under the License. #include #include +// Place `` before to avoid build failures in macOS. #include +#include // We forward declare TFLite classes here to avoid exposing them to SWIG. 
namespace tflite { diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index 4f35de4e5d..bbdf962d04 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -29,6 +29,7 @@ py_library( "python/training/reg_adagrad_optimizer.py", "python/training/sign_decay.py", "python/training/variable_clipping_optimizer.py", + "python/training/weight_decay_optimizers.py", ], srcs_version = "PY2AND3", deps = [ @@ -198,6 +199,25 @@ py_test( ], ) +py_test( + name = "weight_decay_optimizers_test", + srcs = ["python/training/weight_decay_optimizers_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":opt_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:session", + "//tensorflow/python:variables", + "//third_party/py/numpy", + ], +) + tf_py_test( name = "drop_stale_gradient_optimizer_test", srcs = ["python/training/drop_stale_gradient_optimizer_test.py"], diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index b41148329d..65777b1323 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -22,16 +22,17 @@ from __future__ import print_function from tensorflow.contrib.opt.python.training.adamax import * from tensorflow.contrib.opt.python.training.addsign import * from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import * +from tensorflow.contrib.opt.python.training.elastic_average_optimizer import * from tensorflow.contrib.opt.python.training.external_optimizer import * +from tensorflow.contrib.opt.python.training.ggt import * from tensorflow.contrib.opt.python.training.lazy_adam_optimizer import * +from tensorflow.contrib.opt.python.training.model_average_optimizer import * from tensorflow.contrib.opt.python.training.moving_average_optimizer import * from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import * from tensorflow.contrib.opt.python.training.nadam_optimizer import * from tensorflow.contrib.opt.python.training.powersign import * from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import * -from tensorflow.contrib.opt.python.training.elastic_average_optimizer import * -from tensorflow.contrib.opt.python.training.model_average_optimizer import * -from tensorflow.contrib.opt.python.training.ggt import * +from tensorflow.contrib.opt.python.training.weight_decay_optimizers import * # pylint: enable=wildcard-import from tensorflow.python.util.all_util import remove_undocumented @@ -47,6 +48,10 @@ _allowed_symbols = [ 'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer', + 'MomentumWOptimizer', + 'AdamWOptimizer', + 'DecoupledWeightDecayExtension', + 'extend_with_decoupled_weight_decay', 'ScipyOptimizerInterface', 'VariableClippingOptimizer', 'MultitaskOptimizerWrapper', diff --git a/tensorflow/contrib/opt/python/training/weight_decay_optimizers.py b/tensorflow/contrib/opt/python/training/weight_decay_optimizers.py new file mode 100644 index 0000000000..b9cf40eb7b --- /dev/null +++ b/tensorflow/contrib/opt/python/training/weight_decay_optimizers.py @@ -0,0 +1,362 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Base class to make optimizers weight decay ready.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.training import adam +from tensorflow.python.training import momentum as momentum_opt +from tensorflow.python.training import optimizer +from tensorflow.python.util.tf_export import tf_export + + +class DecoupledWeightDecayExtension(object): + """This class allows to extend optimizers with decoupled weight decay. + + It implements the decoupled weight decay described by Loshchilov & Hutter + (https://arxiv.org/pdf/1711.05101.pdf), in which the weight decay is + decoupled from the optimization steps w.r.t. to the loss function. + For SGD variants, this simplifies hyperparameter search since it decouples + the settings of weight decay and learning rate. + For adaptive gradient algorithms, it regularizes variables with large + gradients more than L2 regularization would, which was shown to yield better + training loss and generalization error in the paper above. + + This class alone is not an optimizer but rather extends existing + optimizers with decoupled weight decay. We explicitly define the two examples + used in the above paper (SGDW and AdamW), but in general this can extend + any OptimizerX by using + `extend_with_weight_decay(OptimizerX, weight_decay=weight_decay)`. + In order for it to work, it must be the first class the Optimizer with + weight decay inherits from, e.g. + + ```python + class AdamWOptimizer(DecoupledWeightDecayExtension, adam.AdamOptimizer): + def __init__(self, weight_decay, *args, **kwargs): + super(AdamWOptimizer, self).__init__(weight_decay, *args, **kwargs). + ``` + + Note that this extension decays weights BEFORE applying the update based + on the gradient, i.e. this extension only has the desired behaviour for + optimizers which do not depend on the value of'var' in the update step! + """ + + def __init__(self, weight_decay, **kwargs): + """Construct the extension class that adds weight decay to an optimizer. + + Args: + weight_decay: A `Tensor` or a floating point value, the factor by which + a variable is decayed in the update step. + **kwargs: Optional list or tuple or set of `Variable` objects to + decay. + """ + self._decay_var_list = None # is set in minimize or apply_gradients + self._weight_decay = weight_decay + # The tensors are initialized in call to _prepare + self._weight_decay_tensor = None + super(DecoupledWeightDecayExtension, self).__init__(**kwargs) + + def minimize(self, loss, global_step=None, var_list=None, + gate_gradients=optimizer.Optimizer.GATE_OP, + aggregation_method=None, colocate_gradients_with_ops=False, + name=None, grad_loss=None, decay_var_list=None): + """Add operations to minimize `loss` by updating `var_list` with decay. 
+ + This function is the same as Optimizer.minimize except that it allows to + specify the variables that should be decayed using decay_var_list. + If decay_var_list is None, all variables in var_list are decayed. + + For more information see the documentation of Optimizer.minimize. + + Args: + loss: A `Tensor` containing the value to minimize. + global_step: Optional `Variable` to increment by one after the + variables have been updated. + var_list: Optional list or tuple of `Variable` objects to update to + minimize `loss`. Defaults to the list of variables collected in + the graph under the key `GraphKeys.TRAINABLE_VARIABLES`. + gate_gradients: How to gate the computation of gradients. Can be + `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`. + aggregation_method: Specifies the method used to combine gradient terms. + Valid values are defined in the class `AggregationMethod`. + colocate_gradients_with_ops: If True, try colocating gradients with + the corresponding op. + name: Optional name for the returned operation. + grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`. + decay_var_list: Optional list of decay variables. + + Returns: + An Operation that updates the variables in `var_list`. If `global_step` + was not `None`, that operation also increments `global_step`. + + """ + self._decay_var_list = set(decay_var_list) if decay_var_list else False + return super(DecoupledWeightDecayExtension, self).minimize( + loss, global_step=global_step, var_list=var_list, + gate_gradients=gate_gradients, aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, name=name, + grad_loss=grad_loss) + + def apply_gradients(self, grads_and_vars, global_step=None, name=None, + decay_var_list=None): + """Apply gradients to variables and decay the variables. + + This function is the same as Optimizer.apply_gradients except that it + allows to specify the variables that should be decayed using + decay_var_list. If decay_var_list is None, all variables in var_list + are decayed. + + For more information see the documentation of Optimizer.apply_gradients. + + Args: + grads_and_vars: List of (gradient, variable) pairs as returned by + `compute_gradients()`. + global_step: Optional `Variable` to increment by one after the + variables have been updated. + name: Optional name for the returned operation. Default to the + name passed to the `Optimizer` constructor. + decay_var_list: Optional list of decay variables. + + Returns: + An `Operation` that applies the specified gradients. If `global_step` + was not None, that operation also increments `global_step`. + """ + self._decay_var_list = set(decay_var_list) if decay_var_list else False + return super(DecoupledWeightDecayExtension, self).apply_gradients( + grads_and_vars, global_step=global_step, name=name) + + def _prepare(self): + weight_decay = self._weight_decay + if callable(weight_decay): + weight_decay = weight_decay() + self._weight_decay_tensor = ops.convert_to_tensor( + weight_decay, name="weight_decay") + # Call the optimizers _prepare function. 
+ super(DecoupledWeightDecayExtension, self)._prepare() + + def _decay_weights_op(self, var): + if not self._decay_var_list or var in self._decay_var_list: + return var.assign_sub(self._weight_decay * var, self._use_locking) + return control_flow_ops.no_op() + + def _decay_weights_sparse_op(self, var, indices, scatter_add): + if not self._decay_var_list or var in self._decay_var_list: + return scatter_add(var, indices, -self._weight_decay * var, + self._use_locking) + return control_flow_ops.no_op() + + # Here, we overwrite the apply functions that the base optimizer calls. + # super().apply_x resolves to the apply_x function of the BaseOptimizer. + def _apply_dense(self, grad, var): + with ops.control_dependencies([self._decay_weights_op(var)]): + return super(DecoupledWeightDecayExtension, self)._apply_dense(grad, var) + + def _resource_apply_dense(self, grad, var): + with ops.control_dependencies([self._decay_weights_op(var)]): + return super(DecoupledWeightDecayExtension, self)._resource_apply_dense( + grad, var) + + def _apply_sparse(self, grad, var): + scatter_add = state_ops.scatter_add + decay_op = self._decay_weights_sparse_op(var, grad.indices, scatter_add) + with ops.control_dependencies([decay_op]): + return super(DecoupledWeightDecayExtension, self)._apply_sparse( + grad, var) + + def _resource_scatter_add(self, x, i, v, _=None): + # last argument allows for one overflow argument, to have the same function + # signature as state_ops.scatter_add + with ops.control_dependencies( + [resource_variable_ops.resource_scatter_add(x.handle, i, v)]): + return x.value() + + def _resource_apply_sparse(self, grad, var, indices): + scatter_add = self._resource_scatter_add + decay_op = self._decay_weights_sparse_op(var, indices, scatter_add) + with ops.control_dependencies([decay_op]): + return super(DecoupledWeightDecayExtension, self)._resource_apply_sparse( + grad, var, indices) + + +def extend_with_decoupled_weight_decay(base_optimizer): + """Factory function returning an optimizer class with decoupled weight decay. + + Returns an optimizer class. An instance of the returned class computes the + update step of `base_optimizer` and additionally decays the weights. + E.g., the class returned by + `extend_with_decoupled_weight_decay(tf.train.AdamOptimizer)` is equivalent to + `tf.contrib.opt.AdamWOptimizer`. + + The API of the new optimizer class slightly differs from the API of the + base optimizer: + - The first argument to the constructor is the weight decay rate. + - `minimize` and `apply_gradients` accept the optional keyword argument + `decay_var_list`, which specifies the variables that should be decayed. + If `None`, all variables that are optimized are decayed. + + Usage example: + ```python + # MyAdamW is a new class + MyAdamW = extend_with_decoupled_weight_decay(tf.train.AdamOptimizer) + # Create a MyAdamW object + optimizer = MyAdamW(weight_decay=0.001, learning_rate=0.001) + sess.run(optimizer.minimize(loss, decay_variables=[var1, var2])) + + Note that this extension decays weights BEFORE applying the update based + on the gradient, i.e. this extension only has the desired behaviour for + optimizers which do not depend on the value of'var' in the update step! + ``` + + Args: + base_optimizer: An optimizer class that inherits from tf.train.Optimizer. + + Returns: + A new optimizer class that inherits from DecoupledWeightDecayExtension + and base_optimizer. 
+ """ + + class OptimizerWithDecoupledWeightDecay(DecoupledWeightDecayExtension, + base_optimizer): + """Base_optimizer with decoupled weight decay. + + This class computes the update step of `base_optimizer` and + additionally decays the variable with the weight decay being decoupled from + the optimization steps w.r.t. to the loss function, as described by + Loshchilov & Hutter (https://arxiv.org/pdf/1711.05101.pdf). + For SGD variants, this simplifies hyperparameter search since + it decouples the settings of weight decay and learning rate. + For adaptive gradient algorithms, it regularizes variables with large + gradients more than L2 regularization would, which was shown to yield + better training loss and generalization error in the paper above. + """ + + def __init__(self, weight_decay, *args, **kwargs): + # super delegation is necessary here + # pylint: disable=useless-super-delegation + super(OptimizerWithDecoupledWeightDecay, self).__init__( + weight_decay, *args, **kwargs) + # pylint: enable=useless-super-delegation + + return OptimizerWithDecoupledWeightDecay + + +@tf_export("contrib.opt.MomentumWOptimizer") +class MomentumWOptimizer(DecoupledWeightDecayExtension, + momentum_opt.MomentumOptimizer): + """Optimizer that implements the Momentum algorithm with weight_decay. + + This is an implementation of the SGDW optimizer described in "Fixing + Weight Decay Regularization in Adam" by Loshchilov & Hutter + (https://arxiv.org/abs/1711.05101) + ([pdf])(https://arxiv.org/pdf/1711.05101.pdf). + It computes the update step of `train.MomentumOptimizer` and additionally + decays the variable. Note that this is different from adding + L2 regularization on the variables to the loss. Decoupling the weight decay + from other hyperparameters (in particular the learning rate) simplifies + hyperparameter search. + + For further information see the documentation of the Momentum Optimizer. + + Note that this optimizer can also be instantiated as + ```python + extend_with_weight_decay(tf.train.MomentumOptimizer, + weight_decay=weight_decay) + ``` + """ + + def __init__(self, weight_decay, learning_rate, momentum, + use_locking=False, name="MomentumW", use_nesterov=False): + """Construct a new MomentumW optimizer. + + For further information see the documentation of the Momentum Optimizer. + + Args: + weight_decay: A `Tensor` or a floating point value. The weight decay. + learning_rate: A `Tensor` or a floating point value. The learning rate. + momentum: A `Tensor` or a floating point value. The momentum. + use_locking: If `True` use locks for update operations. + name: Optional name prefix for the operations created when applying + gradients. Defaults to "Momentum". + use_nesterov: If `True` use Nesterov Momentum. + See [Sutskever et al., 2013]( + http://jmlr.org/proceedings/papers/v28/sutskever13.pdf). + This implementation always computes gradients at the value of the + variable(s) passed to the optimizer. Using Nesterov Momentum makes the + variable(s) track the values called `theta_t + mu*v_t` in the paper. + + @compatibility(eager) + When eager execution is enabled, learning_rate, weight_decay and momentum + can each be a callable that takes no arguments and returns the actual value + to use. This can be useful for changing these values across different + invocations of optimizer functions. 
+@tf_export("contrib.opt.MomentumWOptimizer")
+class MomentumWOptimizer(DecoupledWeightDecayExtension,
+                         momentum_opt.MomentumOptimizer):
+  """Optimizer that implements the Momentum algorithm with weight decay.
+
+  This is an implementation of the SGDW optimizer described in "Fixing
+  Weight Decay Regularization in Adam" by Loshchilov & Hutter
+  (https://arxiv.org/abs/1711.05101)
+  ([pdf](https://arxiv.org/pdf/1711.05101.pdf)).
+  It computes the update step of `train.MomentumOptimizer` and additionally
+  decays the variable. Note that this is different from adding
+  L2 regularization on the variables to the loss. Decoupling the weight decay
+  from other hyperparameters (in particular the learning rate) simplifies
+  hyperparameter search.
+
+  For further information see the documentation of the Momentum Optimizer.
+
+  Note that this optimizer can also be instantiated as
+  ```python
+  extend_with_decoupled_weight_decay(tf.train.MomentumOptimizer)(
+      weight_decay=weight_decay, learning_rate=learning_rate, momentum=momentum)
+  ```
+  """
+
+  def __init__(self, weight_decay, learning_rate, momentum,
+               use_locking=False, name="MomentumW", use_nesterov=False):
+    """Construct a new MomentumW optimizer.
+
+    For further information see the documentation of the Momentum Optimizer.
+
+    Args:
+      weight_decay: A `Tensor` or a floating point value. The weight decay.
+      learning_rate: A `Tensor` or a floating point value. The learning rate.
+      momentum: A `Tensor` or a floating point value. The momentum.
+      use_locking: If `True` use locks for update operations.
+      name: Optional name prefix for the operations created when applying
+        gradients. Defaults to "MomentumW".
+      use_nesterov: If `True` use Nesterov Momentum.
+        See [Sutskever et al., 2013](
+        http://jmlr.org/proceedings/papers/v28/sutskever13.pdf).
+        This implementation always computes gradients at the value of the
+        variable(s) passed to the optimizer. Using Nesterov Momentum makes the
+        variable(s) track the values called `theta_t + mu*v_t` in the paper.
+
+    @compatibility(eager)
+    When eager execution is enabled, learning_rate, weight_decay and momentum
+    can each be a callable that takes no arguments and returns the actual value
+    to use. This can be useful for changing these values across different
+    invocations of optimizer functions.
+    @end_compatibility
+    """
+    super(MomentumWOptimizer, self).__init__(
+        weight_decay, learning_rate=learning_rate, momentum=momentum,
+        use_locking=use_locking, name=name, use_nesterov=use_nesterov)
+
+
+@tf_export("contrib.opt.AdamWOptimizer")
+class AdamWOptimizer(DecoupledWeightDecayExtension, adam.AdamOptimizer):
+  """Optimizer that implements the Adam algorithm with weight decay.
+
+  This is an implementation of the AdamW optimizer described in "Fixing
+  Weight Decay Regularization in Adam" by Loshchilov & Hutter
+  (https://arxiv.org/abs/1711.05101)
+  ([pdf](https://arxiv.org/pdf/1711.05101.pdf)).
+
+  It computes the update step of `train.AdamOptimizer` and additionally decays
+  the variable. Note that this is different from adding L2 regularization on
+  the variables to the loss: it regularizes variables with large
+  gradients more than L2 regularization would, which was shown to yield better
+  training loss and generalization error in the paper above.
+
+  For further information see the documentation of the Adam Optimizer.
+
+  Note that this optimizer can also be instantiated as
+  ```python
+  extend_with_decoupled_weight_decay(tf.train.AdamOptimizer)(
+      weight_decay=weight_decay)
+  ```
+  """
+
+  def __init__(self, weight_decay, learning_rate=0.001, beta1=0.9, beta2=0.999,
+               epsilon=1e-8, use_locking=False, name="AdamW"):
+    """Construct a new AdamW optimizer.
+
+    For further information see the documentation of the Adam Optimizer.
+
+    Args:
+      weight_decay: A `Tensor` or a floating point value. The weight decay.
+      learning_rate: A Tensor or a floating point value. The learning rate.
+      beta1: A float value or a constant float tensor.
+        The exponential decay rate for the 1st moment estimates.
+      beta2: A float value or a constant float tensor.
+        The exponential decay rate for the 2nd moment estimates.
+      epsilon: A small constant for numerical stability. This epsilon is
+        "epsilon hat" in the Kingma and Ba paper (in the formula just before
+        Section 2.1), not the epsilon in Algorithm 1 of the paper.
+      use_locking: If True use locks for update operations.
+      name: Optional name for the operations created when applying gradients.
+        Defaults to "AdamW".
+    """
+    super(AdamWOptimizer, self).__init__(
+        weight_decay, learning_rate=learning_rate, beta1=beta1, beta2=beta2,
+        epsilon=epsilon, use_locking=use_locking, name=name)
diff --git a/tensorflow/contrib/opt/python/training/weight_decay_optimizers_test.py b/tensorflow/contrib/opt/python/training/weight_decay_optimizers_test.py
new file mode 100644
index 0000000000..76d8a5697a
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/weight_decay_optimizers_test.py
@@ -0,0 +1,188 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== +"""Tests for optimizers with weight decay.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.opt.python.training import weight_decay_optimizers +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.training import adam + +WEIGHT_DECAY = 0.01 + + +def adamw_update_numpy(param, g_t, t, m, v, lr=0.001, beta1=0.9, + beta2=0.999, epsilon=1e-8): + lr_t = lr * np.sqrt(1 - beta2**t) / (1 - beta1**t) + + m_t = beta1 * m + (1 - beta1) * g_t + v_t = beta2 * v + (1 - beta2) * g_t * g_t + + param_t = (param - lr_t * m_t / (np.sqrt(v_t) + epsilon) - + (param * WEIGHT_DECAY)) + return param_t, m_t, v_t + + +def momentumw_update_numpy(param, g_t, m, lr=0.001, momentum=0.9, **_): + # v, t are not needed for momentum optimizer + m = momentum * m + g_t + param_t = param - lr * m - param * WEIGHT_DECAY + return param_t, m, None + + +class WeightDecayOptimizerTest(test.TestCase): + + def doTest(self, optimizer, update_fn, optimizer_name, slot_name, + use_resource=False, do_sparse=False): + for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): + with self.test_session(graph=ops.Graph()): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable( + var0_np, name="var0_%d" % i) + var1 = resource_variable_ops.ResourceVariable( + var1_np, name="var1_%d" % i) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + + if do_sparse: + grads0_np_indices = np.array([0, 1], dtype=np.int32) + grads0 = ops.IndexedSlices(constant_op.constant(grads0_np), + constant_op.constant(grads0_np_indices), + constant_op.constant([2])) + grads1_np_indices = np.array([0, 1], dtype=np.int32) + grads1 = ops.IndexedSlices(constant_op.constant(grads1_np), + constant_op.constant(grads1_np_indices), + constant_op.constant([2])) + else: + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + + opt = optimizer() + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + + if not context.executing_eagerly(): + with ops.Graph().as_default(): + # Shouldn't return non-slot variables from other graphs. 
+ self.assertEqual(0, len(opt.variables())) + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 3 steps of the optimizer + for t in range(1, 4): + if not context.executing_eagerly(): + self.evaluate(update) + elif t > 1: + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + + var0_np, m0, v0 = update_fn(var0_np, grads0_np, t=t, m=m0, v=v0) + var1_np, m1, v1 = update_fn(var1_np, grads1_np, t=t, m=m1, v=v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + if use_resource: + self.assertEqual("var0_%d/%s:0" % (i, optimizer_name), + opt.get_slot(var=var0, name=slot_name).name) + + +class AdamWOptimizerTest(WeightDecayOptimizerTest): + + @staticmethod + def get_optimizer(): + return weight_decay_optimizers.AdamWOptimizer(WEIGHT_DECAY) + + def testSparse(self): + self.doTest(self.get_optimizer, adamw_update_numpy, "AdamW", "m", + use_resource=False, do_sparse=True) + + def testResourceSparse(self): + self.doTest(self.get_optimizer, adamw_update_numpy, "AdamW", "m", + use_resource=True, do_sparse=True) + + def testBasic(self): + self.doTest(self.get_optimizer, adamw_update_numpy, "AdamW", "m", + use_resource=False) + + @test_util.run_in_graph_and_eager_modes(reset_test=True) + def testResourceBasic(self): + self.doTest(self.get_optimizer, adamw_update_numpy, "AdamW", "m", + use_resource=True) + + +class MomentumWOptimizerTest(WeightDecayOptimizerTest): + + @staticmethod + def get_optimizer(): + return weight_decay_optimizers.MomentumWOptimizer(WEIGHT_DECAY, 0.001, 0.9) + + def testSparse(self): + self.doTest(self.get_optimizer, momentumw_update_numpy, "MomentumW", + "momentum", use_resource=False, do_sparse=True) + + def testResourceSparse(self): + self.doTest(self.get_optimizer, momentumw_update_numpy, "MomentumW", + "momentum", use_resource=True, do_sparse=True) + + def testBasic(self): + self.doTest(self.get_optimizer, momentumw_update_numpy, "MomentumW", + "momentum", use_resource=False) + + @test_util.run_in_graph_and_eager_modes(reset_test=True) + def testResourceBasic(self): + self.doTest(self.get_optimizer, momentumw_update_numpy, "MomentumW", + "momentum", use_resource=True) + + +class ExtendWithWeightDecayTest(WeightDecayOptimizerTest): + + @staticmethod + def get_optimizer(): + adamw = weight_decay_optimizers.extend_with_decoupled_weight_decay( + adam.AdamOptimizer) + return adamw(WEIGHT_DECAY) + + def testBasic(self): + self.doTest(self.get_optimizer, adamw_update_numpy, "Adam", "m", + use_resource=False) + + @test_util.run_in_graph_and_eager_modes(reset_test=True) + def testResourceBasic(self): + self.doTest(self.get_optimizer, adamw_update_numpy, "Adam", "m", + use_resource=True) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/solvers/python/ops/linear_equations.py b/tensorflow/contrib/solvers/python/ops/linear_equations.py index 9305c6a11c..85918bf850 100644 --- a/tensorflow/contrib/solvers/python/ops/linear_equations.py +++ b/tensorflow/contrib/solvers/python/ops/linear_equations.py @@ -28,7 +28,6 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops import linalg_ops def conjugate_gradient(operator, diff 
--git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index a5d8b061b6..adda0b758b 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -49,7 +49,6 @@ tf_cuda_cc_test( tf_custom_op_library( name = "python/ops/_trt_engine_op.so", srcs = [ - "ops/trt_calib_op.cc", "ops/trt_engine_op.cc", ], deps = [ @@ -76,11 +75,9 @@ tf_cuda_library( cc_library( name = "trt_engine_op_kernel", srcs = [ - "kernels/trt_calib_op.cc", "kernels/trt_engine_op.cc", ], hdrs = [ - "kernels/trt_calib_op.h", "kernels/trt_engine_op.h", ], copts = tf_copts(), @@ -89,20 +86,22 @@ cc_library( ":trt_logging", ":trt_plugins", ":trt_resources", + ":trt_conversion", + ":utils", "//tensorflow/core:gpu_headers_lib", "//tensorflow/core:lib_proto_parsing", "//tensorflow/core:stream_executor_headers_lib", + "//tensorflow/core/grappler/costs:graph_properties", ] + if_tensorrt([ "@local_config_tensorrt//:nv_infer", ]) + tf_custom_op_library_additional_deps(), - # TODO(laigd) + # TODO(laigd): fix this by merging header file in cc file. alwayslink = 1, # buildozer: disable=alwayslink-with-hdrs ) tf_gen_op_libs( op_lib_names = [ "trt_engine_op", - "trt_calib_op", ], ) @@ -122,7 +121,6 @@ tf_gen_op_wrapper_py( name = "trt_engine_op", gen_locally = True, deps = [ - ":trt_calib_op_op_lib", ":trt_engine_op_op_lib", ":trt_logging", ":trt_shape_function", @@ -140,7 +138,6 @@ tf_custom_op_py_library( kernels = [ ":trt_engine_op_kernel", ":trt_engine_op_op_lib", - ":trt_calib_op_op_lib", ":trt_shape_function", ], srcs_version = "PY2AND3", @@ -191,7 +188,6 @@ tf_py_wrap_cc( deps = [ ":trt_conversion", ":trt_engine_op_kernel", - "//tensorflow/core:framework_lite", "//third_party/python_runtime:headers", ], ) @@ -211,6 +207,7 @@ tf_cuda_library( ], deps = [ ":trt_logging", + ":utils", "//tensorflow/core:framework_headers_lib", "//tensorflow/core:framework_lite", "//tensorflow/core:lib_proto_parsing", @@ -237,12 +234,12 @@ tf_cuda_library( ":trt_plugins", ":trt_logging", ":trt_resources", + ":utils", "//tensorflow/core/grappler/clusters:cluster", "//tensorflow/core/grappler/optimizers:custom_graph_optimizer", "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", - "//tensorflow/core:framework", "//tensorflow/core:gpu_runtime", "//tensorflow/core:framework_lite", "//tensorflow/core:graph", @@ -343,3 +340,8 @@ py_test( "//tensorflow/python:framework_test_lib", ], ) + +cc_library( + name = "utils", + hdrs = ["convert/utils.h"], +) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index da4dd5a14c..4dc1c551cc 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -14,8 +14,8 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/tensorrt/convert/convert_graph.h" -#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" +#include #include #include #include @@ -24,10 +24,17 @@ limitations under the License. 
#include #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h" +#include "tensorflow/contrib/tensorrt/convert/utils.h" +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resources.h" #include "tensorflow/contrib/tensorrt/segment/segment.h" #include "tensorflow/core/common_runtime/gpu/gpu_id.h" #include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" #include "tensorflow/core/common_runtime/gpu/process_state.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/graph_to_functiondef.h" +#include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/graph_constructor.h" @@ -39,17 +46,39 @@ limitations under the License. #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/protobuf/config.pb.h" // NOLINT #include "tensorflow/core/protobuf/device_properties.pb.h" // NOLINT +#include "tensorflow/core/protobuf/rewriter_config.pb.h" // NOLINT +#include "tensorflow/core/util/device_name_utils.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT +#include "cuda/include/cuda_runtime_api.h" #include "tensorrt/include/NvInfer.h" - namespace tensorflow { namespace tensorrt { namespace convert { +using ::tensorflow::strings::StrAppend; +using ::tensorflow::strings::StrCat; + +// Returns compiled TRT version information {Maj, Min, Patch} +std::vector GetLinkedTensorRTVersion() { + return {NV_TENSORRT_MAJOR, NV_TENSORRT_MINOR, NV_TENSORRT_PATCH}; +} + +// Returns loaded TRT library version {Maj, Min, Patch} +std::vector GetLoadedTensorRTVersion() { + int ver = getInferLibVersion(); + int ver_major = ver / 1000; + ver = ver - ver_major * 1000; + int ver_minor = ver / 100; + int ver_patch = ver - ver_minor * 100; + return {ver_major, ver_minor, ver_patch}; +} + namespace { bool IsTensorRTCandidate(const tensorflow::Node* node) { @@ -82,229 +111,6 @@ bool IsTensorRTCandidate(const tensorflow::Node* node) { PluginFactoryTensorRT::GetInstance()->IsPlugin(node->type_string())); } -void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, - const std::set& subgraph_node_ids, - tensorflow::EdgeSet* incoming_edges) { - for (int node_id : subgraph_node_ids) { - const tensorflow::Node* node = graph.FindNodeId(node_id); - for (const tensorflow::Edge* edge : node->in_edges()) { - if (!subgraph_node_ids.count(edge->src()->id()) && - !edge->src()->IsSource() && !edge->IsControlEdge()) { - incoming_edges->insert(edge); - VLOG(2) << "INCOMING " << edge->src()->name() << " -> " << node->name() - << " Y, "; - } else { - VLOG(2) << "INCOMING " << edge->src()->name() << " -> " << node->name() - << " N, "; - } - } - } -} - -void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph, - const std::set& subgraph_node_ids, - tensorflow::EdgeSet* outgoing_edges) { - for (int node_id : subgraph_node_ids) { - const tensorflow::Node* node = graph.FindNodeId(node_id); - for (const tensorflow::Edge* edge : node->out_edges()) { - if (!subgraph_node_ids.count(edge->dst()->id()) && - !edge->dst()->IsSink() && !edge->IsControlEdge()) { - VLOG(2) << "OUTGOING " << node->name() << " -> " << edge->dst()->name() 
- << " Y, "; - outgoing_edges->insert(edge); - } else { - VLOG(2) << "OUTGOING " << node->name() << " -> " << edge->dst()->name() - << " N, "; - } - } - } -} - -std::pair ParseTensorName(const string& name, - int default_idx = 0) { - string name_no_idx = name; - int idx = default_idx; - const size_t sep = name_no_idx.find_last_of(':'); - if (sep != string::npos) { - name_no_idx = name_no_idx.substr(0, sep); - idx = std::stoi(name.substr(sep + 1)); - } - return std::make_pair(name_no_idx, idx); -} - -std::unordered_map> BuildTensorNameMap( - const std::vector& tensor_names) { - std::unordered_map> result; - for (const string& tensor_name : tensor_names) { - string node_name; - int index; - std::tie(node_name, index) = ParseTensorName(tensor_name); - result[node_name].push_back(index); - } - return result; -} - -// TODO(sami): convert references to pointers -struct ConvertGraphParams { - ConvertGraphParams( - tensorflow::Graph& inp_graph, - const std::vector& output_node_names, - const std::set& subgraph_node_id_numbers, - size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& current_graph_properties, - std::unordered_map>* output_edges, - int engine_precision_mode, const string& device_name, - std::shared_ptr allocator, int cuda_gpu_id) - : graph(inp_graph), - output_names(output_node_names), - subgraph_node_ids(subgraph_node_id_numbers), - max_batch_size(max_supported_batch_size), - max_workspace_size_bytes(max_consumed_workspace_size_bytes), - graph_properties(current_graph_properties), - output_edge_map(output_edges), - precision_mode(engine_precision_mode), - device_name_(device_name), - allocator_(allocator), - cuda_gpu_id_(cuda_gpu_id) {} - tensorflow::Graph& graph; - const std::vector& output_names; - const std::set& subgraph_node_ids; - size_t max_batch_size; - size_t max_workspace_size_bytes; - const tensorflow::grappler::GraphProperties& graph_properties; - std::unordered_map>* output_edge_map; - int precision_mode; - string device_name_; - std::shared_ptr allocator_; - int cuda_gpu_id_; - std::vector> subgraph_inputs; - std::vector> subgraph_outputs; - tensorflow::EdgeSet subgraph_incoming_edges; - tensorflow::EdgeSet subgraph_outgoing_edges; -}; - -static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) { - GetSubGraphIncomingEdges(p->graph, p->subgraph_node_ids, - &p->subgraph_incoming_edges); - - std::set> unique_tensors; - // Add only unique input source nodes. 
If output of an outside node is shared - // between multiple nodes inside the engine, only one edge should be created - for (const tensorflow::Edge* edge : p->subgraph_incoming_edges) { - unique_tensors.insert({edge->src()->id(), edge->src_output()}); - } - p->subgraph_inputs.insert(p->subgraph_inputs.begin(), unique_tensors.begin(), - unique_tensors.end()); - GetSubGraphOutgoingEdges(p->graph, p->subgraph_node_ids, - &p->subgraph_outgoing_edges); - unique_tensors.clear(); - // Similar to above, if multiple ouside nodes are sharing the output of an - // internal node only one output port should be created and shared between - // outputs - for (const tensorflow::Edge* edge : p->subgraph_outgoing_edges) { - unique_tensors.insert({edge->src()->id(), edge->src_output()}); - } - p->subgraph_outputs.reserve(unique_tensors.size()); - p->subgraph_outputs.insert(p->subgraph_outputs.begin(), - unique_tensors.begin(), unique_tensors.end()); - return tensorflow::Status::OK(); -} - -tensorflow::Status GetCalibNode(ConvertGraphParams* params) { - TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); - tensorflow::NodeDef trt_node_def; - SubGraphParams s(params->graph, params->subgraph_node_ids, - params->subgraph_inputs, params->subgraph_outputs, - params->max_batch_size, params->max_workspace_size_bytes, - params->graph_properties, params->output_edge_map, - &trt_node_def, params->precision_mode, params->device_name_, - params->allocator_, params->cuda_gpu_id_); - TF_RETURN_IF_ERROR(InjectCalibrationNode(s)); - tensorflow::Status status; - tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); - - TF_RETURN_IF_ERROR(status); - - for (auto in_edge : - params->subgraph_incoming_edges) { // loop over incoming edges and - // attach them to calib node - auto src_output = in_edge->src_output(); - auto dst_node = in_edge->dst(); - auto dst_input = in_edge->dst_input(); - VLOG(1) << " update edge " << trt_node->name() << ":" << src_output - << " -> " << dst_node->name() << ":" << dst_input; - TF_RETURN_IF_ERROR( - params->graph.UpdateEdge(trt_node, src_output, dst_node, dst_input)); - } - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { - TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); - tensorflow::NodeDef trt_node_def; - - SubGraphParams s(params->graph, params->subgraph_node_ids, - params->subgraph_inputs, params->subgraph_outputs, - params->max_batch_size, params->max_workspace_size_bytes, - params->graph_properties, params->output_edge_map, - &trt_node_def, params->precision_mode, params->device_name_, - params->allocator_, params->cuda_gpu_id_); - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(s)); - tensorflow::Status status; - tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); - - // AddNode does not wire edges. 
- // Re-map incoming edges to use the new TRT node instead of the orig subgraph - std::map, int> subgraph_edge_to_input_map; - for (size_t i = 0; i < params->subgraph_inputs.size(); ++i) { - subgraph_edge_to_input_map.insert({params->subgraph_inputs.at(i), i}); - } - std::set> unique_tensors; - for (const tensorflow::Edge* edge : params->subgraph_incoming_edges) { - std::pair old_src = {edge->src()->id(), edge->src_output()}; - if (unique_tensors.count(old_src)) continue; - unique_tensors.insert(old_src); - int new_src_output = subgraph_edge_to_input_map.at(old_src); - params->graph.AddEdge(edge->src(), edge->src_output(), trt_node, - new_src_output); - VLOG(1) << "Wire " << edge->src()->name() << ":" << edge->src_output() - << " -> " << trt_node->name() << ":" << new_src_output; - params->graph.RemoveEdge(edge); - } - if (VLOG_IS_ON(2)) { - VLOG(2) << "new edge count: " << trt_node->in_edges().size(); - for (const tensorflow::Edge* edge : trt_node->in_edges()) { - VLOG(2) << edge->src()->name() << " port: " << edge->src_output(); - } - } - TF_RETURN_IF_ERROR(status); - - // Re-map outgoing edges to use the new TRT node instead of the orig subgraph - std::map, int> subgraph_edge_to_output_map; - for (size_t i = 0; i < params->subgraph_outputs.size(); ++i) { - subgraph_edge_to_output_map.insert({params->subgraph_outputs.at(i), i}); - } - TF_RETURN_IF_ERROR(status); - for (const tensorflow::Edge* edge : params->subgraph_outgoing_edges) { - std::pair old_src = {edge->src()->id(), edge->src_output()}; - int new_src_output = subgraph_edge_to_output_map.at(old_src); - TF_RETURN_IF_ERROR(params->graph.UpdateEdge( - trt_node, new_src_output, edge->dst(), edge->dst_input())); - VLOG(1) << "Wire " << trt_node->name() << ":" << new_src_output << " -> " - << edge->dst()->name() << ":" << edge->dst_input(); - } - // Remove the original subgraph - for (int node_id : params->subgraph_node_ids) { - tensorflow::Node* node = params->graph.FindNodeId(node_id); - // Don't remove the input placeholders - if (node->type_string() == "Placeholder") { - continue; - } - params->graph.RemoveNode(node); - } - return tensorflow::Status::OK(); -} - tensorflow::Status BuildNodeMap( const tensorflow::Graph& graph, std::unordered_map* node_map) { @@ -318,51 +124,77 @@ tensorflow::Status BuildNodeMap( } } // namespace + +// Function to get calibration from ResourceMgr and put them into nodedef. tensorflow::Status ConvertCalibGraphToInferGraph( - const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* infer_graph) { + const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* infer_graph, + bool is_dyn_op) { VLOG(0) << "Starting Calib Conversion"; - tensorflow::Graph graph(tensorflow::OpRegistry::Global()); - TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( - tensorflow::GraphConstructorOptions(), graph_def, &graph)); - // get calib nodes - std::vector calib_nodes; - std::vector topo_order; - tensorflow::GetPostOrder(graph, &topo_order); - for (auto rit = topo_order.rbegin(); rit != topo_order.rend(); ++rit) { - auto node = *rit; - if (node->type_string() == "TRTCalibOp") { - VLOG(1) << "Found Calib Node " << node->name(); - calib_nodes.push_back(node); - } + infer_graph->CopyFrom(graph_def); + auto trt_rm = TRTResourceManager::instance(); + auto calib_rm = trt_rm->getManager("TRTCalibration"); + int num_nodes = infer_graph->node_size(); + if (!is_dyn_op) { + LOG(WARNING) << "Construction of static int8 engine is not implemented " + "yet!. 
Dynamic engine will be constructed"; } - VLOG(0) << "Num Calib nodes in graph= " << calib_nodes.size(); - if (calib_nodes.size() == 0) - return tensorflow::errors::FailedPrecondition( - "Graph doesn't contain any calibration nodes!." - " Please generate calibration graph and run calibration first"); - for (auto n : calib_nodes) { - TF_RETURN_IF_ERROR( - tensorrt::convert::ConvertCalibrationNodeToEngineNode(graph, n)); + for (int i = 0; i < num_nodes; ++i) { + auto n = infer_graph->mutable_node(i); + if (n->op() == "TRTEngineOp") { + VLOG(1) << "Processing " << n->name(); + string container_name = n->attr().at("segment_funcdef_name").s(); + TRTCalibrationResource* cres = nullptr; + auto status = calib_rm->Lookup(container_name, "Calibrator", &cres); + if (!status.ok()) { + LOG(ERROR) << "Could not get Calibration information. Did you run with " + "calibration data?"; + return tensorflow::errors::FailedPrecondition( + "Need to run graph with calibration data first!"); + } + if (cres->calibrator_) { + cres->calibrator_->setDone(); + cres->thr_->join(); + const auto& calibration_table = + cres->calibrator_->getCalibrationTableAsString(); + if (!calibration_table.size()) { + LOG(ERROR) << "Calibration table is empty"; + return tensorflow::errors::Unknown( + "Calibration table is missing. This shouldn't have happened!"); + } + n->mutable_attr()->at("calibration_data").set_s(calibration_table); + } else { + LOG(ERROR) << "Can't get TRTCalibrator from resource manager!"; + return tensorflow::errors::Unknown( + "Can't get TRTCalibrator from resource manager!"); + } + cres->Unref(); + } } - graph.ToGraphDef(infer_graph); return tensorflow::Status::OK(); } +// Entry function from Python. tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, - int precision_mode = FP32MODE, int minimum_segment_size = 3) { + int precision_mode, int minimum_segment_size, bool is_dyn_op, + int max_cached_engines, std::vector cached_engine_batches) { // optimization pass tensorflow::grappler::GrapplerItem item; item.fetch = output_names; item.graph = graph_def; - + // grappler requires a virtual cluster with a proper GPU device + // in order to calculate flops>0 or fails with FATAL + // We add numbers from a Pascal card here to have flops>0 tensorflow::DeviceProperties device_properties; device_properties.set_type("GPU"); device_properties.mutable_environment()->insert({"architecture", "6"}); - tensorflow::grappler::Cluster* cluster = - new tensorflow::grappler::VirtualCluster({{"/GPU:0", device_properties}}); + device_properties.set_num_cores(3584); + device_properties.set_frequency(1531); + std::unique_ptr cluster( + new tensorflow::grappler::VirtualCluster( + {{"/GPU:0", device_properties}})); // single machine int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores(); @@ -370,134 +202,633 @@ tensorflow::Status ConvertGraphDefToTensorRT( VLOG(2) << "cpu_cores: " << num_cpu_cores; VLOG(2) << "gpus: " << num_gpus; tensorflow::RewriterConfig rw_cfg; + // use only const folding and layout for the time being since new optimizers + // break the graph for us + rw_cfg.add_optimizers("constfold"); + rw_cfg.add_optimizers("layout"); + rw_cfg.set_meta_optimizer_iterations(tensorflow::RewriterConfig::ONE); tensorflow::grappler::MetaOptimizer meta_opt(nullptr, rw_cfg); tensorflow::GraphDef gdef; - TF_RETURN_IF_ERROR(meta_opt.Optimize(cluster, item, &gdef)); + 
TF_RETURN_IF_ERROR(meta_opt.Optimize(cluster.get(), item, &gdef)); item.graph = gdef; // AJ refactoring shape inference through grappler/GraphProperties. tensorflow::grappler::GraphProperties static_graph_properties(item); TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(true)); // Build full graph - - return ConvertAfterShapes(gdef, output_names, max_batch_size, - max_workspace_size_bytes, new_graph_def, - precision_mode, minimum_segment_size, - static_graph_properties, nullptr); + ConversionParams cp; + cp.input_graph_def = &gdef; + cp.output_names = &output_names; + cp.max_batch_size = max_batch_size; + cp.output_graph_def = new_graph_def; + cp.precision_mode = precision_mode; + cp.is_dyn_op = is_dyn_op; + cp.max_cached_engines = max_cached_engines; + cp.cached_engine_batches = cached_engine_batches; + cp.minimum_segment_size = minimum_segment_size; + cp.graph_properties = &static_graph_properties; + cp.max_workspace_size_bytes = max_workspace_size_bytes; + if (VLOG_IS_ON(5)) { + std::fstream f; + f.open("TRTConversionInput.pb", + std::fstream::out | std::fstream::binary | std::fstream::trunc); + f << gdef.SerializeAsString(); + f.close(); + } + return ConvertAfterShapes(cp); } -tensorflow::Status ConvertAfterShapes( - const tensorflow::GraphDef& gdef, const std::vector& output_names, - size_t max_batch_size, size_t max_workspace_size_bytes, - tensorflow::GraphDef* new_graph_def, int precision_mode, - int minimum_segment_size, +// Function to get subsegment information structure. +tensorflow::Status GetEngineInfo( + const tensorflow::Graph* g, const tensorflow::grappler::GraphProperties& graph_properties, - const tensorflow::grappler::Cluster* cluster) { - // Segment the graph into subgraphs that can be converted to TensorRT - tensorflow::tensorrt::segment::SegmentOptions segment_options; + const std::set& segment_nodes, + const std::unordered_map& node_map, + const std::vector& reverse_topo_order, + EngineInfo* info) { + std::vector subgraph_node_ids; + std::set segment_devices; + int input_port = 0; + int output_port = 0; + + // Map from src_node_name+port to the unique port numbers of the TRT op, where + // the src_node_name is the name of the source node of the input/output + // edge, thus there must not be any duplicates since source nodes of + // input/output edges must be in different split of the graph. + // TODO(aaroey): consider using node id and port instead. + std::unordered_map created_edges; + for (auto it = reverse_topo_order.rbegin(); it != reverse_topo_order.rend(); + ++it) { + const auto& node_name = (*it)->name(); + + if (segment_nodes.count(node_name) == 0) continue; + auto node = node_map.at(node_name); + auto node_device = node->requested_device(); + if (!node_device.empty()) { + segment_devices.insert(node_device); + } else { + if (node->has_assigned_device_name()) { + segment_devices.insert(node->assigned_device_name()); + } else { + VLOG(2) << "Node " << node->name() + << " neither have requested device nor assigned device"; + } + } + int node_id = node->id(); + subgraph_node_ids.push_back(node_id); + for (const auto edge : node->in_edges()) { + auto input_node = edge->src(); + if (segment_nodes.count(input_node->name()) == 0) { + // Add constant input node into the segment. We don't care if it has + // other output edges going into other engines or TF nodes. Since we add + // it only to the subsegment node list, not the subsegment itself, it + // won't be removed from the graph. If it doesn't have any edges, TF + // will prune it out. 
+ if (input_node->type_string() == "Const") { + subgraph_node_ids.push_back(input_node->id()); + } else if (!edge->IsControlEdge() && !input_node->IsSource()) { + string s(input_node->name()); + StrAppend(&s, ":", edge->src_output()); + VLOG(1) << "Input edge = " << s; + int port = input_port; + if (created_edges.count(s)) { + port = created_edges.at(s); + } else { + created_edges.insert({s, port}); + input_port++; + } + info->connections.emplace_back(input_node->name(), input_node->id(), + edge->src_output(), node_name, node_id, + edge->dst_input(), true, port); + } + } + } + for (const auto edge : node->out_edges()) { + auto output_node = edge->dst(); + if (segment_nodes.count(output_node->name()) == 0 && + !edge->IsControlEdge() && !output_node->IsSink()) { + string s(node_name); + StrAppend(&s, ":", edge->src_output()); + VLOG(1) << "Output edge = " << s; + int port = output_port; + if (created_edges.count(s)) { + port = created_edges.at(s); + } else { + created_edges.insert({s, port}); + output_port++; + } + info->connections.emplace_back(output_node->name(), output_node->id(), + edge->dst_input(), node_name, node_id, + edge->src_output(), false, port); + } + } + } + + TF_RETURN_IF_ERROR(ConvertSegmentToGraphDef( + g, graph_properties, subgraph_node_ids, &info->connections, + &info->segment_graph_def, &info->engine_name)); + // TODO(sami): This should not happen once segmenter is updated. + if (segment_devices.size() == 1) { + info->device = *segment_devices.begin(); + } else if (segment_devices.size() > 1) { + LOG(WARNING) << "Detected multiple(" << segment_devices.size() + << ") devices for the segment. Picking first one to continue " + << "but this shouldn't have happened"; + info->device = *segment_devices.begin(); + } else { + VLOG(1) << "Segment devices size is 0"; + } + return Status::OK(); +} + +// Function to insert a TRT node into the graph. The graph is not modified if +// the returned status is not ok. +// 'alloc' is only used for creating static engine. +tensorflow::Status CreateTRTNode(tensorflow::Graph* graph, + const std::vector& infos, int pos, + nvinfer1::IGpuAllocator* alloc, + int max_batch_size) { + const auto& info = infos.at(pos); + std::vector out_shapes; + std::vector input_shapes; + std::vector shapes; + std::vector inputs; + std::vector out_types; + VLOG(1) << "Processing " << info.engine_name; + + // Update the shape and data types of input/output nodes, and find all unique + // inputs. + for (const auto& conn : info.connections) { + if (!conn.is_input_edge) { + // Set the shapes and data types of output edge. + tensorflow::TensorShapeProto out_shape; + // shape of the output node inside segment + conn.inside_shape.AsProto(&out_shape); + if (out_shapes.size() <= conn.port_number) { + out_shapes.resize(conn.port_number + 1); + out_types.resize(conn.port_number + 1); + } + out_shapes.at(conn.port_number) = out_shape; + out_types.at(conn.port_number) = conn.connection_type; + continue; + } + + // Set the shapes and data types of input edge. + tensorflow::TensorShapeProto in_shape; + conn.outside_shape.AsProto(&in_shape); + if (input_shapes.size() <= conn.port_number) { + input_shapes.resize(conn.port_number + 1); + shapes.resize(conn.port_number + 1); + } + input_shapes.at(conn.port_number) = in_shape; + shapes.at(conn.port_number) = conn.outside_shape; + + string input_node = conn.outside_node_name; + int input_port = conn.outside_port; + bool found_engine = false; + // Rewire the inputs to other engines if they contain original input node. 
+ // Note that we use the information of the engine here, not the information + // of the created TRT nodes, so we're able to find all the connections to + // any other engines beforehand. + for (size_t t = 0; t < infos.size(); ++t) { + if (t == pos) continue; + auto& engine_info = infos.at(t); + for (const auto& eng_conn : engine_info.connections) { + if (eng_conn.is_input_edge) continue; + if (eng_conn.inside_node_name == input_node) { + input_node = engine_info.engine_name; + if (eng_conn.inside_port == input_port) { + input_port = eng_conn.port_number; + found_engine = true; + break; + } + } + } + if (found_engine) break; + } + VLOG(1) << "Engine Input " << input_node << ":" << input_port << " -> " + << info.engine_name << ":" << inputs.size(); + // Skip duplicate inputs. + bool new_input = true; + for (const auto& inp : inputs) { + if (inp.node == input_node && inp.index == input_port) { + new_input = false; + break; + } + } + if (new_input) { + inputs.emplace_back(input_node, input_port, conn.connection_type); + } + } + + // Build the engine and get its serialized representation. + string segment_string; + if (info.engine_type == EngineInfo::EngineType::TRTStatic || + info.precision_mode == INT8MODE) { + // Create static engine for fp32/fp16 mode, and test validity of the engine + // for int8 mode. We don't want engine to fail at the calibration time. + // So we are constructing a FP32 engine here to check its validity, and if + // it is a valid engine then we put the serialized graphdef to the op. + // Otherwise we skip node creation for this engine. + Logger trt_logger; + TrtUniquePtrType engine; + // TODO(sami): What happens if 1st dim is not batch? + TF_RETURN_IF_ERROR(ConvertGraphDefToEngine( + info.segment_graph_def, + info.precision_mode == INT8MODE ? FP32MODE : info.precision_mode, + max_batch_size, info.max_workspace_size_bytes, shapes, &trt_logger, + alloc, /*calibrator=*/nullptr, &engine, + /*convert_successfully=*/nullptr)); + TrtUniquePtrType engine_data(engine->serialize()); + segment_string = + string((const char*)engine_data->data(), engine_data->size()); + if (info.precision_mode == INT8MODE) { + // See above comment about why not putting this inside the 'else' branch. + segment_string = info.segment_graph_def.SerializeAsString(); + } + } else { + segment_string = info.segment_graph_def.SerializeAsString(); + } + + // TODO(aaroey): use enum instead, and add a helper method to do the + // conversion. 
+ string prec_string; + switch (info.precision_mode) { + case FP32MODE: + prec_string = "FP32"; + break; + case FP16MODE: + prec_string = "FP16"; + break; + case INT8MODE: + prec_string = "INT8"; + if (!TRTResourceManager::instance()->getManager("TRTCalibration")) { + LOG(ERROR) << "Failed to construct calibration storage"; + } + break; + default: + return tensorflow::errors::OutOfRange("Unknown precision mode"); + } + tensorflow::NodeDefBuilder node_builder(info.engine_name, "TRTEngineOp"); + if (!info.device.empty()) node_builder.Device(info.device); + if (VLOG_IS_ON(1)) { + string ins = StrCat(info.engine_name, " inputs= "); + for (const auto& ii : inputs) { + StrAppend(&ins, ii.node, ":", ii.index, " "); + } + VLOG(1) << ins; + } + node_builder.Input(inputs); + if (info.engine_type == EngineInfo::EngineType::TRTStatic && + info.cached_engine_batches.size()) { + LOG(WARNING) << "Cached engine batches are ignored for static engines"; + } + tensorflow::NodeDef trt_node; + tensorflow::Status status = + node_builder.Attr("input_shapes", input_shapes) + .Attr("output_shapes", out_shapes) + .Attr("static_engine", + info.engine_type == EngineInfo::EngineType::TRTStatic) + .Attr("segment_funcdef_name", + StrCat(info.engine_name, "_native_segment")) + .Attr("serialized_segment", segment_string) + .Attr("calibration_data", "") + .Attr("max_cached_engines_count", info.maximum_cached_engines) + .Attr("cached_engine_batches", {max_batch_size}) + .Attr("workspace_size_bytes", info.max_workspace_size_bytes) + .Attr("precision_mode", prec_string) + .Attr("OutT", out_types) + .Finalize(&trt_node); + if (!status.ok()) { + LOG(ERROR) << "Node construction failed with" << status; + return status; + } + VLOG(1) << "Adding TRTEngine " << info.engine_name << " to graph"; + + // Up until this point, graph is not modified. If we return !status.ok() from + // here, this segment will be skipped + tensorflow::Node* engine_node = graph->AddNode(trt_node, &status); + if (!status.ok()) { + LOG(ERROR) << "Adding node failed " << status; + return status; + } + // Updates the inputs of output edges destination nodes, and point them to the + // engine node. + for (auto& conn : info.connections) { + if (conn.is_input_edge) continue; + VLOG(1) << " Updating DBG " << engine_node->name() << " out_port " + << conn.port_number << " out_id " << conn.outside_id + << " name=" << conn.outside_node_name; + auto dst_node = graph->FindNodeId(conn.outside_id); + // dst_node can only be removed if it is an input node of another engine. + // In this case, other engines input edge is updated in nodedef to point to + // this engine. Even though edge doesn't exists in the graph, when it is + // deserialized again, correct edges will be constructed. This is a problem + // of graph->AddNode(). + if (!dst_node) continue; + VLOG(1) << "Updating " << engine_node->name() << ":" << conn.port_number + << " to " << dst_node->name() << ":" << conn.outside_port; + auto new_edge = graph->AddEdge(engine_node, conn.port_number, dst_node, + conn.outside_port); + CHECK(new_edge) << "Adding a new edge failed " << engine_node->name() << ":" + << conn.port_number << " -> " << dst_node->name() << ":" + << conn.outside_port; + } + return status; +} + +// Function to construct a funcdef from the segment and add it to the graph. 
+tensorflow::Status RegisterSegmentFunctionToFunctionLibrary( + tensorflow::Graph* graph, const tensorflow::GraphDef& segment, + const string& name) { + tensorflow::Graph sgraph(graph->flib_def()); + tensorflow::GraphConstructorOptions gcopts; + TF_RETURN_IF_ERROR( + tensorflow::ConvertGraphDefToGraph(gcopts, segment, &sgraph)); + std::map io_nodes; + int num_inputs = 0; + for (auto n : sgraph.op_nodes()) { + if (tensorflow::str_util::StartsWith(n->name(), kInputPHName)) { + num_inputs++; + io_nodes.insert({n->name(), n}); + } else if (tensorflow::str_util::StartsWith(n->name(), kOutputPHName)) { + io_nodes.insert({n->name(), n}); + } + } + + for (int i = 0; i < num_inputs; ++i) { + auto name = StrCat(kInputPHName, i); + auto node = io_nodes[name]; + tensorflow::NodeDef nd; + tensorflow::NodeDefBuilder node_builder( + StrCat(name, "_Arg"), tensorflow::FunctionLibraryDefinition::kArgOp); + VLOG(1) << "Adding " << StrCat(name, "_Arg"); + TF_RETURN_IF_ERROR(node_builder.Attr("T", node->output_type(0)) + .Attr("index", i) + .Finalize(&nd)); + tensorflow::Status s; + auto node_arg = sgraph.AddNode(nd, &s); + if (!s.ok()) { + LOG(ERROR) << "Couldn't add _Arg node for " << name; + } + for (auto edge : node->out_edges()) { + sgraph.AddEdge(node_arg, 0, edge->dst(), edge->dst_input()); + VLOG(1) << "Updating funcdef input " << node_arg->name() << ":" << 0 + << " - > " << edge->dst()->name() << ":" << edge->dst_input(); + if (!s.ok()) { + LOG(ERROR) << "Failed to update edge from " << node_arg->name() + << " to " << edge->dst()->name() << ":" << edge->dst_input(); + } + } + sgraph.RemoveNode(node); + } + + for (int i = 0; i < io_nodes.size() - num_inputs; ++i) { + auto name = StrCat(kOutputPHName, i); + auto node = io_nodes[name]; + tensorflow::NodeDef nd; + tensorflow::NodeDefBuilder node_builder( + StrCat(name, "_Ret"), tensorflow::FunctionLibraryDefinition::kRetOp); + auto edge = *(node->in_edges().begin()); + tensorflow::NodeDefBuilder::NodeOut nout( + edge->src()->name(), edge->src_output(), + edge->src()->output_type(edge->src_output())); + VLOG(1) << " input " << nout.node << ":" << nout.index + << " dtype=" << tensorflow::DataTypeString(nout.data_type); + node_builder.Input({nout}); + TF_RETURN_IF_ERROR(node_builder.Attr("T", node->output_type(0)) + .Attr("index", i) + .Finalize(&nd)); + if (VLOG_IS_ON(3)) { + VLOG(3) << nd.DebugString(); + } + tensorflow::Status s; + auto node_ret = sgraph.AddNode(nd, &s); + if (!s.ok()) { + LOG(ERROR) << "Couldn't add _Ret node for " << name; + } + VLOG(1) << "Update edge from " << edge->src()->name() << ":" + << edge->src_output() << " - > " << node_ret->name() << ":" << 0; + sgraph.AddEdge(edge->src(), edge->src_output(), node_ret, 0); + s = sgraph.UpdateEdge(edge->src(), edge->src_output(), node_ret, 0); + if (!s.ok()) { + LOG(ERROR) << "Failed to update edge from " << edge->src()->name() << ":" + << edge->src_output() << " - > " << node_ret->name() << ":" + << 0; + } + sgraph.RemoveNode(node); + } + tensorflow::FunctionDefLibrary fdeflib; + auto native_segment = fdeflib.add_function(); + TF_RETURN_IF_ERROR(tensorflow::GraphToFunctionDef( + sgraph, StrCat(name, "_native_segment"), native_segment)); + if (VLOG_IS_ON(7)) { + VLOG(7) << name << " Function_Def "; + VLOG(7) << native_segment->DebugString(); + } + VLOG(1) << "Adding funcdef to graphlib"; + TF_RETURN_IF_ERROR(graph->AddFunctionLibrary(fdeflib)); + return tensorflow::Status::OK(); +} + +std::pair GetDeviceAndAllocator( + ConversionParams& params, EngineInfo& engine) { + int cuda_device_id = -1; 
+ auto check_device_id = [](int tfid) -> int { + tensorflow::TfGpuId tf_gpu_id(tfid); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (s.ok()) { + VLOG(1) << "Found TF GPU " << tf_gpu_id.value() << " at cuda device " + << cuda_gpu_id.value(); + return cuda_gpu_id.value(); + } + VLOG(2) << "TF GPU with id " << tfid << " do not exist " << s; + return -1; + }; + tensorflow::Allocator* dev_allocator = nullptr; + // we need to us PM here since in python path there is no way to get + // to allocators. + // TODO(sami): when grappler devices become available else path will not be + // necessary + auto pm = tensorflow::ProcessState::singleton(); + if (params.cluster) { // get allocator + tensorflow::Device* device = nullptr; + if (params.cluster->GetDeviceSet()) { + device = params.cluster->GetDeviceSet()->FindDeviceByName(engine.device); + } + if (device) { + tensorflow::AllocatorAttributes alloc_attr; + dev_allocator = device->GetAllocator(alloc_attr); + VLOG(1) << "Using allocator " << dev_allocator->Name(); + } else { + LOG(WARNING) << "Cluster is set but device '" << engine.device + << "' is not found in the cluster"; + } + } else { // cluster not found, possibly a python call + VLOG(1) << "Cluster is not set, probably called from python"; + int found_device = 0; + bool try_gpu_ids = true; + // if device is set, try to find the device. Might be a problem for multi + // host case but TensorRT do not support multi host setups yet. + if (!engine.device.empty()) { + DeviceNameUtils::ParsedName parsed_name; + if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name)) { + cuda_device_id = parsed_name.has_id ? parsed_name.id : -1; + } + try_gpu_ids = !parsed_name.has_id; + } + if (try_gpu_ids) { + while (found_device < 100) { + cuda_device_id = check_device_id(found_device); + if (cuda_device_id >= 0) break; + found_device++; + } + } + if (found_device == 100) { + LOG(ERROR) << " Can't find a GPU device to work with. Please " + "instantiate a session to initialize devices"; + return std::make_pair(cuda_device_id, dev_allocator); + } + LOG(WARNING) + << "Can't determine the device, constructing an allocator at device " + << found_device; + tensorflow::GPUOptions gpuoptions; + // this will be a noop if device is already initialized + gpuoptions.set_allow_growth(true); + tensorflow::TfGpuId tf_gpu_id(found_device); + dev_allocator = pm->GetGPUAllocator(gpuoptions, tf_gpu_id, 1); + } + return std::make_pair(cuda_device_id, dev_allocator); +} + +// Entry function from optimization pass. +tensorflow::Status ConvertAfterShapes(ConversionParams& params) { + // Convert graphdef to graph. tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), - gdef.library()); + params.input_graph_def->library()); tensorflow::Graph graph(flib); TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( - tensorflow::GraphConstructorOptions(), gdef, &graph)); + tensorflow::GraphConstructorOptions(), *params.input_graph_def, &graph)); + // Segment the graph into subgraphs that can be converted to TensorRT + tensorflow::tensorrt::segment::SegmentOptions segment_options; // TODO(ben,jie,sami): exclude output nodes (DISCUSS IT) - for (auto node : output_names) { + for (auto node : *(params.output_names)) { segment_options.exclude_node_list.insert(node); } - - // TODO(sami): this should be passed as a knob!!!! 
- segment_options.minimum_segment_size = minimum_segment_size; - tensorflow::tensorrt::segment::SegmentNodesVector segments; + segment_options.minimum_segment_size = params.minimum_segment_size; + tensorflow::tensorrt::segment::SegmentNodesVector initial_segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( - &graph, IsTensorRTCandidate, segment_options, &segments)); - if (segments.size() > 1) { - VLOG(0) << "MULTIPLE tensorrt candidate conversion: " << segments.size(); + &graph, IsTensorRTCandidate, segment_options, &initial_segments)); + if (initial_segments.size() > 1) { + VLOG(0) << "MULTIPLE tensorrt candidate conversion: " + << initial_segments.size(); } + + // Get the EngineInfo for each segment. std::unordered_map node_map; TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); - std::unordered_map> output_edge_map; - int count = 0; float total_num_nodes_in_segments = 0.; - for (auto s : segments) { - total_num_nodes_in_segments += s.first.size(); - } - // We create the map here since cluster may not be available in all cases. - std::map name_to_device_map; - if (cluster) { - // TODO(aaroey): consider using DeviceSet::FindDeviceByName(), as in a - // distributed environment, devices from different workers can have same - // short name. - for (const auto dm : cluster->GetDeviceSet()->devices()) { - name_to_device_map[dm->name()] = dm; + std::vector engine_segments; + engine_segments.reserve(initial_segments.size()); + std::vector reverse_topo_order; + tensorflow::GetPostOrder(graph, &reverse_topo_order); + size_t total_engine_bytes_size = 0; + std::vector engine_bytes_size; + tensorflow::tensorrt::segment::SegmentNodesVector converted_segments; + converted_segments.reserve(initial_segments.size()); + for (size_t t = 0; t < initial_segments.size(); t++) { + auto& curr_segment = initial_segments.at(t); + EngineInfo curr_engine; + Status status = + GetEngineInfo(&graph, *params.graph_properties, curr_segment.first, + node_map, reverse_topo_order, &curr_engine); + if (!status.ok()) { + LOG(WARNING) << "Failed to get engine info for segment " << t << ": " + << status; + continue; } - } - for (const auto& segment_nodes_and_device : segments) { - const std::set& subgraph_node_names = - segment_nodes_and_device.first; - std::set subgraph_node_ids; - size_t max_mem_per_engine = - max_workspace_size_bytes * - ((float)subgraph_node_names.size() / total_num_nodes_in_segments); - std::stringstream oss; - for (const string& node_name : subgraph_node_names) { - oss << " " << node_name; - subgraph_node_ids.insert(node_map.at(node_name)->id()); + curr_engine.precision_mode = params.precision_mode; + curr_engine.engine_type = + (params.is_dyn_op || params.precision_mode == INT8MODE + ? 
EngineInfo::EngineType::TRTDynamic + : EngineInfo::EngineType::TRTStatic); + curr_engine.cached_engine_batches = params.cached_engine_batches; + curr_engine.maximum_cached_engines = params.max_cached_engines; + StrAppend(&curr_engine.engine_name, "my_trt_op_", t); + status = RegisterSegmentFunctionToFunctionLibrary( + &graph, curr_engine.segment_graph_def, curr_engine.engine_name); + if (!status.ok()) { + LOG(WARNING) << "Failed to register segment graphdef as a function " << t + << ": " << status; + continue; } - VLOG(1) << "Subgraph nodes at device " << segment_nodes_and_device.second - << " : " << oss.str(); - auto target_device = - name_to_device_map.find(segment_nodes_and_device.second); - std::shared_ptr allocator(0); + engine_bytes_size.push_back(curr_engine.segment_graph_def.ByteSizeLong()); + total_engine_bytes_size += engine_bytes_size.back(); + total_num_nodes_in_segments += curr_segment.first.size(); + engine_segments.push_back(std::move(curr_engine)); + converted_segments.push_back(std::move(curr_segment)); + + if (VLOG_IS_ON(8)) { + string fname = curr_engine.engine_name; + StrAppend(&fname, ".pb"); + std::fstream f; + f.open(fname.c_str(), std::fstream::out | std::fstream::binary); + f << engine_segments.at(t).segment_graph_def.SerializeAsString(); + f.close(); + } + } + + // Create a TRT node for each segment using its EngineInfo. + int old_cuda_device = 0; + auto err = cudaGetDevice(&old_cuda_device); + if (err != cudaSuccess) { + LOG(ERROR) << "Couldn't get current device: " << cudaGetErrorString(err); + } + VLOG(1) << "Current cuda device is " << old_cuda_device; + for (int i = 0; i < engine_segments.size(); ++i) { + auto& engine = engine_segments.at(i); + // Partition the workspace size by the average of node ratio and segment + // graphdef size + engine.max_workspace_size_bytes = + params.max_workspace_size_bytes * + (engine_bytes_size.at(i) / total_engine_bytes_size + + converted_segments.at(i).first.size() / total_num_nodes_in_segments) / + 2.0; + // The allocator is used to build the engine. The build and the built engine + // will be destroyed after we get the serialized engine string, so it's fine + // to use unique_ptr here. + std::unique_ptr alloc; + auto device_alloc = GetDeviceAndAllocator(params, engine); int cuda_device_id = 0; - if (target_device != name_to_device_map.end()) { - tensorflow::TfGpuId tf_gpu_id(target_device->second->parsed_name().id); - CudaGpuId cuda_gpu_id; - Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); - if (!s.ok()) { - LOG(ERROR) - << "Cuda device identification failed, using device 0. Error= " - << s; - } else { - cuda_device_id = cuda_gpu_id.value(); - } - tensorflow::GPUOptions gpuoptions; - // we need to us PM here since in python path there is no way to get to - // allocators - auto pm = tensorflow::ProcessState::singleton(); - // this should be instantiated by now - auto dev_allocator = pm->GetGPUAllocator(gpuoptions, tf_gpu_id, 1); - VLOG(1) << "Got an allocator for device tf_device=" << tf_gpu_id.value() - << " cuda device= " << cuda_device_id << " at " << dev_allocator; - allocator = std::make_shared(dev_allocator); - } else { // device unknown or not available - allocator = std::make_shared(); + if (device_alloc.first >= 0) { + cuda_device_id = device_alloc.first; + alloc.reset(new TRTDeviceAllocator(device_alloc.second)); + } else { + // Setting allocator as nullptr should get revert to the cudamalloc + LOG(WARNING) << "Can't identify the cuda device. 
Running on device 0 "; } - ConvertGraphParams p(graph, output_names, subgraph_node_ids, max_batch_size, - max_mem_per_engine, graph_properties, &output_edge_map, - precision_mode, segment_nodes_and_device.second, - allocator, cuda_device_id); - if (precision_mode == INT8MODE) { - tensorflow::Status status = GetCalibNode(&p); - if (status != tensorflow::Status::OK()) { - LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count - << " due to: \"" << status.ToString() - << "\" SKIPPING......( " << subgraph_node_names.size() - << " nodes)"; + cudaSetDevice(cuda_device_id); + auto status = CreateTRTNode(&graph, engine_segments, i, alloc.get(), + params.max_batch_size); + // If status is ok, we successfully added the node to the graph and can + // remove segment ops. Otherwise graph is not modified. + if (status.ok()) { + for (auto node_name : converted_segments.at(i).first) { + graph.RemoveNode(node_map.at(node_name)); } } else { - tensorflow::Status status = ConvertSubGraphToTensorRT(&p); - if (status != tensorflow::Status::OK()) { - LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count - << " due to: \"" << status.ToString() - << "\" SKIPPING......( " << subgraph_node_names.size() - << " nodes)"; - } + // Graph is not modified. + LOG(WARNING) << "Engine creation for segment " << i << ", composed of " + << converted_segments.at(i).first.size() + << " nodes failed: " << status << ". Skipping..."; } - count++; } - graph.ToGraphDef(new_graph_def); + cudaSetDevice(old_cuda_device); + graph.ToGraphDef(params.output_graph_def); + VLOG(1) << "Returning from conversion"; return tensorflow::Status::OK(); } diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index 65a67d7e73..9d986e4890 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -30,29 +30,60 @@ namespace tensorflow { namespace tensorrt { namespace convert { -// This method converts an already generated calibration graph which was used in -// calibration runs to an inference graph +struct ConversionParams { + ConversionParams() + : input_graph_def(nullptr), + max_batch_size(1), + max_workspace_size_bytes(1 << 30), + output_graph_def(nullptr), + precision_mode(1), + minimum_segment_size(3), + graph_properties(nullptr), + cluster(nullptr), + is_dyn_op(false), + fixed_input_size(true), + max_cached_engines(1) {} + const tensorflow::GraphDef* input_graph_def; + const std::vector* output_names; + size_t max_batch_size; + size_t max_workspace_size_bytes; + tensorflow::GraphDef* output_graph_def; + int precision_mode; + int minimum_segment_size; + const tensorflow::grappler::GraphProperties* graph_properties; + const tensorflow::grappler::Cluster* cluster; + bool is_dyn_op; // Whether to create engine on conversion or execution time + bool fixed_input_size; // Assume non-batch ranks of input tensors are fixed + int max_cached_engines; // maximum number of cached engines + std::vector cached_engine_batches; // list of cached engines +}; + +// This method extracts calibration information from the resource managers +// and puts them in to engine nodedefs. 
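As a usage sketch for the ConversionParams struct defined above (hypothetical caller-side code, mirroring how TRTOptimizationPass::Optimize fills the same fields later in this change), the struct is populated and handed to ConvertAfterShapes roughly like this, assuming the snippet sits inside namespace tensorflow::tensorrt::convert in a Status-returning function:

// Sketch only: graph_def, output_names, graph_properties, cluster and
// converted_graph_def are assumed to be provided by the caller.
ConversionParams params;
params.input_graph_def = &graph_def;
params.output_names = &output_names;
params.max_batch_size = 8;
params.max_workspace_size_bytes = 1 << 30;
params.output_graph_def = &converted_graph_def;
params.precision_mode = FP16MODE;
params.minimum_segment_size = 3;
params.graph_properties = &graph_properties;
params.cluster = cluster;
params.is_dyn_op = false;  // build engines at conversion time
params.max_cached_engines = 1;
TF_RETURN_IF_ERROR(ConvertAfterShapes(params));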
tensorflow::Status ConvertCalibGraphToInferGraph( - const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* new_graph_def); + const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* new_graph_def, + bool is_dyn_op); -// max_batch_size: maximum batch size which can be used for inference for -// optimization targets inference run with max batch size. -// max_workspace_size_bytes: The upper bound of memory allowance for -// engine building. +// - max_batch_size: maximum batch size which can be used for inference for +// optimization targets inference run with max batch size. +// - max_workspace_size_bytes: The upper bound of memory allowance for engine +// building. tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, - int precision_mode, int minimum_segment_size); + int precision_mode = 1, int minimum_segment_size = 3, + bool is_dyn_op = false, int max_cached_engines = 1, + std::vector cached_engine_batches = {}); // Method to call from optimization pass -tensorflow::Status ConvertAfterShapes( - const tensorflow::GraphDef& graph, const std::vector& output_names, - size_t max_batch_size, size_t max_workspace_size_bytes, - tensorflow::GraphDef* new_graph_def, int precision_mode, - int minimum_segment_size, - const tensorflow::grappler::GraphProperties& graph_properties, - const tensorflow::grappler::Cluster* cluster); +tensorflow::Status ConvertAfterShapes(ConversionParams& params); + +// Return compile time TensorRT library version information. +std::vector GetLinkedTensorRTVersion(); + +// Return runtime time TensorRT library version information. +std::vector GetLoadedTensorRTVersion(); } // namespace convert } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 4e4d295538..146b9c7344 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -14,7 +14,6 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h" -#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" #include #include @@ -25,7 +24,9 @@ limitations under the License. #include #include +#include "tensorflow/contrib/tensorrt/convert/utils.h" #include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" #include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" #include "tensorflow/contrib/tensorrt/resources/trt_resources.h" #include "tensorflow/core/framework/node_def.pb.h" // NOLINT @@ -37,6 +38,7 @@ limitations under the License. #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" @@ -54,8 +56,11 @@ limitations under the License. 
namespace tensorflow { namespace tensorrt { namespace convert { +using ::tensorflow::str_util::Split; + using ::tensorflow::strings::StrAppend; using ::tensorflow::strings::StrCat; + namespace { inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype, @@ -121,12 +126,10 @@ static std::vector> CreateSamePadding( string GetCommonNameScope(const string& op_name_a, const string& op_name_b) { size_t last_scope_separator = 0; - for (size_t i = 0; i < std::min(op_name_a.size(), op_name_b.size()); ++i) { - if (op_name_a[i] != op_name_b[i]) { - break; - } else if (op_name_a[i] == '/') { - last_scope_separator = i + 1; - } + const size_t min_size = std::min(op_name_a.size(), op_name_b.size()); + for (size_t i = 0; i < min_size; ++i) { + if (op_name_a[i] != op_name_b[i]) break; + if (op_name_a[i] == '/') last_scope_separator = i + 1; } return op_name_a.substr(0, last_scope_separator); } @@ -417,20 +420,6 @@ void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights, } } -struct InferDeleter { - template - void operator()(T* obj) const { - if (obj) { - obj->destroy(); - } - } -}; - -template -inline std::shared_ptr infer_object(T* obj) { - return std::shared_ptr(obj, InferDeleter()); -} - class Converter; using OpConverter = @@ -444,7 +433,7 @@ class Converter { OpConverter plugin_converter_; nvinfer1::INetworkDefinition* trt_network_; std::list> temp_bufs_; - tensorflow::tensorrt::TRTWeightStore* weight_store_; + TRTWeightStore* weight_store_; bool fp16_; void register_op_converters(); tensorflow::Status get_inputs(const tensorflow::NodeDef& node_def, @@ -486,11 +475,11 @@ class Converter { public: explicit Converter(nvinfer1::INetworkDefinition* trt_network, - tensorflow::tensorrt::TRTWeightStore* ws, bool fp16) + TRTWeightStore* ws, bool fp16) : trt_network_(trt_network), weight_store_(ws), fp16_(fp16) { this->register_op_converters(); } - tensorflow::tensorrt::TRTWeightStore* weight_store() { return weight_store_; } + TRTWeightStore* weight_store() { return weight_store_; } TRT_ShapedWeights get_temp_weights(tensorflow::DataType type, nvinfer1::Dims shape) { TRT_ShapedWeights weights(type, nullptr, shape); @@ -2140,559 +2129,265 @@ void Converter::register_op_converters() { } // namespace -tensorflow::Status ConvertCalibrationNodeToEngineNode( - tensorflow::Graph& graph, tensorflow::Node* c_node) { - const auto ndef = c_node->def(); - - TFAttrs attrs(ndef); - std::vector segment_nodes( - attrs.get>("segment_nodes")); - std::vector output_nodes( - attrs.get>("segment_output_names")); - std::vector input_names( - attrs.get>("input_names")); - string res_name = attrs.get("resource_name"); - VLOG(1) << "Node name " << c_node->name() << " res_name " << res_name; - string engine_name = "my_trt_op"; - { - const auto node_id = tensorflow::str_util::Split(res_name, "_"); - engine_name += node_id.back(); - } - std::map node_maps; - - for (auto n : graph.op_nodes()) { - node_maps.insert({n->name(), n}); - } - std::set subgraph_ids; - for (const auto internal_node : segment_nodes) { - subgraph_ids.insert(node_maps.at(internal_node)->id()); - } - if (VLOG_IS_ON(2)) { - string node_names = StrCat(c_node->name(), " segment nodes= "); - - for (const auto& node_name : segment_nodes) { - StrAppend(&node_names, node_name, ", "); - } - VLOG(2) << node_names; +tensorflow::Status ConvertGraphDefToEngine( + const tensorflow::GraphDef& gdef, int precision_mode, int max_batch_size, + size_t max_workspace_size_bytes, + const std::vector& input_shapes, + Logger* logger, nvinfer1::IGpuAllocator* allocator, + 
TRTInt8Calibrator* calibrator, + TrtUniquePtrType* engine, + bool* convert_successfully) { + engine->reset(); + if (convert_successfully) *convert_successfully = false; + + // Create the builder. + TrtUniquePtrType builder( + nvinfer1::createInferBuilder(*logger)); + builder->setMaxBatchSize(max_batch_size); + // TODO(aaroey): use the allocator to allocate the TRT workspace. + builder->setMaxWorkspaceSize(max_workspace_size_bytes); +#if NV_TENSORRT_MAJOR > 3 + builder->setGpuAllocator(allocator); +#endif + if (precision_mode == FP16MODE) { + builder->setHalf2Mode(true); + } else if (precision_mode == INT8MODE) { + builder->setInt8Mode(true); + builder->setInt8Calibrator(calibrator); } - VLOG(1) << "Output Nodes:"; - std::vector out_types; - std::vector out_edges; + // Create the network. + auto trt_network = + TrtUniquePtrType(builder->createNetwork()); + if (!trt_network) { + return tensorflow::errors::Internal( + "Failed to create TensorRT network object"); + } + auto ws = std::unique_ptr(new TRTWeightStore()); - for (auto& i : output_nodes) { - auto node_port = tensorflow::str_util::Split(i, ":"); - VLOG(1) << " " << i << " in graph " << node_maps.count(i); - auto out_node_name = node_port.at(0); - if (node_port.size() > 1) { - VLOG(1) << "Multi port output" << node_port.at(0) << " " - << node_port.at(1) << " size=" << node_port.size(); - } - auto node_it = node_maps.find(out_node_name); - if (node_it != node_maps.end()) { - tensorflow::Node* out_node = node_it->second; - int port = 0; - if (node_port.size() == 2) { - port = std::strtoul(node_port.at(1).c_str(), nullptr, 10); - out_types.push_back(out_node->output_type(port)); - } else { - out_types.push_back(out_node->output_type(0)); + // Build the network + VLOG(1) << "Starting engine conversion "; + Converter converter(trt_network.get(), ws.get(), precision_mode == FP16MODE); + std::vector> output_tensors; + // Graph nodes are already topologically sorted during construction + for (const auto& node_def : gdef.node()) { + string node_name = node_def.name(); + VLOG(1) << "Converting op name=" << node_name << ", op=" << node_def.op(); + if (tensorflow::str_util::StartsWith(node_name, kInputPHName) && + (node_def.op() == "Placeholder")) { + nvinfer1::DimsCHW input_dim_pseudo_chw; + for (int i = 0; i < 8; i++) input_dim_pseudo_chw.d[i] = 0; + nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); + auto type_status = + ConvertDType(node_def.attr().at("dtype").type(), &dtype); + if (type_status != tensorflow::Status::OK()) { + LOG(WARNING) << "Type conversion failed for " << node_name; + return type_status; } - for (auto out_edge : out_node->out_edges()) { - if (subgraph_ids.count(out_edge->dst()->id())) - continue; // skip internal edges; - if (out_edge->src_output() == port) { - out_edges.push_back(out_edge); - VLOG(1) << "OUTPUT EDGE " << out_edge->src()->name() << ":" - << out_edge->src_output() << " -> " << out_edge->dst()->name() - << ":" << out_edge->dst_input(); + int32 slot_number = -1; + if (!tensorflow::strings::safe_strto32(node_name.c_str() + 8, + &slot_number)) { + LOG(ERROR) << "Failed to parse slot number from " << node_name + << " +8= " << node_name.c_str() + 8; + } + auto shape = input_shapes.at(slot_number); + if (shape.dims() > 8) { + LOG(ERROR) << "Tensor rank is greater than 8 for " << node_name + << " at input slot " << slot_number; + return tensorflow::errors::OutOfRange( + "Input tensor rank is greater than 8"); + } + if (VLOG_IS_ON(1)) { + string dim_str("dims="); + StrAppend(&dim_str, "[ ", shape.dim_size(0)); + 
for (int i = 1; i < shape.dims(); i++) { + StrAppend(&dim_str, ", ", shape.dim_size(i)); } + StrAppend(&dim_str, " ]"); + VLOG(1) << dim_str; + } + for (int i = 1; i < shape.dims(); i++) { + input_dim_pseudo_chw.d[i - 1] = shape.dim_size(i); } - } else { - LOG(WARNING) << " couldn't find output node " << out_node_name; - } - } - if (VLOG_IS_ON(1)) { - VLOG(1) << c_node->name() << " Input Nodes:"; - for (auto& i : input_names) { - VLOG(1) << " Input " << i << " in graph " << node_maps.count(i); - } - } - auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance(); - auto resmgr = trt_rm->getManager("TRTCalibOps"); - tensorflow::tensorrt::TRTCalibrationResource* calib_res = nullptr; - auto status = resmgr->Lookup(res_name, res_name, &calib_res); - if (!status.ok() || !calib_res->calibrator_) { - return tensorflow::errors::FailedPrecondition( - "You must run calibration" - " and inference conversion in the same process"); - } - - calib_res->calibrator_->setDone(); - calib_res->thr_->join(); - delete calib_res->thr_; - if (!calib_res->engine_) { - LOG(ERROR) << "Calibration failed!, engine does not exist. Did you run " - "calibration graph?"; - return tensorflow::errors::FailedPrecondition( - "Calibration graph needs to be executed on" - " calibration data before convertsion to inference graph"); - } - auto weight_rmgr = trt_rm->getManager("WeightStore"); - TF_CHECK_OK(weight_rmgr->Delete( - res_name, res_name)); - auto engine_plan = calib_res->engine_->serialize(); - calib_res->engine_->destroy(); - calib_res->network_->destroy(); - calib_res->builder_->destroy(); - calib_res->thr_ = nullptr; - calib_res->engine_ = nullptr; - calib_res->builder_ = nullptr; - tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); - std::vector income_edges; - income_edges.resize(c_node->num_inputs()); - for (const auto in_edge : c_node->in_edges()) { - auto src = in_edge->src(); - int dest_port = in_edge->dst_input(); - VLOG(1) << "Incoming connection " << src->name() << ":" - << in_edge->src_output() << " -> " << c_node->name() << ":" - << dest_port; - income_edges.at(dest_port) = {src->name(), in_edge->src_output(), - c_node->input_type(dest_port)}; - } - tensorflow::gtl::ArraySlice input_list( - income_edges); - if (VLOG_IS_ON(2)) { - for (const auto& inp : input_list) { - VLOG(2) << " Input from inputlist " << inp.node << ":" << inp.index << " " - << tensorflow::DataTypeString(inp.data_type); - } - } - op_builder.Input(input_list); - tensorflow::NodeDef engine_node; - const char* engine_plan_data = static_cast(engine_plan->data()); - string engine_plan_string(engine_plan_data, - engine_plan_data + engine_plan->size()); - status = op_builder.Attr("serialized_engine", engine_plan_string) - .Attr("input_nodes", input_names) - .Attr("output_nodes", output_nodes) - .Attr("OutT", out_types) - .Finalize(&engine_node); - if (!status.ok()) { - LOG(ERROR) << "Engine Node creation failed"; - return status; - } - auto trt_engine_node = graph.AddNode(engine_node, &status); - TF_RETURN_IF_ERROR(status); - std::map port_map; - for (size_t t = 0; t < output_nodes.size(); t++) { - port_map.insert({output_nodes.at(t), t}); - } - for (auto& i : out_edges) { - string s(i->src()->name()); - if (i->src_output()) StrAppend(&s, ":", i->src_output()); - int out_port = port_map.at(s); - VLOG(1) << "Connecting " << trt_engine_node->name() << ":" << out_port - << " -> " << i->dst()->name() << ":" << i->dst_input(); - TF_RETURN_IF_ERROR( - graph.UpdateEdge(trt_engine_node, out_port, i->dst(), i->dst_input())); - } - 
for (const auto ed : trt_engine_node->in_edges()) { - VLOG(1) << "In Edge " << ed->src()->name() << ":" << ed->src_output() - << " -> " << ed->dst()->name() << ":" << ed->dst_input(); - } - for (const auto ed : trt_engine_node->out_edges()) { - VLOG(1) << "Out Edge " << ed->src()->name() << ":" << ed->src_output() - << " -> " << ed->dst()->name() << ":" << ed->dst_input(); - } - VLOG(1) << "Segment nodes:"; - for (auto& i : segment_nodes) { - VLOG(1) << " " << i << " in graph " << node_maps.count(i); - auto it = node_maps.find(i); - if (it != node_maps.end()) { - graph.RemoveNode(it->second); - } - } - graph.RemoveNode(c_node); - return tensorflow::Status::OK(); -} -tensorflow::Status ReverseTopologicalSort( - const tensorrt::convert::SubGraphParams& s, - std::list* order) { - std::vector order_vec; - tensorflow::GetPostOrder(s.graph, &order_vec); - // Select just the subgraph - for (tensorflow::Node* node : order_vec) { - if (s.subgraph_node_ids.count(node->id())) { - // We want topological order to contstruct the - // network layer by layer - order->push_front(node); + input_dim_pseudo_chw.nbDims = shape.dims() - 1; + nvinfer1::ITensor* input_tensor = converter.network()->addInput( + node_name.c_str(), dtype, input_dim_pseudo_chw); + if (!input_tensor) { + return tensorflow::errors::InvalidArgument( + "Failed to create Input layer tensor ", node_name, + " rank=", shape.dims() - 1); + } + VLOG(1) << "Input tensor name :" << node_name; + if (!converter.insert_input_tensor(node_name, input_tensor)) { + return tensorflow::errors::AlreadyExists( + "Output tensor already exists for op: " + node_name); + } + } else if (tensorflow::str_util::StartsWith(node_name, kOutputPHName) && + (node_def.op() == "Identity")) { + int32 slot_number = -1; + if (!tensorflow::strings::safe_strto32(node_name.c_str() + 9, + &slot_number)) { + LOG(ERROR) << "Failed to parse slot number from " << node_name + << " +9=" << node_name.c_str() + 9; + } + if (output_tensors.size() <= slot_number) { + output_tensors.resize(slot_number + 1); + } + output_tensors.at(slot_number) = {node_def.input(0), node_name}; + } else { + VLOG(2) << "Converting node: " << node_def.name() << " , " + << node_def.op(); + TF_RETURN_IF_ERROR(converter.convert_node(node_def)); } } - return tensorflow::Status::OK(); -} - -tensorflow::Status SetInputList( - const tensorrt::convert::SubGraphParams& s, - tensorflow::NodeDefBuilder* op_builder, - const std::vector* input_names, - std::vector* input_dtypes) { - std::vector income_edges; - VLOG(2) << "input edge size: " << input_names->size(); - for (size_t i = 0; i < input_names->size(); ++i) { - VLOG(2) << "input edges: " << i << " " << input_names->at(i); - int output_idx = s.input_inds.at(i).second; - // we wired up the input here already, it is redundant to do it again in - // ConvertSubGraphToTensorRT(convert_graph.cc) - auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( - input_names->at(i), output_idx, input_dtypes->at(i)); - income_edges.push_back(incoming_edge); - } - tensorflow::gtl::ArraySlice input_list( - income_edges); - op_builder->Input(input_list); - return tensorflow::Status::OK(); -} - -string SubgraphNameScopeGenerator(const std::list* order) { - string subgraph_name_scope; - if (!order->empty()) { - subgraph_name_scope = order->front()->name(); - } - for (const tensorflow::Node* node : *order) { - subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); - } - // TODO(sami,ben,jie): proper naming! 
- return subgraph_name_scope; -} - -tensorflow::Status ConvertSubgraph( - Converter& converter, tensorrt::convert::SubGraphParams& s, - std::list* order, std::vector* input_names, - std::vector* input_dtypes, - std::vector* output_names, - std::vector* output_dtypes, - const string& engine_name) { - std::set added_tensors; - for (const std::pair& input : s.input_inds) { - VLOG(2) << "parsing input. Node id= " << input.first; - int node_id = input.first; - int output_idx = input.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); - auto node_name = node->name(); - // input_names should use the node name in the graph - // here it should be the input tensor name -> matching the binding - // insert original node name without port - auto tensor_name = node_name; - if (output_idx != 0) { - tensor_name = StrCat(tensor_name, ":", output_idx); - } - - VLOG(2) << "input name: " << node_name << " tensor_name: " << tensor_name - << " idx: " << output_idx; - - auto shape_inference_node_name = node_name; - auto shape_inference_output_idx = output_idx; - // rewire the shape inference to original node in the graph - if (s.output_edge_map->count(tensor_name)) { - shape_inference_node_name = s.output_edge_map->at(tensor_name).second; - shape_inference_output_idx = s.output_edge_map->at(tensor_name).first; - } - if (shape_inference_output_idx < 0) continue; - VLOG(2) << "shapeinference name: " << shape_inference_node_name - << " idx: " << shape_inference_output_idx; - - if (!s.graph_properties.HasOutputProperties(shape_inference_node_name)) - return tensorflow::errors::Internal("failed to find input node: " + - shape_inference_node_name); - - auto op_info_vec = - s.graph_properties.GetOutputProperties(shape_inference_node_name); - if (static_cast(op_info_vec.size()) <= shape_inference_output_idx) - return tensorflow::errors::Internal( - "accessing output index of: ", shape_inference_output_idx, - ", at node: ", shape_inference_node_name, - " with output entry from shape_map: ", op_info_vec.size()); - - auto op_info = op_info_vec.at(shape_inference_output_idx); - tensorflow::DataType tf_dtype = op_info.dtype(); - - nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); - auto type_status = ConvertDType(tf_dtype, &dtype); - if (type_status != tensorflow::Status::OK()) { - LOG(WARNING) << "Type conversion failed for " << node_name; - return type_status; - } - - VLOG(2) << "Accessing output index of: " << output_idx - << ", at node: " << node_name - << " with output entry from shape_map: " << op_info_vec.size(); - // TODO(ben,jie): update TRT input format/dimension - nvinfer1::DimsCHW input_dim_pseudo_chw; - for (int i = 0; i < 3; i++) input_dim_pseudo_chw.d[i] = 1; - - // TODO(jie): TRT 3.x only support 4 dimensional input tensor. - // update the code once TRT 4.0 comes out. - if (op_info.shape().dim_size() != 4) { - string err_str = "Require 4 dimensional input."; - StrAppend(&err_str, " Got ", op_info.shape().dim_size(), " ", - shape_inference_node_name); - return tensorflow::errors::Unimplemented(err_str); - } - - for (int i = 1; i < op_info.shape().dim_size(); i++) { - VLOG(2) << "dimension: " << i - << " , size: " << op_info.shape().dim(i).size(); - input_dim_pseudo_chw.d[i - 1] = op_info.shape().dim(i).size(); - } - - // TODO(ben,jie): proper way to restore input tensor name? 
- auto input_tensor_name = node_name; - if (output_idx != 0) { - input_tensor_name = StrCat(node_name, ":", output_idx); - } - if (added_tensors.count(input_tensor_name)) continue; - added_tensors.insert(input_tensor_name); - input_names->push_back(input_tensor_name); - input_dtypes->push_back(tf_dtype); - nvinfer1::ITensor* input_tensor = converter.network()->addInput( - input_tensor_name.c_str(), dtype, input_dim_pseudo_chw); - - if (!input_tensor) - return tensorflow::errors::InvalidArgument( - "Failed to create Input layer"); - VLOG(2) << "Input tensor name :" << input_tensor_name; - - if (!converter.insert_input_tensor(input_tensor_name, input_tensor)) - return tensorflow::errors::AlreadyExists( - "Output tensor already exists for op: " + input_tensor_name); - } - - for (const tensorflow::Node* node : *order) { - const tensorflow::NodeDef& node_def = node->def(); - VLOG(2) << "Converting node: " << node_def.name() << " , " << node_def.op(); - TF_RETURN_IF_ERROR(converter.convert_node(node_def)); - } - - VLOG(2) << "Finished conversion"; - - // Gather output metadata - int trt_engine_op_output_idx = 0; - added_tensors.clear(); - for (const std::pair& output : s.output_inds) { - int node_id = output.first; - int output_idx = output.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); - string op_name = node->name(); - string tensor_name = op_name; - - s.output_edge_map->insert( - {trt_engine_op_output_idx == 0 - ? engine_name - : StrCat(engine_name, ":", trt_engine_op_output_idx), - {output_idx, tensor_name}}); - trt_engine_op_output_idx++; - if (output_idx != 0) - tensorflow::strings::StrAppend(&tensor_name, ":", output_idx); - VLOG(2) << "Output tensor name: " << tensor_name; - if (added_tensors.count(tensor_name)) continue; - added_tensors.insert(tensor_name); - output_names->push_back(tensor_name); - auto tensor_or_weights = converter.get_tensor(tensor_name); + for (const auto& output : output_tensors) { + auto tensor_or_weights = converter.get_tensor(output.first); if (!tensor_or_weights.is_tensor()) { - return tensorflow::errors::InvalidArgument("Output node '" + tensor_name + - "' is weights not tensor"); + return tensorflow::errors::InvalidArgument( + "Output node '" + output.first + "' is weights not tensor"); } nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); + tensor->setName(output.second.c_str()); if (!tensor) { return tensorflow::errors::NotFound("Output tensor not found: " + - tensor_name); + output.first); } + VLOG(1) << "Marking output tensor " << output.first << ", as output tensor " + << output.second; + converter.network()->markOutput(*tensor); - tensorflow::DataType tf_dtype = node->output_type(output_idx); - output_dtypes->push_back(tf_dtype); - nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT; - TF_RETURN_IF_ERROR(ConvertDType(tf_dtype, &trt_dtype)); - tensor->setType(trt_dtype); } + if (convert_successfully) *convert_successfully = true; - return tensorflow::Status::OK(); -} - -tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { - // Visit nodes in reverse topological order and construct the TRT network. - // Toposort - std::list order; - TF_RETURN_IF_ERROR(ReverseTopologicalSort(s, &order)); - - static int static_id = 0; - string subgraph_name_scope = SubgraphNameScopeGenerator(&order); - // TODO(sami,ben,jie): proper naming! 
- string calib_op_name = - StrCat(subgraph_name_scope, "my_trt_calib_op_", static_id); - string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id); - static_id++; - - auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); - auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); - auto op_res = new tensorflow::tensorrt::TRTCalibrationResource(); - TF_CHECK_OK(op_rmgr->Create(calib_op_name, calib_op_name, op_res)); - op_res->logger_ = new tensorflow::tensorrt::Logger(); - cudaSetDevice(s.cuda_gpu_id_); - op_res->builder_ = nvinfer1::createInferBuilder(*(op_res->logger_)); - op_res->allocator_ = s.allocator_; -#if NV_TENSORRT_MAJOR > 3 - op_res->builder_->setGpuAllocator(s.allocator_.get()); -#endif - if (!op_res->builder_) { - return tensorflow::errors::Internal( - "failed to create TensorRT builder object"); + // Build the engine. + VLOG(1) << "Starting engine creation"; + engine->reset(builder->buildCudaEngine(*converter.network())); + if (engine->get() == nullptr) { + return tensorflow::errors::Internal("Failed to build TensorRT engine"); } - - op_res->network_ = op_res->builder_->createNetwork(); - if (!op_res->network_) { - return tensorflow::errors::Internal( - "failed to create TensorRT network object"); - } - - // Build the network - auto weight_rmgr = trt_rmgr->getManager("WeightStore"); - auto ws = new tensorflow::tensorrt::TRTWeightStore(); - TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); - Converter converter(op_res->network_, ws, s.precision_mode == FP16MODE); - - std::vector input_names; - std::vector input_dtypes; - std::vector output_names; - std::vector output_dtypes; - TF_RETURN_IF_ERROR(ConvertSubgraph(converter, s, &order, &input_names, - &input_dtypes, &output_names, - &output_dtypes, engine_name)); - - VLOG(2) << "Finished processing outputs"; - - // Build the engine - op_res->builder_->setMaxBatchSize(s.max_batch_size); - op_res->builder_->setMaxWorkspaceSize(s.max_workspace_size_bytes); - VLOG(0) << "Max batch size= " << s.max_batch_size - << " max workspace size= " << s.max_workspace_size_bytes; - - // Build the TRT op - // TODO(sami,ben,jie): proper naming! - tensorflow::NodeDefBuilder op_builder(calib_op_name, "TRTCalibOp"); - TF_RETURN_IF_ERROR(SetInputList(s, &op_builder, &input_names, &input_dtypes)); - - std::vector segment_names; - segment_names.reserve(s.subgraph_node_ids.size()); - for (int i : s.subgraph_node_ids) { - auto node = s.graph.FindNodeId(i); - segment_names.push_back(node->name()); - } - LOG(INFO) << "finished op preparation"; - - auto status = op_builder.Attr("segment_nodes", segment_names) - .Attr("input_names", input_names) - .Attr("segment_output_names", output_names) - .Attr("resource_name", calib_op_name) - .Finalize(s.trt_node); - - LOG(INFO) << status.ToString(); - LOG(INFO) << "finished op building"; - + VLOG(1) << "Finished conversion"; return tensorflow::Status::OK(); } -tensorflow::Status ConvertSubGraphToTensorRTNodeDef( - tensorrt::convert::SubGraphParams& s) { - // Visit nodes in reverse topological order and construct the TRT network. 
- std::list order; - TF_RETURN_IF_ERROR(ReverseTopologicalSort(s, &order)); - - static int static_id = 0; - string subgraph_name_scope = SubgraphNameScopeGenerator(&order); - string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id++); - - tensorflow::tensorrt::Logger trt_logger; - cudaSetDevice(s.cuda_gpu_id_); - auto trt_builder = infer_object(nvinfer1::createInferBuilder(trt_logger)); - if (!trt_builder) { - return tensorflow::errors::Internal( - "Failed to create TensorRT builder object"); - } -#if NV_TENSORRT_MAJOR > 3 - trt_builder->setGpuAllocator(s.allocator_.get()); -#endif - auto trt_network = infer_object(trt_builder->createNetwork()); - if (!trt_network) { - return tensorflow::errors::Internal( - "Failed to create TensorRT network object"); - } - - auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); - auto weight_rmgr = trt_rmgr->getManager("WeightStore"); - auto ws = new tensorflow::tensorrt::TRTWeightStore(); - TF_CHECK_OK(weight_rmgr->Create(engine_name, engine_name, ws)); - - // Build the network - Converter converter(trt_network.get(), ws, s.precision_mode == FP16MODE); - - std::vector input_names; - std::vector input_dtypes; - std::vector output_names; - std::vector output_dtypes; - TF_RETURN_IF_ERROR(ConvertSubgraph(converter, s, &order, &input_names, - &input_dtypes, &output_names, - &output_dtypes, engine_name)); - - VLOG(2) << "Finished output"; - - // Build the engine - trt_builder->setMaxBatchSize(s.max_batch_size); - trt_builder->setMaxWorkspaceSize(s.max_workspace_size_bytes); - VLOG(0) << "Max batch size= " << s.max_batch_size - << " max workspace size= " << s.max_workspace_size_bytes; - if (s.precision_mode == FP16MODE) { - trt_builder->setHalf2Mode(true); - VLOG(0) << "Using FP16 precision mode"; - } - LOG(INFO) << "starting build engine"; - string engine_plan_string; - { - auto trt_engine = - infer_object(trt_builder->buildCudaEngine(*converter.network())); - VLOG(0) << "Built network"; - if (trt_engine.get() == nullptr) { - return tensorflow::errors::Internal("Engine building failure"); +tensorflow::Status ConvertSegmentToGraphDef( + const tensorflow::Graph* graph, + const tensorflow::grappler::GraphProperties& graph_properties, + const std::vector& subgraph_node_ids, // In topological order + std::vector* connections, + tensorflow::GraphDef* segment_def, string* common_scope) { + std::set marker_nodes; + // Update connection shapes/data types and add corresponding input/output + // nodes in the segment graphdef. + for (size_t i = 0; i < connections->size(); ++i) { + auto& connection = connections->at(i); + auto outside_node = graph->FindNodeId(connection.outside_id); + if (!outside_node) { + // This should never happen, unless the original graph is problematic. + return tensorflow::errors::NotFound( + "Cannot find node with id ", connection.outside_id, " in the graph."); + } + // Updates the shape and data types of input/output connections. 
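+    // For an input edge the dtype/shape come from the outside node's output
+    // properties; for an output edge they come from the outside node's input
+    // properties. If grappler has no shape information for that node, only
+    // the data type is recorded (taken from output_type()).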
+ tensorflow::DataType input_type = tensorflow::DT_FLOAT; + tensorflow::PartialTensorShape partial_shape; + if (connection.is_input_edge) { + if (graph_properties.HasOutputProperties(connection.outside_node_name)) { + auto output_params = + graph_properties.GetOutputProperties(connection.outside_node_name); + auto out_shape = output_params.at(connection.outside_port); + input_type = out_shape.dtype(); + std::vector dims; + partial_shape = out_shape.shape(); + connection.outside_shape = partial_shape; + } else { + VLOG(0) << "Unknown output shape" << outside_node->name(); + input_type = graph->FindNodeId(connection.outside_id) + ->output_type(connection.outside_port); + } + connection.connection_type = input_type; + + } else { // output edge + if (graph_properties.HasInputProperties(connection.outside_node_name)) { + auto input_params = + graph_properties.GetInputProperties(connection.outside_node_name); + auto in_shape = input_params.at(connection.outside_port); + input_type = in_shape.dtype(); + partial_shape = in_shape.shape(); + connection.inside_shape = partial_shape; + } else { + input_type = graph->FindNodeId(connection.inside_id) + ->output_type(connection.outside_port); + } + connection.connection_type = input_type; } - auto engine_plan = infer_object(trt_engine->serialize()); - VLOG(0) << "Serialized engine"; - const char* engine_plan_data = - static_cast(engine_plan->data()); - engine_plan_string = - string(engine_plan_data, engine_plan_data + engine_plan->size()); - } - TF_RETURN_IF_ERROR(weight_rmgr->Delete( - engine_name, engine_name)); - LOG(INFO) << "finished engine " << engine_name << " containing " - << s.subgraph_node_ids.size() << " nodes"; - - // Build the TRT op - tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); - TF_RETURN_IF_ERROR(SetInputList(s, &op_builder, &input_names, &input_dtypes)); - - VLOG(0) << "Finished op preparation"; - - auto status = op_builder.Attr("serialized_engine", engine_plan_string) - .Attr("input_nodes", input_names) - .Attr("output_nodes", output_names) - .Attr("OutT", output_dtypes) - .Device(s.device_name_) - .Finalize(s.trt_node); - - VLOG(0) << status.ToString() << " finished op building for " << engine_name - << " on device " << s.device_name_; + // Add dummy input/output nodes to the segment graphdef. 
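+    // Each boundary edge gets a marker node in the segment graphdef: input
+    // edges a Placeholder named InputPH_<port_number>, output edges an
+    // Identity named OutputPH_<port_number>. A marker is created only once
+    // per port and reused for additional edges on the same port.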
+ if (connection.is_input_edge) { + const string node_name = StrCat(kInputPHName, connection.port_number); + if (marker_nodes.count(node_name)) { + VLOG(1) << "Reusing input " << node_name << " for the edge " + << connection.outside_node_name << ":" + << connection.outside_port << " -> " + << connection.inside_node_name << ":" << connection.inside_port; + continue; + } + marker_nodes.insert(node_name); + auto seg_node = segment_def->add_node(); + tensorflow::NodeDefBuilder builder(node_name, "Placeholder"); + auto status = builder.Attr("shape", partial_shape) + .Attr("dtype", input_type) + .Finalize(seg_node); + VLOG(1) << "Constructing input " << node_name << " for the edge " + << connection.outside_node_name << ":" << connection.outside_port + << " -> " << connection.inside_node_name << ":" + << connection.inside_port; + } else { + const string node_name = StrCat(kOutputPHName, connection.port_number); + if (marker_nodes.count(node_name)) { + VLOG(1) << "Reusing output " << node_name << " for the edge " + << connection.inside_node_name << ":" << connection.inside_port + << " -> " << connection.outside_node_name << ":" + << connection.outside_port; + continue; + } + marker_nodes.insert(node_name); + auto seg_node = segment_def->add_node(); + tensorflow::NodeDefBuilder builder(node_name, "Identity"); + auto status = builder.Input(connection.inside_node_name, 0, input_type) + .Finalize(seg_node); + VLOG(1) << "Constructing output " << node_name << " for the edge " + << connection.inside_node_name << ":" << connection.inside_port + << " -> " << connection.outside_node_name << ":" + << connection.outside_port; + } + } // for each connection. + + std::unordered_map old_to_new_id_map; + // Copy internal nodes to new graphdef + string local_scope = graph->FindNodeId(*subgraph_node_ids.begin())->name(); + for (const auto node_id : subgraph_node_ids) { + const auto node = graph->FindNodeId(node_id); + local_scope = GetCommonNameScope(local_scope, node->name()); + old_to_new_id_map[node_id] = segment_def->node_size(); + auto snode = segment_def->add_node(); + snode->CopyFrom(node->def()); + VLOG(1) << "Copying " << snode->name() << " to subgraph"; + } + // Update the inputs of the new input nodes to point to placeholder nodes. + for (int i = 0; i < connections->size(); ++i) { + auto& connection = connections->at(i); + if (!connection.is_input_edge) continue; + auto snode = + segment_def->mutable_node(old_to_new_id_map[connection.inside_id]); + const string placeholder_name = + StrCat(kInputPHName, connection.port_number); + VLOG(1) << "Updating " << snode->name() << ":" << connection.inside_port + << " from " << snode->input(connection.inside_port) << " to " + << placeholder_name; + snode->set_input(connection.inside_port, placeholder_name); + } + *common_scope = local_scope; + VLOG(0) << "Segment @scope '" << local_scope << "', converted to graph"; return tensorflow::Status::OK(); } diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 3f6592cd25..1a4c0e755d 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -22,69 +22,112 @@ limitations under the License. 
#include #include +#include "tensorflow/contrib/tensorrt/convert/utils.h" #include "tensorflow/contrib/tensorrt/resources/trt_allocator.h" +#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/lib/core/status.h" + #if GOOGLE_CUDA #if GOOGLE_TENSORRT namespace tensorflow { namespace tensorrt { +static const char* kInputPHName = "InputPH_"; +static const char* kOutputPHName = "OutputPH_"; namespace convert { +// TODO(aaroey): use an enum instead. const int FP32MODE = 0; const int FP16MODE = 1; const int INT8MODE = 2; -struct SubGraphParams { - SubGraphParams( - tensorflow::Graph& inp_graph, - const std::set& subgraph_node_id_numbers, - const std::vector>& input_indices, - const std::vector>& output_indices, - size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& current_graph_properties, - std::unordered_map>* output_edges, - tensorflow::NodeDef* constructed_trt_node, - int engine_precision_mode = FP32MODE, const string& device_name = "", - std::shared_ptr allocator = nullptr, - int cuda_gpu_id = 0) - : graph(inp_graph), - subgraph_node_ids(subgraph_node_id_numbers), - input_inds(input_indices), - output_inds(output_indices), - max_batch_size(max_supported_batch_size), - max_workspace_size_bytes(max_consumed_workspace_size_bytes), - graph_properties(current_graph_properties), - output_edge_map(output_edges), - trt_node(constructed_trt_node), - precision_mode(engine_precision_mode), - device_name_(device_name), - allocator_(allocator), - cuda_gpu_id_(cuda_gpu_id) {} - - tensorflow::Graph& graph; - const std::set& subgraph_node_ids; - const std::vector>& input_inds; // {node_id, output_idx} - const std::vector>& output_inds; // {node_id, output_idx} - size_t max_batch_size; - size_t max_workspace_size_bytes; - const tensorflow::grappler::GraphProperties& graph_properties; - std::unordered_map>* output_edge_map; - tensorflow::NodeDef* trt_node; - const int precision_mode; - const string device_name_; - std::shared_ptr allocator_; - const int cuda_gpu_id_; +struct EngineConnection { + EngineConnection(const string& outside, int out_id, int out_port, + const string& inside, int in_id, int in_port, + bool input_edge, int port) + : outside_node_name(outside), + outside_id(out_id), + outside_port(out_port), + inside_node_name(inside), + inside_id(in_id), + inside_port(in_port), + is_input_edge(input_edge), + port_number(port) {} + + const string outside_node_name; + const int outside_id; + const int outside_port; + tensorflow::PartialTensorShape outside_shape; + + const string inside_node_name; + const int inside_id; + const int inside_port; + tensorflow::PartialTensorShape inside_shape; + + tensorflow::DataType connection_type; + bool is_input_edge; + + // The port number of the TRT node connecting to this edge. + int port_number; +}; + +struct EngineInfo { + EngineInfo() + : engine_type(EngineType::TRTStatic), + max_workspace_size_bytes(0), + precision_mode(FP32MODE) {} + + string engine_name; + string device; + tensorflow::GraphDef segment_graph_def; + + // The segment nodes that are on one side of the edges are topological sorted. 
+  std::vector<EngineConnection> connections;
+
+  enum class EngineType { TRTStatic = 0, TRTDynamic = 1 };
+  EngineType engine_type;
+  int64 max_workspace_size_bytes;
+  int maximum_cached_engines;
+  std::vector<int> cached_engine_batches;
+  int precision_mode;
 };
-// TODO(sami): Replace references with const reference or pointers
-tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams& params);
-tensorflow::Status InjectCalibrationNode(SubGraphParams& params);
-tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph& graph,
-                                                      tensorflow::Node* c_node);
+// Constructs a graphdef from the segment in the given graph. Adds placeholder
+// nodes for input edges (InputPH_*) and identity nodes for output edges
+// (OutputPH_*). This function needs to be called before the TensorRT nodes
+// are inserted, in order to correctly get sizes from the original graph.
+//
+// - subgraph_node_ids: the node ids of the subgraph, must be sorted in
+//   topological order.
+// - segment_def: the output GraphDef, whose non-input/output nodedefs will be
+//   sorted in topological order.
+tensorflow::Status ConvertSegmentToGraphDef(
+    const tensorflow::Graph* graph,
+    const tensorflow::grappler::GraphProperties& graph_properties,
+    const std::vector<int>& subgraph_node_ids,
+    std::vector<EngineConnection>* connections,
+    tensorflow::GraphDef* segment_def, string* common_scope);
+
+// Converts the given subgraph to a TRT engine saved in 'engine'. Returns ok
+// iff 'builder' successfully builds the engine. If the result is not ok,
+// 'engine' will be set to nullptr. Once returned, 'builder' is not needed any
+// more and can be safely destroyed.
+//
+// - convert_successfully: indicates whether the conversion to the TensorRT
+//   network is successful. This is different from successfully building the
+//   engine: building can still fail afterwards.
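A rough sketch of how the segment-level entry points in this header chain together — ConvertSegmentToGraphDef above and ConvertGraphDefToEngine declared just below. All variable names are hypothetical stand-ins for what convert_graph.cc actually provides; a real caller also wires the resulting connections into the created TRTEngineOp:

// Sketch only: graph, graph_properties, subgraph_node_ids, connections,
// input_shapes and logger are assumed to exist in the caller.
tensorflow::GraphDef segment_def;
string common_scope;
TF_RETURN_IF_ERROR(ConvertSegmentToGraphDef(
    &graph, graph_properties, subgraph_node_ids, &connections, &segment_def,
    &common_scope));

TrtUniquePtrType<nvinfer1::ICudaEngine> engine;
bool convert_successfully = false;
TF_RETURN_IF_ERROR(ConvertGraphDefToEngine(
    segment_def, FP16MODE, /*max_batch_size=*/8,
    /*max_workspace_size_bytes=*/1 << 30, input_shapes, &logger,
    /*allocator=*/nullptr, /*calibrator=*/nullptr, &engine,
    &convert_successfully));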
+tensorflow::Status ConvertGraphDefToEngine( + const tensorflow::GraphDef& gdef, int precision_mode, int max_batch_size, + size_t max_workspace_size_bytes, + const std::vector& input_shapes, + Logger* logger, nvinfer1::IGpuAllocator* allocator, + TRTInt8Calibrator* calibrator, + TrtUniquePtrType* engine, + bool* convert_successfully); + } // namespace convert } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc index 8f634b1f74..ec9dbfa13b 100644 --- a/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc +++ b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc @@ -45,8 +45,24 @@ tensorflow::Status TRTOptimizationPass::Init( if (params.count("max_batch_size")) { maximum_batch_size_ = params.at("max_batch_size").i(); } - if (params.count("max_workspace_size_bytes")) + is_dynamic_op_ = false; + if (params.count("is_dynamic_op")) { + is_dynamic_op_ = params.at("is_dynamic_op").b(); + } + if (params.count("cached_engine_batches")) { + auto batch_vec = params.at("cached_engine_batches").list(); + batches_.reserve(batch_vec.i_size()); + for (const auto i : batch_vec.i()) { + batches_.push_back(i); + } + } + max_cached_batches_ = 1; + if (params.count("maximum_cached_engines")) { + max_cached_batches_ = params.at("maximum_cached_engines").i(); + } + if (params.count("max_workspace_size_bytes")) { maximum_workspace_size_ = params.at("max_workspace_size_bytes").i(); + } if (params.count("precision_mode")) { string pm = Uppercase(params.at("precision_mode").s()); if (pm == "FP32") { @@ -175,6 +191,17 @@ tensorflow::Status TRTOptimizationPass::Optimize( if (VLOG_IS_ON(1)) { PrintDebugInfo(cluster, item); } + // This is a hack to workaround optimizer issue. MetaOptimizer calls + // optimization passes on function objects as well, we should not modify + // generated funcdefs! This is fragile but we don't have any other option + // until framework fixes it. + if (item.id != "tf_graph") { + LOG(WARNING) << name_ + << " is probably called on funcdef! 
This optimizer must *NOT* " + "be called on function objects."; + *optimized_graph = item.graph; + return tensorflow::Status::OK(); + } int max_dim = -1; if (item.feed.size()) { for (const auto& f : item.feed) { @@ -204,11 +231,22 @@ tensorflow::Status TRTOptimizationPass::Optimize( } tensorflow::grappler::GraphProperties static_graph_properties(item); TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(true)); - auto status = tensorflow::tensorrt::convert::ConvertAfterShapes( - item.graph, item.fetch, maximum_batch_size_, maximum_workspace_size_, - optimized_graph, precision_mode_, minimum_segment_size_, - static_graph_properties, cluster); + tensorflow::tensorrt::convert::ConversionParams cp; + cp.input_graph_def = &item.graph; + cp.output_names = &item.fetch; + cp.max_batch_size = maximum_batch_size_; + cp.max_workspace_size_bytes = maximum_workspace_size_; + cp.output_graph_def = optimized_graph; + cp.precision_mode = precision_mode_; + cp.minimum_segment_size = minimum_segment_size_; + cp.graph_properties = &static_graph_properties; + cp.cluster = cluster; + cp.is_dyn_op = is_dynamic_op_; + cp.cached_engine_batches = batches_; + cp.max_cached_engines = max_cached_batches_; + auto status = tensorflow::tensorrt::convert::ConvertAfterShapes(cp); VLOG(2) << optimized_graph->DebugString(); + VLOG(1) << "Returning from " << name_; return status; } diff --git a/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h index d8ecead23e..463ed3883e 100644 --- a/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h +++ b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h @@ -61,6 +61,9 @@ class TRTOptimizationPass : public tensorflow::grappler::CustomGraphOptimizer { int minimum_segment_size_; int precision_mode_; int maximum_batch_size_; + bool is_dynamic_op_; + std::vector batches_; + int max_cached_batches_; int64_t maximum_workspace_size_; }; diff --git a/tensorflow/contrib/tensorrt/convert/utils.h b/tensorflow/contrib/tensorrt/convert/utils.h new file mode 100644 index 0000000000..f601c06701 --- /dev/null +++ b/tensorflow/contrib/tensorrt/convert/utils.h @@ -0,0 +1,37 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_CONVERT_UTILS_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_UTILS_H_ + +#include + +namespace tensorflow { +namespace tensorrt { + +template +struct TrtDestroyer { + void operator()(T* t) { + if (t) t->destroy(); + } +}; + +template +using TrtUniquePtrType = std::unique_ptr>; + +} // namespace tensorrt +} // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_TENSORRT_CONVERT_UTILS_H_ diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index 9ac8047944..8a17eb02f1 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -14,8 +14,16 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/tensorrt/kernels/trt_engine_op.h" +#include +#include "tensorflow/contrib/tensorrt/convert/convert_nodes.h" +#include "tensorflow/contrib/tensorrt/convert/utils.h" #include "tensorflow/contrib/tensorrt/log/trt_logger.h" -#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resources.h" +#include "tensorflow/core/framework/graph_to_functiondef.h" +#include "tensorflow/core/lib/core/refcount.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" @@ -25,144 +33,556 @@ limitations under the License. #include "cuda/include/cuda_runtime_api.h" namespace tensorflow { -static ::tensorflow::tensorrt::Logger logger; -using IRuntime = nvinfer1::IRuntime; -using Dims = nvinfer1::Dims; - namespace tensorrt { +static Logger logger; +using ::nvinfer1::IRuntime; +using ::tensorflow::strings::StrAppend; +using ::tensorflow::strings::StrCat; + +// A helper class to call done() when destructed for asynchronous execution. +// Helps simultaneous execution of native and TRT engines. 
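The ref-counting pattern that the class defined just below relies on, sketched with hypothetical names ('done' is the kernel's DoneCallback); it mirrors how ComputeAsync and ExecuteNativeSegment use the helper further down in this file:

// Sketch only: done() runs when the last reference is dropped, so every
// asynchronous branch holds its own reference while it is in flight.
auto* helper = new AsyncHelper(done);
tensorflow::core::ScopedUnref sc(helper);  // releases the initial reference
helper->Ref();  // taken by an async branch, e.g. the native segment
// ... and inside that branch's completion callback:
//   tensorflow::core::ScopedUnref branch_ref(helper);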
+class AsyncHelper : public tensorflow::core::RefCounted { + public: + AsyncHelper(tensorflow::AsyncOpKernel::DoneCallback done) { done_ = done; } + ~AsyncHelper() override { done_(); } -TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { + private: + tensorflow::AsyncOpKernel::DoneCallback done_; +}; + +#define TYPECASE(dt, X, Y) \ + case dt: { \ + return (void*)X->flat::Type>().data(); \ + } + +void* GetTensorAddress(const Tensor* tensor_ptr) { + auto tensor_type = tensor_ptr->dtype(); + switch (tensor_type) { + TYPECASE(tensorflow::DT_FLOAT, tensor_ptr, dest_ptr); + TYPECASE(tensorflow::DT_HALF, tensor_ptr, dest_ptr); + TYPECASE(tensorflow::DT_INT8, tensor_ptr, dest_ptr); + default: { + LOG(ERROR) << "Unsupported Data type " + << tensorflow::DataTypeString(tensor_type); + return nullptr; + } + } +} + +tensorflow::Status TRTEngineOp::ConstructFunctionHandle(OpKernelContext* ctx) { + VLOG(1) << "Constructing function handle"; + auto lib = ctx->function_library(); + if (lib == nullptr) { + return tensorflow::errors::Internal("Context function library is null"); + } + auto fdef = lib->GetFunctionLibraryDefinition()->Find(funcdef_name_); + if (fdef == nullptr) { + return tensorflow::errors::Internal("Native FunctionDef ", funcdef_name_, + " can't be found in function library"); + } + tensorflow::FunctionLibraryRuntime::InstantiateOptions inst_ops; + inst_ops.overlay_lib = nullptr; + inst_ops.state_handle = ""; + inst_ops.target = ctx->device()->name(); + native_func_ = 0; + auto status = lib->Instantiate(funcdef_name_, AttrSlice(&fdef->attr()), + inst_ops, &native_func_); + if (!status.ok()) { + LOG(ERROR) << " Instantiating native function " << funcdef_name_ + << " failed!"; + } + return status; +} + +TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) + : AsyncOpKernel(context) { // read serialized_engine OP_REQUIRES_OK(context, - context->GetAttr("serialized_engine", &serialized_engine_)); + context->GetAttr("serialized_segment", &serialized_segment_)); + OP_REQUIRES_OK(context, + context->GetAttr("workspace_size_bytes", &workspace_size_)); + OP_REQUIRES_OK(context, context->GetAttr("static_engine", &static_engine_)); + if (!static_engine_) { + if (!segment_graph_.ParseFromString(serialized_segment_)) { + LOG(ERROR) << "Parsing segment graph failed!"; + context->SetStatus(tensorflow::errors::InvalidArgument( + "Failed to parse segment graphdef!")); + return; + } + serialized_segment_.resize(0); + } + VLOG(1) << "Constructing " << name(); + string precision_string; + OP_REQUIRES_OK(context, + context->GetAttr("precision_mode", &precision_string)); + string calibration_data; + OP_REQUIRES_OK(context, + context->GetAttr("calibration_data", &calibration_data)); + OP_REQUIRES_OK(context, + context->GetAttr("segment_funcdef_name", &funcdef_name_)); + if (precision_string == "FP32") { + precision_mode_ = convert::FP32MODE; + } else if (precision_string == "FP16") { + precision_mode_ = convert::FP16MODE; + } else if (precision_string == "INT8") { + precision_mode_ = convert::INT8MODE; + } + calibration_mode_ = + (precision_mode_ == convert::INT8MODE && calibration_data.size() == 0); + if (calibration_data.size()) { + calibrator_.reset(new TRTInt8Calibrator(calibration_data)); + calibration_data.resize(0); + } + native_func_ = tensorflow::kInvalidHandle; + OP_REQUIRES_OK(context, context->GetAttr("max_cached_engines_count", + &max_cached_engines_)); + OP_REQUIRES_OK(context, + context->GetAttr("fixed_input_size", &fixed_input_size_)); + OP_REQUIRES_OK(context, 
context->GetAttr("cached_engine_batches", + &cached_engine_batches_)); + std::sort(cached_engine_batches_.begin(), cached_engine_batches_.end()); + if (VLOG_IS_ON(1)) { + string s("Engine Batches= "); + for (auto i : cached_engine_batches_) { + StrAppend(&s, i, " "); + } + VLOG(1) << s; + } +} - // register input output node name in trt_sub_graph - OP_REQUIRES_OK(context, context->GetAttr("input_nodes", &input_nodes_)); - OP_REQUIRES_OK(context, context->GetAttr("output_nodes", &output_nodes_)); +void TRTEngineOp::ExecuteNativeSegment(tensorflow::OpKernelContext* ctx, + AsyncHelper* helper) { + if (!calibration_mode_) { + VLOG(1) << "Executing native engine"; + } + std::vector inputs; + std::vector* outputs = new std::vector(); + if (native_func_ == tensorflow::kInvalidHandle) { + auto status = ConstructFunctionHandle(ctx); + if (!status.ok()) { + LOG(ERROR) << "Couldn't construct function handle " << funcdef_name_; + ctx->SetStatus(status); + return; + } + } + auto lib = ctx->function_library(); + tensorflow::FunctionLibraryRuntime::Options opts; + opts.step_id = ctx->step_id(); + opts.rendezvous = ctx->rendezvous(); + opts.cancellation_manager = ctx->cancellation_manager(); + opts.runner = ctx->runner(); + for (int i = 0; i < ctx->num_inputs(); i++) { + inputs.push_back(ctx->input(i)); + } + helper->Ref(); // Increment count for calculating native graph + VLOG(1) << "Executing native segment " << name(); + lib->Run(opts, native_func_, inputs, outputs, + [ctx, outputs, helper](const tensorflow::Status& s) { + tensorflow::core::ScopedUnref sc(helper); + VLOG(1) << "Native Segment completed"; + if (!s.ok()) { + ctx->SetStatus(s); + return; + } + for (size_t t = 0; t < outputs->size(); ++t) { + ctx->set_output(t, outputs->at(t)); + } + delete outputs; + }); } -void TRTEngineOp::Compute(OpKernelContext* context) { - // TODO(samikama) runtime should be taken from a resourcemanager as well. - // Only engine should be in the op and context and runtime should be taken - // from resourcemanager +void TRTEngineOp::ExecuteCalibration(tensorflow::OpKernelContext* ctx, + AsyncHelper* helper) { + helper->Ref(); + tensorflow::core::ScopedUnref sc(helper); + // TODO(aaroey): remove the ResourceMgr singleton. 
+ auto trt_rm = TRTResourceManager::instance(); + auto res_mgr = trt_rm->getManager("TRTCalibration"); + TRTCalibrationResource* calib_res = nullptr; + auto status = res_mgr->LookupOrCreate( + funcdef_name_, "Calibrator", &calib_res, + {[ctx, this](TRTCalibrationResource** cr) -> tensorflow::Status { + return this->AllocateCalibrationResources(ctx, cr); + }}); + if (!status.ok()) { + ctx->SetStatus(status); + return; + } + int num_inputs = ctx->num_inputs(); + // Pass input data to calibrator + std::unordered_map input_data; + for (int i = 0; i < num_inputs; i++) { + const Tensor& t = ctx->input(i); + void* data_address = GetTensorAddress(&t); + if (data_address == nullptr) { + ctx->SetStatus(tensorflow::errors::InvalidArgument( + "Unsupported data type encountered in input ", i)); + return; + } + // Check the allocated buffer is sufficient for input + const auto device_tensor = dev_tensors_.at(i).AccessTensor(ctx); + CHECK_EQ(t.TotalBytes(), device_tensor->TotalBytes()); + input_data.emplace(StrCat(kInputPHName, i), data_address); + } + VLOG(2) << "Filled map for sending"; + // copied from cuda_kernel_helper since it seems only valid in *.cu.cc files + const cudaStream_t* stream = CHECK_NOTNULL( + reinterpret_cast(ctx->op_device_context() + ->stream() + ->implementation() + ->CudaStreamMemberHack())); + calib_res->calibrator_->setBatch(input_data, *stream); + VLOG(2) << "Passed calibration data"; + ExecuteNativeSegment(ctx, helper); +} - if (!trt_execution_context_ptr_) { - IRuntime* infer = nvinfer1::createInferRuntime(logger); -#if NV_TENSORRT_MAJOR > 3 - auto device = context->device(); - auto dev_allocator = - device->GetAllocator(tensorflow::AllocatorAttributes()); - if (!dev_allocator) { - LOG(FATAL) << "Can't find device allocator for gpu device " - << device->name(); - } - allocator_ = std::make_shared(dev_allocator); - infer->setGpuAllocator(allocator_.get()); -#endif - trt_engine_ptr_.reset(infer->deserializeCudaEngine( - serialized_engine_.c_str(), serialized_engine_.size(), - PluginFactoryTensorRT::GetInstance())); - trt_execution_context_ptr_.reset(trt_engine_ptr_->createExecutionContext()); - // Runtime is safe to delete after engine creation - infer->destroy(); - serialized_engine_.clear(); +int TRTEngineOp::GetEngineBatch(tensorflow::OpKernelContext* ctx) { + int num_batch = ctx->input(0).shape().dim_size(0); + int smallest_engine = 0; + for (const auto i : cached_engine_batches_) { + if (i >= num_batch) { + smallest_engine = i; + break; + } } - int num_binding = context->num_inputs() + context->num_outputs(); - std::vector buffers(num_binding); + // TODO(sami): Need an LRU here + if (smallest_engine == 0) { + if (max_cached_engines_ > cached_engine_batches_.size()) { + smallest_engine = num_batch; + cached_engine_batches_.push_back(num_batch); + VLOG(1) << "Running with batch size " << num_batch; + } else { + string s("Engine buffer is full. 
buffer limit= "); + StrAppend(&s, max_cached_engines_, ", current entries= "); + for (auto i : cached_engine_batches_) StrAppend(&s, i, ", "); + StrAppend(&s, "Requested batch= ", num_batch); + LOG(ERROR) << s; + ctx->SetStatus(tensorflow::errors::ResourceExhausted( + "Requested batch size is not available and engine cache is full")); + return -1; + } + } + return smallest_engine; +} - size_t binding_index; - int num_batch = 0; - for (int i = 0; i < context->num_inputs(); i++) { - // Grab the input tensor - binding_index = trt_engine_ptr_->getBindingIndex(input_nodes_[i].c_str()); +void TRTEngineOp::ComputeAsync(tensorflow::OpKernelContext* ctx, + tensorflow::AsyncOpKernel::DoneCallback done) { + auto helper = new AsyncHelper(done); + tensorflow::core::ScopedUnref sc(helper); + if (calibration_mode_) { + ExecuteCalibration(ctx, helper); + return; + } + const int smallest_engine = GetEngineBatch(ctx); + if (smallest_engine < 0) return; // GetEngineBatch already set the status. + + const int num_batch = ctx->input(0).shape().dim_size(0); + auto& engine_ctx_pair = GetEngine(smallest_engine, ctx); + auto& trt_engine_ptr = engine_ctx_pair.first; + if (!trt_engine_ptr) { + LOG(WARNING) << "Engine retrieval for batch size " << num_batch + << " failed Running native segment"; + ExecuteNativeSegment(ctx, helper); + return; + } - const Tensor& input_tensor = context->input(i); + const int num_binding = ctx->num_inputs() + ctx->num_outputs(); + std::vector buffers(num_binding); + for (int i = 0; i < ctx->num_inputs(); i++) { + const string inp_name = StrCat(kInputPHName, i); + const size_t binding_index = + trt_engine_ptr->getBindingIndex(inp_name.c_str()); + + const Tensor& input_tensor = ctx->input(i); const TensorShape& input_shape = input_tensor.shape(); - if (i == 0) { - num_batch = input_shape.dim_size(0); - if (num_batch > trt_engine_ptr_->getMaxBatchSize()) { - LOG(FATAL) << "input tensor batch larger than max_batch_size: " - << trt_engine_ptr_->getMaxBatchSize(); - } - } else if (num_batch != input_shape.dim_size(0)) { - LOG(FATAL) << "input data inconsistent batch size"; - break; + if (num_batch != input_shape.dim_size(0)) { + LOG(ERROR) << "input data inconsistent batch size"; + ctx->SetStatus(tensorflow::errors::FailedPrecondition( + "Different batch sizes between input tensors")); + return; } - auto dtype = trt_engine_ptr_->getBindingDataType(binding_index); + auto dtype = trt_engine_ptr->getBindingDataType(binding_index); switch (dtype) { case nvinfer1::DataType::kFLOAT: buffers[binding_index] = (void*)(input_tensor.flat().data()); break; case nvinfer1::DataType::kHALF: - LOG(FATAL) << "half size is not supported yet!"; - break; + LOG(ERROR) << "FP16 inputs are not supported yet!"; + ctx->SetStatus(tensorflow::errors::InvalidArgument( + "FP16 inputs are not supported!")); + return; case nvinfer1::DataType::kINT8: - LOG(FATAL) << "int8 is not supported yet!"; - break; + LOG(ERROR) << "INT8 inputs are not supported yet!"; + ctx->SetStatus(tensorflow::errors::InvalidArgument( + "INT8 inputs are not supported!")); + return; default: - LOG(FATAL) << "Unknown data type: " << int(dtype); - break; + LOG(ERROR) << "Unknown TRT data type: " << int(dtype); + ctx->SetStatus(tensorflow::errors::InvalidArgument( + "Unknown output TRT data type! ", static_cast(dtype))); + return; } } - for (int i = 0; i < static_cast(output_nodes_.size()); i++) { - // This is bad that we have to reallocate output buffer every run. 
+ for (int i = 0; i < ctx->num_outputs(); i++) { // Create an output tensor - binding_index = trt_engine_ptr_->getBindingIndex(output_nodes_[i].c_str()); + const string output_name = StrCat(kOutputPHName, i); + const size_t binding_index = + trt_engine_ptr->getBindingIndex(output_name.c_str()); Tensor* output_tensor = nullptr; TensorShape output_shape; if (binding_index != -1) { - auto dims = trt_engine_ptr_->getBindingDimensions(binding_index); + auto dims = trt_engine_ptr->getBindingDimensions(binding_index); std::vector trt_shape(dims.nbDims + 1); trt_shape[0] = num_batch; for (int j = 0; j < dims.nbDims; j++) trt_shape[j + 1] = dims.d[j]; - OP_REQUIRES_OK(context, - TensorShapeUtils::MakeShape( - trt_shape.data(), trt_shape.size(), &output_shape)); + OP_REQUIRES_OK( + ctx, TensorShapeUtils::MakeShape(trt_shape.data(), trt_shape.size(), + &output_shape)); } else { - LOG(FATAL) << "output node not found, at " << output_nodes_[i]; - break; + LOG(ERROR) << "output node not found, at " << output_name; + ctx->SetStatus(tensorflow::errors::Internal("output ", output_name, + " couldn't be found!")); + return; } - - OP_REQUIRES_OK(context, - context->allocate_output(i, output_shape, &output_tensor)); - auto dtype = trt_engine_ptr_->getBindingDataType(binding_index); + auto status = ctx->allocate_output(i, output_shape, &output_tensor); + if (!status.ok()) { + LOG(ERROR) << "Allocating output failed with " << status; + ctx->SetStatus(status); + return; + } + auto dtype = trt_engine_ptr->getBindingDataType(binding_index); switch (dtype) { case nvinfer1::DataType::kFLOAT: buffers[binding_index] = reinterpret_cast(output_tensor->flat().data()); break; case nvinfer1::DataType::kHALF: - LOG(FATAL) << "half size is not supported yet!"; - break; + LOG(ERROR) << "half size is not supported yet!"; + ctx->SetStatus(tensorflow::errors::InvalidArgument( + "Half outputs are not supported!")); + return; case nvinfer1::DataType::kINT8: - LOG(FATAL) << "int8 is not supported yet!"; - break; + LOG(ERROR) << "int8 is not supported yet!"; + ctx->SetStatus(tensorflow::errors::InvalidArgument( + "INT8 outputs are not supported!")); + return; default: - LOG(FATAL) << "Unknown data type: " << int(dtype); - break; + LOG(ERROR) << "Unknown TRT data type: " << static_cast(dtype); + ctx->SetStatus(tensorflow::errors::InvalidArgument( + "Unsupported output data type! ", static_cast(dtype))); + return; } } // copied from cuda_kernel_helper since it seems only valid in *.cu.cc files const cudaStream_t* stream = CHECK_NOTNULL( - reinterpret_cast(context->op_device_context() + reinterpret_cast(ctx->op_device_context() ->stream() ->implementation() ->CudaStreamMemberHack())); // TODO(jie): trt enqueue does not return error - auto ret = trt_execution_context_ptr_->enqueue(num_batch, &buffers[0], - *stream, nullptr); - VLOG(2) << "enqueue returns: " << ret; + auto& trt_execution_context_ptr = engine_ctx_pair.second; + auto ret = trt_execution_context_ptr->enqueue(num_batch, &buffers[0], *stream, + nullptr); + if (!ret) { + LOG(ERROR) << "Failed to enqueue batch for TRT engine: " << name(); + ctx->SetStatus(tensorflow::errors::Internal( + "Failed to enqueue batch for TRT engine: ", name())); + } // sync should be done by TF. } + TRTEngineOp::~TRTEngineOp() { - // Order matters! - trt_execution_context_ptr_.reset(); - trt_engine_ptr_.reset(); + // We need to manually destroy the engine and execution context before + // the allocator is destructed. 
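The destructor above encodes an ordering constraint: every cached engine/execution-context pair was created against the op's GPU allocator, so those objects must be released before allocator_ itself. A simplified stand-alone illustration of the same ownership layout (FakeAllocator/FakeEngine/FakeContext are invented placeholders):

#include <map>
#include <memory>
#include <utility>

struct FakeAllocator {};  // owns the device memory pool
struct FakeEngine {};     // built with memory from FakeAllocator
struct FakeContext {};    // created from FakeEngine

struct EngineCache {
  std::map<int, std::pair<std::unique_ptr<FakeEngine>,
                          std::unique_ptr<FakeContext>>> engines;
  std::unique_ptr<FakeAllocator> allocator;

  ~EngineCache() {
    // Release each cached engine/context pair first...
    for (auto& entry : engines) {
      entry.second.first.reset();
      entry.second.second.reset();
    }
    // ...and only then the allocator they were created with.
    allocator.reset();
  }
};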
+ for (auto& eng : engine_map_) { + eng.second.first.reset(); + eng.second.second.reset(); + } allocator_.reset(); } + +nvinfer1::IGpuAllocator* TRTEngineOp::GetAllocator(OpKernelContext* ctx) { + if (allocator_) return allocator_.get(); + auto device = ctx->device(); + auto alloc = device->GetAllocator(tensorflow::AllocatorAttributes()); + if (!alloc) { + LOG(ERROR) << "Can't find device allocator for gpu device " + << device->name(); + ctx->SetStatus(tensorflow::errors::Internal( + "Can't get device allocator for device ", device->name())); + return nullptr; + } + allocator_.reset(new TRTDeviceAllocator(alloc)); + return allocator_.get(); +} + +TRTEngineOp::EngineCtxPair& TRTEngineOp::GetEngine(int batch_size, + OpKernelContext* ctx) { + static EngineCtxPair null_pair = { + TrtUniquePtrType(nullptr), + TrtUniquePtrType(nullptr)}; + // TODO(sami): This method needs to be re-written to use resource manager and + // with LRU mechanism option. + tensorflow::mutex_lock lock(engine_mutex_); + + if (static_engine_) { + if (engine_map_.size()) { + if (engine_map_.begin()->first >= batch_size) { + return engine_map_.begin()->second; + } + return null_pair; + } + TrtUniquePtrType infer(nvinfer1::createInferRuntime(logger)); +#if NV_TENSORRT_MAJOR > 3 + auto allocator = GetAllocator(ctx); + if (allocator == nullptr) { + // GetAllocator already set the Status. + return null_pair; + } + infer->setGpuAllocator(allocator); +#endif + TrtUniquePtrType static_engine( + infer->deserializeCudaEngine(serialized_segment_.c_str(), + serialized_segment_.size(), nullptr)); + auto raw_static_engine = static_engine.get(); + const auto max_batch_size = raw_static_engine->getMaxBatchSize(); + engine_map_[max_batch_size] = { + std::move(static_engine), + TrtUniquePtrType( + raw_static_engine->createExecutionContext())}; + // Runtime is safe to delete after engine creation + serialized_segment_.clear(); + if (max_batch_size < batch_size) return null_pair; + return engine_map_.at(max_batch_size); + } // static_engine_ + + // Handle the dynamic engine case. + auto engine_it = engine_map_.find(batch_size); + if (engine_it == engine_map_.end() && + engine_map_.size() < (size_t)max_cached_engines_) { + nvinfer1::IGpuAllocator* allocator = nullptr; +#if NV_TENSORRT_MAJOR > 3 + allocator = GetAllocator(ctx); + if (allocator == nullptr) { + // GetAllocator already set the Status. + return null_pair; + } +#endif + std::vector shapes; + for (int i = 0; i < ctx->num_inputs(); ++i) { + shapes.emplace_back(ctx->input(i).shape()); + } + TrtUniquePtrType engine; + bool convert_successfully = false; + VLOG(0) << name() << " Constructing a new engine with batch size " + << batch_size; + // Up to this point, calibrator_ can never be empty, since otherwise it + // means calibration_mode_ is true and this path won't get executed. + auto status = convert::ConvertGraphDefToEngine( + segment_graph_, precision_mode_, batch_size, workspace_size_, shapes, + &logger, allocator, calibrator_.get(), &engine, &convert_successfully); + if (!status.ok()) { + if (convert_successfully) { + // This means it fail to build the engine even when the network is built + // successfully, probably due to internal issues. In this case we don't + // retry in the future. 
+ engine_map_[batch_size] = {nullptr, nullptr}; + } + LOG(ERROR) << "Engine creation for batch size " << batch_size + << " failed " << status; + ctx->SetStatus(tensorflow::errors::Internal("Engine creation failed!")); + return null_pair; + } + VLOG(1) << "Conversion is done"; + TrtUniquePtrType exec_context( + engine->createExecutionContext()); + engine_map_[batch_size] = {std::move(engine), std::move(exec_context)}; + } + return engine_map_.at(batch_size); +} + +tensorflow::Status TRTEngineOp::AllocateCalibrationResources( + tensorflow::OpKernelContext* ctx, TRTCalibrationResource** cr) { + auto cres = new TRTCalibrationResource(); + *cr = cres; + // Get the allocator. + auto alloc = ctx->device()->GetAllocator(tensorflow::AllocatorAttributes()); + if (!alloc) { + LOG(WARNING) << "Can't get device allocator will not be able to " + "allocate memory from TensorFlow memory pool"; + cres->allocator_.reset(new TRTCudaAllocator); + } else { + cres->allocator_.reset(new TRTDeviceAllocator(alloc)); + } + // Get the input shapes. + const int batch_size = ctx->input(0).dim_size(0); + const int num_inputs = ctx->num_inputs(); + std::vector shapes; + dev_tensors_.resize(num_inputs); + VLOG(1) << " Constructing calibrator"; + for (int i = 0; i < num_inputs; i++) { + // allocate workspace on device for inputs + const tensorflow::Tensor& t = ctx->input(i); + shapes.emplace_back(t.shape()); + Tensor* device_tensor; + TF_RETURN_IF_ERROR(ctx->allocate_persistent( + t.dtype(), t.shape(), &dev_tensors_.at(i), &device_tensor)); + CHECK_EQ(t.TotalBytes(), device_tensor->TotalBytes()); + void* device_address = GetTensorAddress(device_tensor); + if (device_address == nullptr) { + return tensorflow::errors::InvalidArgument( + "Unsupported data type encountered in input ", i); + } + device_buffers_.emplace( + StrCat(kInputPHName, i), + std::pair(device_address, device_tensor->TotalBytes())); + } + cres->calibrator_.reset( + new TRTInt8Calibrator(device_buffers_, batch_size, name())); + const string label(name()); + auto segment_graph = &segment_graph_; + const int cuda_gpu_id = ctx->device()->tensorflow_gpu_device_info()->gpu_id; + if (cuda_gpu_id < 0) { + LOG(ERROR) << "Can't get gpu_device_info from context->device()"; + return tensorflow::errors::InvalidArgument( + "Context->device doesn't contain device info!"); + } + const int64 workspace_size_bytes = workspace_size_; + cres->thr_.reset(new std::thread([cres, label, segment_graph, shapes, + cuda_gpu_id, workspace_size_bytes]() { + VLOG(0) << "Starting calibration thread on device " << cuda_gpu_id + << ", Calibration Resource @ " << cres; + auto err = cudaSetDevice(cuda_gpu_id); + if (err != cudaSuccess) { + // TODO(aaroey): should return error here. + LOG(ERROR) << "Couldn't set cuda device to " << cuda_gpu_id + << " in calibration thread"; + } + // ConvertGraphDefToEngine() will try to build the engine. This thread + // will loop inside buildCudaEngine() consuming the calibration data + // that is set by the TF op, and drive the builder until calibrator returns + // false. Engine is discarded after calibration table is generated + // + // TODO(aaroey): maybe setting the max batch size using the python + // calibration wrapper class. 
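The calibration thread started here blocks inside TensorRT's build call while the op feeds it batches, which is the producer/consumer handshake implemented by TRTInt8Calibrator::setBatch/getBatch later in this patch. A simplified standard-library sketch of that handshake (the real calibrator additionally tracks calib_running_ and copies device buffers):

#include <condition_variable>
#include <mutex>

class BatchHandshake {
 public:
  // Producer (the TF op): publish one batch; returns false once calibration
  // has been marked done.
  bool Publish() {
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [this] { return !batch_set_ || done_; });
    if (done_) return false;
    batch_set_ = true;
    cv_.notify_all();
    return true;
  }
  // Consumer (the builder thread inside getBatch): take the pending batch.
  bool Consume() {
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [this] { return batch_set_ || done_; });
    if (done_) return false;
    batch_set_ = false;
    cv_.notify_all();
    return true;
  }
  void Done() {  // mirrors setDone(): unblocks both sides and ends the loop
    std::lock_guard<std::mutex> lock(mu_);
    done_ = true;
    cv_.notify_all();
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  bool batch_set_ = false;
  bool done_ = false;
};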
+ auto s = convert::ConvertGraphDefToEngine( + *segment_graph, convert::INT8MODE, cres->calibrator_->getBatchSize(), + workspace_size_bytes, shapes, &cres->logger_, cres->allocator_.get(), + cres->calibrator_.get(), &cres->engine_, + /*convert_successfully=*/nullptr); + if (!s.ok()) { + LOG(ERROR) << "Calibration failed: " << s; + cres->calibrator_->setDone(); // Ignore further pushes + } + VLOG(1) << "Calibration loop terminated " << label; + })); + VLOG(1) << "initialized calibrator resource"; + return tensorflow::Status::OK(); +} + REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp); } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h index e613a71422..6fe318be6a 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h @@ -19,9 +19,14 @@ limitations under the License. #include #include +#include "tensorflow/contrib/tensorrt/convert/utils.h" +#include "tensorflow/contrib/tensorrt/log/trt_logger.h" #include "tensorflow/contrib/tensorrt/resources/trt_allocator.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/platform/mutex.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT @@ -30,32 +35,95 @@ limitations under the License. namespace tensorflow { namespace tensorrt { -class Logger; - +class TRTInt8Calibrator; +class TRTCalibrationResource; +class AsyncHelper; // TODO(Sami): Remove this file? -class TRTEngineOp : public OpKernel { + +// This OP can construct TRTEngine on the fly and if construction of engine +// fails, executes equivalent subgraph as a TensorFlow function. +class TRTEngineOp : public AsyncOpKernel { public: explicit TRTEngineOp(OpKernelConstruction* context); - void Compute(OpKernelContext* context) override; + void ComputeAsync(OpKernelContext* context, + AsyncOpKernel::DoneCallback done) override; ~TRTEngineOp(); private: - template - struct Destroyer { - void operator()(T* d) { d->destroy(); } - }; - - template - using destroyed_ptr = std::unique_ptr>; - destroyed_ptr trt_engine_ptr_; + // Execute calibration + void ExecuteCalibration(OpKernelContext* ctx, AsyncHelper* helper); + + // Construct a function handle for executing native funcdef graph + Status ConstructFunctionHandle(OpKernelContext* ctx); + + // Execute replaced native segment as function Op. + void ExecuteNativeSegment(OpKernelContext* ctx, AsyncHelper* helper); + + // Allocate necessary resources for calibration + Status AllocateCalibrationResources(OpKernelContext* ctx, + TRTCalibrationResource** cr); + // TODO(samikama): context should go to a resource manager! - destroyed_ptr trt_execution_context_ptr_; + typedef std::pair, + TrtUniquePtrType> + EngineCtxPair; + EngineCtxPair& GetEngine(int batch_size, OpKernelContext* ctx); + // Return engine batch closest to input batch. + int GetEngineBatch(OpKernelContext* ctx); + + nvinfer1::IGpuAllocator* GetAllocator(OpKernelContext* ctx); + + // map to keep engines and their execution context for given batch size. + std::unordered_map engine_map_; std::vector input_nodes_; std::vector output_nodes_; - std::shared_ptr allocator_; - string serialized_engine_; + + // keep device allocator for TRT. + std::unique_ptr allocator_; + + // serialized protobuf segment or trt engine depending on static_engine_ flag. 
+ string serialized_segment_; + + // Name of the function for TF native execution of the segment. + string funcdef_name_; + + // GraphDef representation of the segment. + GraphDef segment_graph_; + + // Lookup table for temporary staging areas of input tensors for calibration. + std::unordered_map> device_buffers_; + + // Temporary staging areas for calibration inputs. + std::vector dev_tensors_; + + // Engine Precision mode. + int precision_mode_; + + // Whether engine is constructed during the conversion or needs to be + // constructed from protobuf segment. + bool static_engine_; + + // Whether to calibrate INT8 engine. + bool calibration_mode_; + + // Whether non-batch ranks of the inputs are assumed to be fixed or not for + // engine construction. + bool fixed_input_size_; + + // Batches of the cached engines + std::vector cached_engine_batches_; + + // Maximum number of cached engines + int max_cached_engines_; + + int64 workspace_size_; + mutex engine_mutex_; + FunctionLibraryRuntime::Handle native_func_; + + // The finalized calibrator for inference. + std::unique_ptr calibrator_; }; } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/ops/trt_engine_op.cc b/tensorflow/contrib/tensorrt/ops/trt_engine_op.cc index 079d73f7be..383635f428 100644 --- a/tensorflow/contrib/tensorrt/ops/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/ops/trt_engine_op.cc @@ -28,11 +28,19 @@ extern Status TRTEngineOpShapeInference(InferenceContext* c); } REGISTER_OP("TRTEngineOp") - .Attr("serialized_engine: string") - .Attr("input_nodes: list(string)") - .Attr("output_nodes: list(string)") - .Attr("InT: list({float32})") - .Attr("OutT: list({float32})") + .Attr("serialized_segment: string") + .Attr("input_shapes: list(shape)") + .Attr("output_shapes: list(shape)") + .Attr("segment_funcdef_name: string") + .Attr("InT: list({int8,float16,float32})") + .Attr("OutT: list({int8,float16,float32})") + .Attr("static_engine: bool = true") + .Attr("fixed_input_size: bool = true") + .Attr("cached_engine_batches: list(int) = []") + .Attr("max_cached_engines_count: int = 1") + .Attr("workspace_size_bytes: int") + .Attr("precision_mode: {'FP32', 'FP16', 'INT8', 'INT8CALIB'}") + .Attr("calibration_data: string = ''") .Input("in_tensor: InT") .Output("out_tensor: OutT") .SetShapeFn(shape_inference::TRTEngineOpShapeInference); diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 338475d90e..79f512dbcf 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -21,6 +21,8 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long import six as _six from tensorflow.contrib.tensorrt.wrap_conversion import calib_convert +from tensorflow.contrib.tensorrt.wrap_conversion import get_linked_tensorrt_version +from tensorflow.contrib.tensorrt.wrap_conversion import get_loaded_tensorrt_version from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert from tensorflow.core.framework import graph_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 @@ -29,7 +31,9 @@ from tensorflow.python.framework import errors_impl as _impl from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops from tensorflow.python.grappler import tf_optimizer +from tensorflow.python.platform import tf_logging from tensorflow.python.util import compat + # pylint: enable=unused-import,line-too-long @@ -40,7 +44,10 @@ def 
create_inference_graph(input_graph_def, max_batch_size=1, max_workspace_size_bytes=2 << 20, precision_mode="FP32", - minimum_segment_size=3): + minimum_segment_size=3, + is_dynamic_op=False, + maximum_cached_engines=1, + cached_engine_batches=[]): """Python wrapper for the TRT transformation. Args: @@ -51,6 +58,10 @@ def create_inference_graph(input_graph_def, precision_mode: one of 'FP32', 'FP16' and 'INT8' minimum_segment_size: the minimum number of nodes required for a subgraph to be replaced by TRTEngineOp. + is_dynamic_op: whether to generate dynamic TRT ops which will build the TRT + network and engine at run time. + maximum_cached_engines: max number of cached TRT engines in dynamic TRT ops. + cached_engine_batches: batch sizes used to pre-create cached engines. Returns: New GraphDef with TRTEngineOps placed in graph replacing subgraphs. @@ -65,6 +76,30 @@ def create_inference_graph(input_graph_def, "It should be one of {}").format( precision_mode, "{'FP32', 'FP16', 'INT8'}")) mode = supported_precision_modes[precision_mode.upper()] + compiled_version = get_linked_tensorrt_version() + loaded_version = get_loaded_tensorrt_version() + version_mismatch = False + if loaded_version[0] < compiled_version[0]: + tf_logging.error( + "TensorRT version mismatch. Tensorflow was compiled against " + + "TensorRT %s but library loaded from environment is TensorRT %s" % + (".".join([str(x) for x in compiled_version]), + ".".join([str(x) for x in loaded_version])) + + ". Please make sure that correct version of TensorRT " + + "is available in the system and added to ldconfig or LD_LIBRARY_PATH" + ) + raise RuntimeError("Incompatible TensorRT library version") + for i in zip(loaded_version, compiled_version): + if i[0] != i[1]: + tf_logging.warn("TensorRT mismatch. Compiled against version " + + "%s, but loaded %s. Things may not work" % + (".".join([str(x) for x in compiled_version]), + ".".join([str(x) for x in loaded_version]))) + version_mismatch = True + break + if not version_mismatch: + tf_logging.info("Running against TensorRT version %s" % ".".join( + [str(x) for x in loaded_version])) def py2bytes(inp): return inp @@ -100,7 +135,9 @@ def create_inference_graph(input_graph_def, # pair or strings where first one is encoded status and the second # one is the transformed graphs protobuf string. out = trt_convert(input_graph_def_str, out_names, max_batch_size, - max_workspace_size_bytes, mode, minimum_segment_size) + max_workspace_size_bytes, mode, minimum_segment_size, + is_dynamic_op, maximum_cached_engines, + cached_engine_batches) status = to_string(out[0]) output_graph_def_string = out[1] del input_graph_def_str # Save some memory @@ -120,11 +157,12 @@ def create_inference_graph(input_graph_def, return output_graph_def -def calib_graph_to_infer_graph(calibration_graph_def): +def calib_graph_to_infer_graph(calibration_graph_def, is_dynamic_op=False): """Convert an existing calibration graph to inference graph. Args: calibration_graph_def: the calibration GraphDef object with calibration data + is_dynamic_op: whether to create dynamic static engines from calibration Returns: New GraphDef with TRTEngineOps placed in graph replacing calibration nodes. 
Raises: @@ -141,9 +179,16 @@ def calib_graph_to_infer_graph(calibration_graph_def): to_string = py2string else: to_string = py3string - + is_calib_graph = False + for n in calibration_graph_def.node: + if n.op == "TRTEngineOp": + is_calib_graph = is_calib_graph or not n.attr["calibration_data"].s + if not is_calib_graph: + tf_logging.error( + "Not a calib graph. Doesn't seem to contain any calibration nodes.") + return None graph_str = calibration_graph_def.SerializeToString() - out = calib_convert(graph_str) + out = calib_convert(graph_str, is_dynamic_op) status = to_string(out[0]) output_graph_def_string = out[1] del graph_str # Save some memory diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc index 0f0508331c..9f115990c3 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc @@ -50,7 +50,7 @@ TRTDeviceAllocator::TRTDeviceAllocator(tensorflow::Allocator* allocator) } void TRTDeviceAllocator::free(void* memory) { - VLOG(2) << "Deallocating " << memory; + VLOG(2) << "Deallocating @ " << memory; allocator_->DeallocateRaw(memory); } diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.h b/tensorflow/contrib/tensorrt/resources/trt_allocator.h index a0c2540a76..c5d2cec730 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_allocator.h +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.h @@ -16,7 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_ #define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_ - #include "tensorflow/contrib/tensorrt/log/trt_logger.h" #include "tensorflow/core/framework/allocator.h" @@ -52,7 +51,9 @@ class TRTDeviceAllocator : public nvinfer1::IGpuAllocator { // Allocator implementation wrapping TF device allocators. public: TRTDeviceAllocator(tensorflow::Allocator* allocator); - virtual ~TRTDeviceAllocator() {} + virtual ~TRTDeviceAllocator() { + VLOG(1) << "Destroying allocator attached to " << allocator_->Name(); + } void* allocate(uint64_t size, uint64_t alignment, uint32_t flags) override; void free(void* memory) override; diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc index dc7c93f869..32e81858b9 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc @@ -16,7 +16,6 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" #include -#include #include #include "tensorflow/core/platform/logging.h" @@ -37,15 +36,22 @@ TRTInt8Calibrator::TRTInt8Calibrator( : batch_size_(batch_size), done_(false), dev_buffers_(dev_buffers), - calib_running_(false), + calib_running_(true), batch_is_set_(false), engine_name_(engine_name) {} +TRTInt8Calibrator::TRTInt8Calibrator(const string& calib_data) + : batch_size_(0), + done_(false), + calib_running_(false), + batch_is_set_(false), + calibration_table_(calib_data) {} + bool TRTInt8Calibrator::setBatch(const std::unordered_map& data, const cudaStream_t stream) { tensorflow::mutex_lock lock(cond_mtx_); - while ((calib_running_ || batch_is_set_) && - !done_) { // wait while calibration is running + // wait while calibration is running. 
+ while ((calib_running_ || batch_is_set_) && !done_) { cond_.wait(lock); } if (done_) return false; @@ -59,8 +65,6 @@ bool TRTInt8Calibrator::setBatch(const std::unordered_map& data, } const auto& d = devptr->second; - // TODO(aaroey): we should not use sync copy on default stream. Make sure - // stream->ThenMemcpy() is used in future PRs. // TODO(sami,aaroey): Need to figure out a way to ensure synchronization // between stream, perhaps using a tensor? auto status = cudaMemcpyAsync(d.first, it.second, d.second, @@ -84,13 +88,11 @@ bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, tensorflow::mutex_lock lock(cond_mtx_); calib_running_ = false; cond_.notify_all(); - while ((!batch_is_set_ && !done_)) { // wait until new batch arrives + // wait until new batch arrives + while ((!batch_is_set_ && !done_)) { cond_.wait(lock); - - } - if (done_) { - return false; } + if (done_) return false; for (int i = 0; i < num_bindings; i++) { auto it = dev_buffers_.find(names[i]); @@ -107,7 +109,9 @@ bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, } const void* TRTInt8Calibrator::readCalibrationCache(std::size_t& length) { - return nullptr; + if (calibration_table_.empty()) return nullptr; + length = calibration_table_.size(); + return calibration_table_.data(); } void TRTInt8Calibrator::setDone() { @@ -117,7 +121,11 @@ void TRTInt8Calibrator::setDone() { } void TRTInt8Calibrator::writeCalibrationCache(const void* ptr, - std::size_t length) {} + std::size_t length) { + calibration_table_ = string((const char*)ptr, length); + VLOG(1) << "Got calibration data for " << engine_name_ << " @" << ptr + << " length=" << length; +} TRTInt8Calibrator::~TRTInt8Calibrator() { VLOG(1) << "Destroying calibrator for " << engine_name_; } diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h index d77aa2c5ab..994312d7c3 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h @@ -39,29 +39,48 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { TRTInt8Calibrator( const std::unordered_map>& dev_buffers, int batch_size, string engine_name); + + TRTInt8Calibrator(const string& calibration_data); + + ~TRTInt8Calibrator(); + int getBatchSize() const override; + bool getBatch(void* bindings[], const char* names[], int num_bindings) override; + bool setBatch(const std::unordered_map& data, const cudaStream_t stream); + void setDone(); + + // If not null, calibration is skipped. const void* readCalibrationCache(std::size_t& length) override; + void writeCalibrationCache(const void* ptr, std::size_t length) override; - ~TRTInt8Calibrator(); + + const string& getCalibrationTableAsString() { return calibration_table_; } private: const int batch_size_; - tensorflow::mutex cond_mtx_; // mutex for condition_variable - tensorflow::condition_variable cond_; // condition variable to implement - // producer-consumer queue for - // calibration + + // mutex for condition_variable + tensorflow::mutex cond_mtx_; + + // condition variable to implement producer-consumer queue for calibration + tensorflow::condition_variable cond_; + + // Is calibration finished? 
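The readCalibrationCache/writeCalibrationCache changes above give the calibrator a persistence hook: a calibration run stores the generated table, and a non-empty table handed back to TensorRT lets the builder skip calibration entirely. A minimal stand-alone version of that contract (CalibCache is an invented name):

#include <cstddef>
#include <string>

class CalibCache {
 public:
  // Returns the stored table, or nullptr so that TensorRT runs calibration.
  const void* Read(std::size_t& length) {
    if (table_.empty()) return nullptr;
    length = table_.size();
    return table_.data();
  }
  // Called by TensorRT with the freshly generated calibration table.
  void Write(const void* ptr, std::size_t length) {
    table_.assign(static_cast<const char*>(ptr), length);
  }

 private:
  std::string table_;
};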
bool done_; - const std::unordered_map> - dev_buffers_; // map to keep tensorrt input buffers and sizes keyed with - // buffer names + + // Map to keep tensorrt input buffers and sizes keyed with buffer names + const std::unordered_map> dev_buffers_; + bool calib_running_; bool batch_is_set_; + string engine_name_; + string calibration_table_; }; } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/resources/trt_resources.h b/tensorflow/contrib/tensorrt/resources/trt_resources.h index e3469124ac..b7d5ffd674 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_resources.h +++ b/tensorflow/contrib/tensorrt/resources/trt_resources.h @@ -22,6 +22,7 @@ limitations under the License. #include #include +#include "tensorflow/contrib/tensorrt/convert/utils.h" #include "tensorflow/contrib/tensorrt/log/trt_logger.h" #include "tensorflow/contrib/tensorrt/resources/trt_allocator.h" #include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" @@ -34,50 +35,48 @@ limitations under the License. namespace tensorflow { namespace tensorrt { + class TRTCalibrationResource : public tensorflow::ResourceBase { public: - TRTCalibrationResource() - : calibrator_(nullptr), - builder_(nullptr), - network_(nullptr), - engine_(nullptr), - logger_(nullptr), - thr_(nullptr) {} - ~TRTCalibrationResource() { VLOG(0) << "Destroying Calibration Resource " << std::endl << DebugString(); + builder_.reset(); + engine_.reset(); + // We need to manually destroy the builder and engine before the allocator + // is destroyed. + allocator_.reset(); } string DebugString() override { std::stringstream oss; - oss << " Calibrator = " << std::hex << calibrator_ << std::dec << std::endl - << " Builder = " << std::hex << builder_ << std::dec << std::endl - << " Network = " << std::hex << network_ << std::dec << std::endl - << " Engine = " << std::hex << engine_ << std::dec << std::endl - << " Logger = " << std::hex << logger_ << std::dec << std::endl - << " Allocator = " << std::hex << allocator_.get() << std::dec - << std::endl - << " Thread = " << std::hex << thr_ << std::dec << std::endl; + using std::dec; + using std::endl; + using std::hex; + oss << " Calibrator = " << hex << calibrator_.get() << dec << endl + << " Builder = " << hex << builder_.get() << dec << endl + << " Engine = " << hex << engine_.get() << dec << endl + << " Logger = " << hex << &logger_ << dec << endl + << " Allocator = " << hex << allocator_.get() << dec << endl + << " Thread = " << hex << thr_.get() << dec << endl; return oss.str(); } - TRTInt8Calibrator* calibrator_; - nvinfer1::IBuilder* builder_; - nvinfer1::INetworkDefinition* network_; - nvinfer1::ICudaEngine* engine_; - std::shared_ptr allocator_; - tensorflow::tensorrt::Logger* logger_; + std::unique_ptr calibrator_; + TrtUniquePtrType builder_; + TrtUniquePtrType engine_; + std::unique_ptr allocator_; + tensorflow::tensorrt::Logger logger_; // TODO(sami): Use threadpool threads! 
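The raw pointers replaced above become unique_ptrs, and the TensorRT objects use TrtUniquePtrType, which presumably wraps the same destroy()-based deleter as the Destroyer template this patch removes from trt_engine_op.h. A sketch of that pattern under that assumption (TrtDestroyer/TrtUniquePtr are illustrative names):

#include <memory>

// TensorRT objects are released through destroy(), not delete, so a
// unique_ptr holding them needs a custom deleter.
template <typename T>
struct TrtDestroyer {
  void operator()(T* t) const {
    if (t) t->destroy();
  }
};

template <typename T>
using TrtUniquePtr = std::unique_ptr<T, TrtDestroyer<T>>;

// e.g. TrtUniquePtr<nvinfer1::ICudaEngine> engine(builder->buildCudaEngine(network));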
- std::thread* thr_; + std::unique_ptr thr_; }; -class TRTWeightStore : public tensorflow::ResourceBase { +class TRTWeightStore { public: TRTWeightStore() {} virtual ~TRTWeightStore() { VLOG(1) << "Destroying store" << DebugString(); } - string DebugString() override { + string DebugString() { std::stringstream oss; size_t len_bytes = 0; for (const auto& v : store_) { diff --git a/tensorflow/contrib/tensorrt/segment/segment.h b/tensorflow/contrib/tensorrt/segment/segment.h index 1568dd9153..81b4bfe49f 100644 --- a/tensorflow/contrib/tensorrt/segment/segment.h +++ b/tensorflow/contrib/tensorrt/segment/segment.h @@ -29,8 +29,9 @@ namespace tensorflow { namespace tensorrt { namespace segment { -// vector of segments, each entry contains a device name and a set of nodes in -// segment +// Vector of segments, each entry contains a set of node names and a device name +// in the segment. +// TODO(aaroey): use node pointer instead of node name. using SegmentNodesVector = std::vector, string>>; struct SegmentOptions { @@ -48,6 +49,8 @@ struct SegmentOptions { // in the vector describes a subgraph by giving a set of the names of // all the NodeDefs in that subgraph. // @return the status. +// +// TODO(aaroey): remove this method. tensorflow::Status SegmentGraph( const tensorflow::GraphDef& gdef, const std::function& candidate_fn, diff --git a/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc b/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc index f36495f6b6..227ac120dd 100644 --- a/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc +++ b/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc @@ -29,61 +29,35 @@ namespace tensorflow { namespace shape_inference { tensorflow::Status TRTEngineOpShapeInference(InferenceContext* context) { - tensorflow::tensorrt::Logger logger; - string serialized_engine; - TF_RETURN_IF_ERROR(context->GetAttr("serialized_engine", &serialized_engine)); - nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(logger); - nvinfer1::ICudaEngine* trt_engine = infer->deserializeCudaEngine( - serialized_engine.c_str(), serialized_engine.size(), - tensorrt::PluginFactoryTensorRT::GetInstance()); - - int num_batch = -1; - std::vector<::tensorflow::DataType> input_type; - TF_RETURN_IF_ERROR(context->GetAttr("InT", &input_type)); - for (size_t i = 0; i < context->num_inputs(); i++) { - // Check if input shape is legit - auto input_shape = context->input(i); - for (int j = 0; j < context->Rank(input_shape); j++) { - auto dim_handler = context->Dim(input_shape, j); - if (j == 0) { - if (i == 0) { - num_batch = context->Value(dim_handler); - } else if (num_batch != context->Value(dim_handler)) { - // TODO(jie): TensorRT engine requires consistent batch between inputs - // tensors. Segmenter should be aware of this. 
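For readability, the SegmentNodesVector alias in segment.h above, written out with its full template arguments as described by its comment (a set of node names plus a device name per segment), is presumably:

#include <set>
#include <string>
#include <utility>
#include <vector>

// One entry per TRT segment: the names of the nodes in the segment and the
// device the segment is assigned to.
using SegmentNodesVector =
    std::vector<std::pair<std::set<std::string>, std::string>>;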
- LOG(FATAL) << "TensorRT engine requires consistent batch size"; - } - } - } + std::vector shapes; + for (int i = 0; i < context->num_outputs(); ++i) { + context->set_output(i, context->UnknownShape()); } - - // Arrange input here - std::vector input_nodes; - TF_RETURN_IF_ERROR(context->GetAttr("input_nodes", &input_nodes)); - - // Arrange output here - std::vector output_nodes; - TF_RETURN_IF_ERROR(context->GetAttr("output_nodes", &output_nodes)); - for (size_t i = 0; i < output_nodes.size(); i++) { - int binding_index = trt_engine->getBindingIndex(output_nodes[i].c_str()); - ShapeHandle output_shape; - std::vector dim_vec; - dim_vec.emplace_back(context->MakeDim(num_batch)); - if (binding_index != -1) { - auto dims = trt_engine->getBindingDimensions(binding_index); - for (int j = 0; j < dims.nbDims; j++) { - dim_vec.emplace_back(context->MakeDim(dims.d[j])); - } - } else { - LOG(FATAL) << "TensorRT engine cannot find binding: " << output_nodes[i]; - } - output_shape = context->MakeShape(dim_vec); - context->set_output(i, output_shape); + auto status = context->GetAttr("input_shapes", &shapes); + // it is ok to not to have shapes + if (!status.ok()) return Status::OK(); + if ((int)shapes.size() != context->num_inputs()) return Status::OK(); + bool different_input = false; + for (int i = 0; i < context->num_inputs(); ++i) { + if (shapes.at(i) != context->input_tensor(i)->shape()) + different_input = true; + } + if (different_input) return Status::OK(); + shapes.resize(0); + status = context->GetAttr("output_shapes", &shapes); + if (!status.ok()) return Status::OK(); + if ((int)shapes.size() != context->num_outputs()) return Status::OK(); + std::vector shape_handles(shapes.size()); + for (size_t i = 0; i < shapes.size(); ++i) { + status = + context->MakeShapeFromTensorShape(shapes.at(i), &shape_handles.at(i)); + if (!status.ok()) return Status::OK(); + } + for (int i = 0; i < context->num_outputs(); ++i) { + context->set_output(i, shape_handles.at(i)); } - return Status::OK(); } - } // namespace shape_inference } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py index 175ccd8006..090aa8bdb0 100644 --- a/tensorflow/contrib/tensorrt/test/test_tftrt.py +++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py @@ -20,6 +20,7 @@ from __future__ import print_function import argparse import numpy as np +import six as _six # normally we should do import tensorflow as tf and then # tf.placeholder, tf.constant, tf.nn.conv2d etc but @@ -35,10 +36,75 @@ from tensorflow.python.framework import dtypes as dtypes from tensorflow.python.framework import importer as importer from tensorflow.python.framework import ops as ops from tensorflow.python.ops import array_ops as aops +from tensorflow.python.ops import math_ops as mops from tensorflow.python.ops import nn as nn from tensorflow.python.ops import nn_ops as nn_ops +def py2bytes(inp): + return inp + + +def py3bytes(inp): + return inp.encode("utf-8", errors="surrogateescape") + + +def py2string(inp): + return inp + + +def py3string(inp): + return inp.decode("utf-8") + + +if _six.PY2: + to_bytes = py2bytes + to_string = py2string +else: + to_bytes = py3bytes + to_string = py3string + + +def get_multi_engine_graph_def(mode="FP32"): + """Create a simple graph and return its graph_def.""" + dtype = dtypes.float32 + if mode.upper() == "FP16": + dtype = dtypes.float16 + else: + pass + + g = ops.Graph() + with g.as_default(): + x = aops.placeholder(shape=[None, 3, 7, 5], name="input", 
dtype=dtype) + with g.name_scope("Global_scope"): + with g.name_scope("first_scope"): + e = cop.constant( + np.random.randn(3, 2, 3, 4), name="weights", dtype=dtype) + conv = nn.conv2d( + input=x, + filter=e, + data_format="NCHW", + strides=[1, 1, 1, 1], + padding="VALID", + name="conv") + b = cop.constant(np.random.randn(1, 4, 1, 1), name="bias1", dtype=dtype) + t = conv * b + + b = cop.constant(np.random.randn(1, 4, 1, 1), name="bias2", dtype=dtype) + q = conv / b + edge = mops.sin(q) + edge1 = mops.cos(conv) + with g.name_scope("test_scope"): + de = edge + edge1 + t -= edge1 + q *= edge + t += q + t -= de + k = aops.squeeze(t, name="output") + print(k.dtype) + return g.as_graph_def() + + def get_simple_graph_def(): """Create a simple graph and return its graph_def.""" g = ops.Graph() @@ -65,7 +131,9 @@ def get_simple_graph_def(): def execute_graph(gdef, dumm_inp): """Run given graphdef once.""" print("executing") - gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) + gpu_options = None + if trt.trt_convert.get_linked_tensorrt_version()[0] == 3: + gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) sessconfig = cpb2.ConfigProto(gpu_options=gpu_options) ops.reset_default_graph() g = ops.Graph() @@ -83,7 +151,9 @@ def execute_graph(gdef, dumm_inp): # for calibration. For this test script it is random data. def execute_calibration(gdef, dumm_inp): """Run given calibration graph multiple times.""" - gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) + gpu_options = None + if trt.trt_convert.get_linked_tensorrt_version()[0] == 3: + gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) ops.reset_default_graph() g = ops.Graph() with g.as_default(): @@ -100,12 +170,17 @@ def execute_calibration(gdef, dumm_inp): return val -def user(run_graph=execute_graph, run_calibration=execute_calibration): +def user(multi_engine, + run_graph=execute_graph, + run_calibration=execute_calibration): """Example function that converts a graph to TFTRT graph.""" - - inp_dims = (100, 24, 24, 2) + if multi_engine: + inp_dims = (2, 3, 7, 5) + orig_graph = get_multi_engine_graph_def() + else: + inp_dims = (100, 24, 24, 2) + orig_graph = get_simple_graph_def() # use a frozen graph for inference dummy_input = np.random.random_sample(inp_dims) - orig_graph = get_simple_graph_def() # use a frozen graph for inference # Get optimized graph trt_graph = trt.create_inference_graph( input_graph_def=orig_graph, @@ -113,8 +188,10 @@ def user(run_graph=execute_graph, run_calibration=execute_calibration): max_batch_size=inp_dims[0], max_workspace_size_bytes=1 << 25, precision_mode="FP32", # TRT Engine precision "FP32","FP16" or "INT8" - minimum_segment_size=2 # minimum number of nodes in an engine - ) + minimum_segment_size=2, # minimum number of nodes in an engine + is_dynamic_op=False, + maximum_cached_engines=1, + cached_engine_batches=[]) o1 = run_graph(orig_graph, dummy_input) o2 = run_graph(trt_graph, dummy_input) o3 = run_graph(trt_graph, dummy_input) @@ -126,40 +203,51 @@ def user(run_graph=execute_graph, run_calibration=execute_calibration): max_batch_size=inp_dims[0], max_workspace_size_bytes=1 << 25, precision_mode="FP16", # TRT Engine precision "FP32","FP16" or "INT8" - minimum_segment_size=2 # minimum number of nodes in an engine - ) + minimum_segment_size=2, # minimum number of nodes in an engine + is_dynamic_op=False, + maximum_cached_engines=1, + cached_engine_batches=[]) int8_calib_gdef = trt.create_inference_graph( input_graph_def=orig_graph, 
outputs=["output"], max_batch_size=inp_dims[0], max_workspace_size_bytes=1 << 25, precision_mode="INT8", # TRT Engine precision "FP32","FP16" or "INT8" - minimum_segment_size=2 # minimum number of nodes in an engine - ) + minimum_segment_size=2, # minimum number of nodes in an engine + is_dynamic_op=False, + maximum_cached_engines=1, + cached_engine_batches=[]) o4 = run_graph(fp16_graph, dummy_input) _ = run_calibration(int8_calib_gdef, dummy_input) int8_graph = trt.calib_graph_to_infer_graph(int8_calib_gdef) o5 = run_graph(int8_graph, dummy_input) - assert np.allclose(o1, o4) - assert np.allclose(o1, o5) + print("Is FP32 == FP16? %s (False is possible)" % np.allclose(o1, o4)) + print("Is FP32 == INT8? %s (False is possible)" % np.allclose(o1, o5)) print("Pass") -def auto(): +def auto(multi_engine): """Run the conversion as an optimization pass.""" - inp_dims = (100, 24, 24, 2) + if multi_engine: + inp_dims = (2, 3, 7, 5) + orig_graph = get_multi_engine_graph_def() + else: + inp_dims = (100, 24, 24, 2) + orig_graph = get_simple_graph_def() # use a frozen graph for inference dummy_input = np.random.random_sample(inp_dims) - orig_graph = get_simple_graph_def() opt_config = rwpb2.RewriterConfig() + opt_config.meta_optimizer_iterations = opt_config.ONE opt_config.optimizers.extend(["constfold", "layout"]) custom_op = opt_config.custom_optimizers.add() custom_op.name = "TensorRTOptimizer" custom_op.parameter_map["minimum_segment_size"].i = 3 - custom_op.parameter_map["precision_mode"].s = "FP32" + custom_op.parameter_map["precision_mode"].s = to_bytes("FP32") custom_op.parameter_map["max_batch_size"].i = inp_dims[0] custom_op.parameter_map["max_workspace_size_bytes"].i = 1 << 25 print(custom_op) - gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) + gpu_options = None + if trt.trt_convert.get_linked_tensorrt_version()[0] == 3: + gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) graph_options = cpb2.GraphOptions(rewrite_options=opt_config) sessconfig = cpb2.ConfigProto( gpu_options=gpu_options, graph_options=graph_options) @@ -168,7 +256,7 @@ def auto(): ops.reset_default_graph() with g.as_default(): inp, out = importer.import_graph_def( - graph_def=orig_graph, return_elements=["input", "output"]) + graph_def=orig_graph, return_elements=["input", "output"], name="") inp = inp.outputs[0] out = out.outputs[0] with csess.Session(config=sessconfig, graph=g) as sess: @@ -186,8 +274,14 @@ if "__main__" in __name__: action="store_true", help="Do TRT conversion automatically", default=False) + P.add_argument( + "--multi-engine", + "-m", + action="store_true", + help="Use a graph that will result in 2 engines", + default=False) flags, unparsed = P.parse_known_args() if flags.automatic: - auto() + auto(flags.multi_engine) else: - user() + user(flags.multi_engine) diff --git a/tensorflow/contrib/tensorrt/trt_conversion.i b/tensorflow/contrib/tensorrt/trt_conversion.i index 46480e99a1..d51a0b59e2 100644 --- a/tensorflow/contrib/tensorrt/trt_conversion.i +++ b/tensorflow/contrib/tensorrt/trt_conversion.i @@ -48,12 +48,53 @@ PyObject* pair_helper(std::pair* in) { } return tuple; } + +struct version_struct{ + int vmajor; + int vminor; + int vpatch; +}; + +PyObject* version_helper(version_struct* in) { + PyObject *tuple(nullptr); + tuple = Py_BuildValue("(iii)", in->vmajor, in->vminor, in->vpatch); + if (!tuple) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "Tuple creation from version structure failed!"); + } + return NULL; + } + return tuple; +} +/* 
Define converters for vector */ +template<> +bool _PyObjAs(PyObject *pyobj, int* dest) { + *dest = PyLong_AsLong(pyobj); + return true; +} + +template<> +PyObject *_PyObjFrom(const int& src) { + return PyLong_FromLong(src); +} + %} + +_LIST_OUTPUT_TYPEMAP(int, PyLong_FromLong); + %typemap(out) std::pair { PyObject *tuple = pair_helper(&$1); if (!tuple) SWIG_fail; $result = tuple; } + +%typemap(out) version_struct { + PyObject *tuple = version_helper(&$1); + if (!tuple) SWIG_fail; + $result = tuple; +} + %{ #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" @@ -65,6 +106,8 @@ PyObject* pair_helper(std::pair* in) { %unignore tensorflow; %unignore trt_convert; %unignore calib_convert; +%unignore get_linked_tensorrt_version; +%unignore get_loaded_tensorrt_version; %{ @@ -74,7 +117,10 @@ std::pair trt_convert( size_t max_batch_size, size_t max_workspace_size_bytes, int precision_mode, - int minimum_segment_size + int minimum_segment_size, + bool is_dyn_op, + int max_cached_engines, + std::vector cached_engine_batches // Unfortunately we can't use TF_Status here since it // is in c/c_api and brings in a lot of other libraries // which in turn declare ops. These ops are included @@ -102,11 +148,12 @@ std::pair trt_convert( out_status = "InvalidArgument;Size of the output_names vector is 0"; return std::pair{out_status, ""}; } - tensorflow::GraphDef outGraph; + tensorflow::GraphDef out_graph; tensorflow::Status conversion_status = tensorflow::tensorrt::convert::ConvertGraphDefToTensorRT( graph_def, output_names, max_batch_size, max_workspace_size_bytes, - &outGraph, precision_mode, minimum_segment_size); + &out_graph, precision_mode, minimum_segment_size, + is_dyn_op, max_cached_engines, cached_engine_batches); if (!conversion_status.ok()) { auto retCode = (int)conversion_status.code(); char buff[2000]; @@ -116,7 +163,7 @@ std::pair trt_convert( return std::pair{out_status, ""}; } string result; - if (!outGraph.SerializeToString(&result)) { + if (!out_graph.SerializeToString(&result)) { out_status = "InvalidArgument;Couldn't serialize output as a GraphDef"; return std::pair{out_status, ""}; } @@ -128,7 +175,8 @@ std::pair trt_convert( #endif // GOOGLE_CUDA && GOOGLE_TENSORRT } -std::pair calib_convert(string graph_def_string // const tensorflow::GraphDef& +std::pair calib_convert( + string graph_def_string, bool is_dyn_op // unfortunately we can't use TF_Status here since it // is in c/c_api and brings in a lot of other libraries // which in turn declare ops. 
These ops are included @@ -147,11 +195,11 @@ std::pair calib_convert(string graph_def_string // const tenso out_status = "InvalidArgument;Couldn't interpret input as a GraphDef"; return std::pair{out_status, ""}; } - - tensorflow::GraphDef outGraph; + graph_def_string.resize(0); + tensorflow::GraphDef out_graph; tensorflow::Status conversion_status = - tensorflow::tensorrt::convert::ConvertCalibGraphToInferGraph(graph_def, - &outGraph); + tensorflow::tensorrt::convert::ConvertCalibGraphToInferGraph( + graph_def, &out_graph, is_dyn_op); if (!conversion_status.ok()) { auto retCode = (int)conversion_status.code(); char buff[2000]; @@ -161,7 +209,7 @@ std::pair calib_convert(string graph_def_string // const tenso return std::pair{out_status, ""}; } string result; - if (!outGraph.SerializeToString(&result)) { + if (!out_graph.SerializeToString(&result)) { out_status = "InvalidArgument;Couldn't serialize output as a GraphDef"; return std::pair{out_status, ""}; } @@ -172,15 +220,39 @@ std::pair calib_convert(string graph_def_string // const tenso return std::pair{"9;TensorRT is not enabled!", ""}; #endif // GOOGLE_CUDA && GOOGLE_TENSORRT } + +version_struct get_linked_tensorrt_version(){ + // Return the version at the link time. + const auto &lv = tensorflow::tensorrt::convert::GetLinkedTensorRTVersion(); + version_struct s; + s.vmajor = lv[0]; + s.vminor = lv[1]; + s.vpatch = lv[2]; + return s; +} +version_struct get_loaded_tensorrt_version(){ + // Return the version from the loaded library. + const auto &lv = tensorflow::tensorrt::convert::GetLoadedTensorRTVersion(); + version_struct s; + s.vmajor = lv[0]; + s.vminor = lv[1]; + s.vpatch = lv[2]; + return s; +} + %} -std::pair calib_convert(string graph_def_string); +std::pair calib_convert(string graph_def_string, bool is_dyn_op); std::pair trt_convert(string graph_def_string, std::vector output_names, size_t max_batch_size, size_t max_workspace_size_bytes, - int precision_mode, int minimum_segment_size); - + int precision_mode, int minimum_segment_size, + bool is_dyn_op, + int max_cached_engines, + std::vector cached_engine_batches); +version_struct get_linked_tensorrt_version(); +version_struct get_loaded_tensorrt_version(); %unignoreall diff --git a/tensorflow/contrib/tpu/profiler/BUILD b/tensorflow/contrib/tpu/profiler/BUILD index 3b2d7adfff..38d1c3049e 100644 --- a/tensorflow/contrib/tpu/profiler/BUILD +++ b/tensorflow/contrib/tpu/profiler/BUILD @@ -49,11 +49,11 @@ tf_cc_binary( ":tpu_profiler_analysis_proto_cc", ":tpu_profiler_proto_cc", ":version", + "//tensorflow:grpc++", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "//tensorflow/core/distributed_runtime/rpc:grpc_util", "//tensorflow/core/platform/cloud:gcs_file_system", - "@grpc//:grpc++", ], ) diff --git a/tensorflow/contrib/verbs/BUILD b/tensorflow/contrib/verbs/BUILD index 1b45584dcb..19cb8983b6 100644 --- a/tensorflow/contrib/verbs/BUILD +++ b/tensorflow/contrib/verbs/BUILD @@ -53,12 +53,12 @@ cc_library( ":grpc_verbs_service_impl", ":rdma_mgr", ":verbs_service_proto_cc", + "//tensorflow:grpc++", "//tensorflow/core:lib_internal", "//tensorflow/core/distributed_runtime:session_mgr", "//tensorflow/core/distributed_runtime/rpc:async_service_interface", "//tensorflow/core/distributed_runtime/rpc:grpc_call", "//tensorflow/core/distributed_runtime/rpc:grpc_util", - "@grpc//:grpc++", ], alwayslink = 1, ) @@ -69,7 +69,7 @@ cc_library( hdrs = ["grpc_verbs_service_impl.h"], deps = [ ":verbs_service_proto_cc", - "@grpc//:grpc++", + "//tensorflow:grpc++", ], ) diff --git 
a/tensorflow/core/api_def/BUILD b/tensorflow/core/api_def/BUILD index 19d6438809..06b797e32e 100644 --- a/tensorflow/core/api_def/BUILD +++ b/tensorflow/core/api_def/BUILD @@ -4,6 +4,7 @@ # The following targets can be used to access ApiDefs: # :base_api_def # :python_api_def +# :java_api_def package( default_visibility = ["//visibility:private"], @@ -29,6 +30,12 @@ filegroup( visibility = ["//tensorflow:internal"], ) +filegroup( + name = "java_api_def", + srcs = glob(["java_api/*"]), + visibility = ["//tensorflow:internal"], +) + cc_library( name = "excluded_ops_lib", srcs = ["excluded_ops.cc"], diff --git a/tensorflow/core/api_def/base_api/api_def_SampleDistortedBoundingBox.pbtxt b/tensorflow/core/api_def/base_api/api_def_SampleDistortedBoundingBox.pbtxt index 6f1121dd37..5ab5917bd3 100644 --- a/tensorflow/core/api_def/base_api/api_def_SampleDistortedBoundingBox.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SampleDistortedBoundingBox.pbtxt @@ -68,7 +68,7 @@ END name: "area_range" description: <