diff options
Diffstat (limited to 'tensorflow/contrib')
98 files changed, 478 insertions, 2571 deletions
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 986b61b3ea..bab37e8906 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -8,7 +8,6 @@ package(default_visibility = ["//tensorflow:__subpackages__"]) load("//third_party/mpi:mpi.bzl", "if_mpi") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load("@local_config_tensorrt//:build_defs.bzl", "if_tensorrt") -load("//tensorflow:tensorflow.bzl", "if_not_windows") py_library( name = "contrib_py", @@ -52,6 +51,7 @@ py_library( "//tensorflow/contrib/image:single_image_random_dot_stereograms_py", "//tensorflow/contrib/input_pipeline:input_pipeline_py", "//tensorflow/contrib/integrate:integrate_py", + "//tensorflow/contrib/kafka", "//tensorflow/contrib/keras", "//tensorflow/contrib/kernel_methods", "//tensorflow/contrib/kfac", @@ -63,6 +63,7 @@ py_library( "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/contrib/linear_optimizer:sdca_estimator_py", "//tensorflow/contrib/linear_optimizer:sdca_ops_py", + "//tensorflow/contrib/lite/python:lite", "//tensorflow/contrib/lookup:lookup_py", "//tensorflow/contrib/losses:losses_py", "//tensorflow/contrib/losses:metric_learning_py", @@ -109,10 +110,6 @@ py_library( "//tensorflow/python:util", ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_tensorrt([ "//tensorflow/contrib/tensorrt:init_py", - ]) + if_not_windows([ - "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", # unix dependency, need to fix code - "//tensorflow/contrib/lite/python:lite", # unix dependency, need to fix code - "//tensorflow/contrib/kafka", # has some linking issue on opensssl. ]), ) @@ -124,7 +121,6 @@ cc_library( "//tensorflow/contrib/coder:all_kernels", "//tensorflow/contrib/cudnn_rnn:cudnn_rnn_kernels", "//tensorflow/contrib/data/kernels:dataset_kernels", - "//tensorflow/contrib/kafka:dataset_kernels", "//tensorflow/contrib/factorization/kernels:all_kernels", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_kernels", "//tensorflow/contrib/layers:sparse_feature_cross_op_kernel", @@ -151,7 +147,7 @@ cc_library( "//tensorflow/contrib/factorization:all_ops", "//tensorflow/contrib/framework:all_ops", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_op_lib", - "//tensorflow/contrib/kafka:dataset_ops_op_lib", + "//tensorflow/contrib/kafka:kafka_ops_op_lib", "//tensorflow/contrib/layers:sparse_feature_cross_op_op_lib", "//tensorflow/contrib/nccl:nccl_ops_op_lib", "//tensorflow/contrib/nearest_neighbor:nearest_neighbor_ops_op_lib", diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index 669d611b01..4f6f539027 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -18,8 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os - # Add projects here, they will show up under tf.contrib. from tensorflow.contrib import batching from tensorflow.contrib import bayesflow @@ -85,8 +83,7 @@ from tensorflow.contrib import tpu from tensorflow.contrib import training from tensorflow.contrib import util from tensorflow.contrib.eager.python import tfe as eager -if os.name != 'nt': - from tensorflow.contrib.lite.python import lite +from tensorflow.contrib.lite.python import lite from tensorflow.contrib.receptive_field import receptive_field_api as receptive_field from tensorflow.contrib.remote_fused_graph import pylib as remote_fused_graph from tensorflow.contrib.specs import python as specs @@ -95,7 +92,6 @@ from tensorflow.contrib.summary import summary from tensorflow.python.util.lazy_loader import LazyLoader ffmpeg = LazyLoader("ffmpeg", globals(), "tensorflow.contrib.ffmpeg") -del os del LazyLoader del absolute_import diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h index 7815fa049a..da5e744851 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h +++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h @@ -48,9 +48,9 @@ class BatchFeatures { Status GetFeatureColumnSizes(int64* const num_dense_float_features, int64* const num_sparse_float_features, int64* const num_sparse_int_features) const { - QCHECK_NE(num_dense_float_features, (int64*) nullptr); - QCHECK_NE(num_sparse_float_features, (int64*) nullptr); - QCHECK_NE(num_sparse_int_features, (int64*) nullptr); + QCHECK_NE(num_dense_float_features, nullptr); + QCHECK_NE(num_sparse_float_features, nullptr); + QCHECK_NE(num_sparse_int_features, nullptr); *num_dense_float_features = dense_float_feature_columns_.size(); *num_sparse_float_features = sparse_float_feature_columns_.size(); *num_sparse_int_features = sparse_int_feature_columns_.size(); diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index fe83bb3204..8f85a75ee4 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -26,7 +26,7 @@ The CMake files in this directory can build the core TensorFlow runtime, an example C++ binary, and a PIP package containing the runtime and Python bindings. -### Prerequisites +### Pre-requisites * CMake version 3.5 or later. @@ -34,16 +34,14 @@ bindings. * [SWIG](http://www.swig.org/download.html) -* Additional prerequisites for Microsoft Windows: +* Additional pre-requisites for Microsoft Windows: - Visual Studio 2015 - Python 3.5 + - NumPy 1.11.0 or later -* Additional prerequisites for Linux: +* Additional pre-requisites for Linux: - Python 2.7 or later - [Docker](https://www.docker.com/) (for automated testing) - -* Python dependencies: - - wheel - NumPy 1.11.0 or later ### Known-good configurations @@ -104,7 +102,7 @@ ops or APIs. Step-by-step Windows build ========================== -1. Install the prerequisites detailed above, and set up your environment. +1. Install the pre-requisites detailed above, and set up your environment. * The following commands assume that you are using the Windows Command Prompt (`cmd.exe`). You will need to set up your environment to use the diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index 17f65999fa..a9f43a3ecb 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -35,7 +35,6 @@ else() set(grpc_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc++_unsecure.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a - ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/third_party/cares/cares/lib/libcares.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a) endif() diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index ab464bc99a..aba8a5244e 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src) set(PROTOBUF_URL https://github.com/google/protobuf.git) -set(PROTOBUF_TAG b04e5cba356212e4e8c66c61bbe0c3a20537c5b9) +set(PROTOBUF_TAG 396336eb961b75f03b25824fe86cf6490fb75e3a) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index b3e5b30826..9f96a4b797 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -476,10 +476,6 @@ if (tensorflow_BUILD_CC_TESTS) "${tensorflow_source_dir}/tensorflow/core/profiler/internal/advisor/*_test.cc" ) - list(REMOVE_ITEM tf_test_src_simple - ${tf_core_profiler_test_srcs} - ) - set(tf_test_lib tf_test_lib) add_library(${tf_test_lib} STATIC ${tf_src_testlib}) diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 9212b69700..f09d156832 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -40,7 +40,6 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@rejection_resample @@scan @@shuffle_and_repeat -@@sliding_window_batch @@sloppy_interleave @@unbatch @@ -73,9 +72,6 @@ from tensorflow.contrib.data.python.ops.readers import SqlDataset from tensorflow.contrib.data.python.ops.resampling import rejection_resample from tensorflow.contrib.data.python.ops.scan_ops import scan from tensorflow.contrib.data.python.ops.shuffle_ops import shuffle_and_repeat -from tensorflow.contrib.data.python.ops.sliding import sliding_window_batch -from tensorflow.python.data.ops.iterator_ops import Iterator -from tensorflow.python.ops.parsing_ops import parse_single_example_v2 as parse_single_example # pylint: enable=unused-import from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 2c4d4adfda..22418b38e3 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -498,23 +498,6 @@ py_test( ], ) -tf_py_test( - name = "slide_dataset_op_test", - size = "small", - srcs = ["slide_dataset_op_test.py"], - additional_deps = [ - "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:transformation_ops", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:math_ops", - "//tensorflow/python:sparse_tensor", - "//third_party/py/numpy", - ], -) - filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py deleted file mode 100644 index 33c48e20be..0000000000 --- a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for the experimental input pipeline ops.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.data.python.ops import sliding -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.platform import test - - -class SlideDatasetTest(test.TestCase): - - def testSlideDataset(self): - """Test an dataset that maps a TF function across its input elements.""" - components = (np.arange(7), - np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis], - np.array(37.0) * np.arange(7)) - - count = array_ops.placeholder(dtypes.int64, shape=[]) - window_size = array_ops.placeholder(dtypes.int64, shape=[]) - stride = array_ops.placeholder(dtypes.int64, shape=[]) - - def _map_fn(x, y, z): - return math_ops.square(x), math_ops.square(y), math_ops.square(z) - - # The pipeline is TensorSliceDataset -> MapDataset(square_3) -> - # RepeatDataset(count) -> _SlideDataset(window_size, stride). - iterator = (dataset_ops.Dataset.from_tensor_slices(components) - .map(_map_fn) - .repeat(count) - .apply(sliding.sliding_window_batch(window_size, stride)) - .make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - self.assertEqual([[None] + list(c.shape[1:]) for c in components], - [t.shape.as_list() for t in get_next]) - - with self.test_session() as sess: - # Slide over a finite input, where the window_size divides the - # total number of elements. - sess.run(init_op, feed_dict={count: 20, window_size: 14, stride: 7}) - # Same formula with convolution layer. - num_batches = (20 * 7 - 14) // 7 + 1 - for i in range(num_batches): - result = sess.run(get_next) - for component, result_component in zip(components, result): - for j in range(14): - self.assertAllEqual(component[(i*7 + j) % 7]**2, - result_component[j]) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Slide over a finite input, where the window_size does not - # divide the total number of elements. - sess.run(init_op, feed_dict={count: 20, window_size: 17, stride: 9}) - - num_batches = (20 * 7 - 17) // 9 + 1 - for i in range(num_batches): - result = sess.run(get_next) - for component, result_component in zip(components, result): - for j in range(17): - self.assertAllEqual(component[(i*9 + j) % 7]**2, - result_component[j]) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Slide over a finite input, which is less than window_size, - # should fail straight away. - sess.run(init_op, feed_dict={count: 1, window_size: 10, stride: 4}) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - sess.run(init_op, feed_dict={count: 1, window_size: 10, stride: 8}) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Slide over an empty input should fail straight away. - sess.run(init_op, feed_dict={count: 0, window_size: 8, stride: 4}) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Empty window_size should be an initialization time error. - with self.assertRaises(errors.InvalidArgumentError): - sess.run(init_op, feed_dict={count: 14, window_size: 0, stride: 0}) - - # Invalid stride should be an initialization time error. - with self.assertRaises(errors.InvalidArgumentError): - sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 0}) - with self.assertRaises(errors.InvalidArgumentError): - sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 3}) - with self.assertRaises(errors.InvalidArgumentError): - sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 5}) - - def assertSparseValuesEqual(self, a, b): - self.assertAllEqual(a.indices, b.indices) - self.assertAllEqual(a.values, b.values) - self.assertAllEqual(a.dense_shape, b.dense_shape) - - def testSlideSparse(self): - - def _sparse(i): - return sparse_tensor.SparseTensorValue( - indices=[[0]], values=(i * [1]), dense_shape=[1]) - - iterator = dataset_ops.Dataset.range(10).map(_sparse).apply( - sliding.sliding_window_batch(5, 3)).make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.test_session() as sess: - sess.run(init_op) - num_batches = (10 - 5) // 3 + 1 - for i in range(num_batches): - actual = sess.run(get_next) - expected = sparse_tensor.SparseTensorValue( - indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], - values=[i * 3, i * 3 + 1, i * 3 + 2, i * 3 + 3, i * 3 + 4], - dense_shape=[5, 1]) - self.assertTrue(sparse_tensor.is_sparse(actual)) - self.assertSparseValuesEqual(actual, expected) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testSlideSparseWithDifferentDenseShapes(self): - - def _sparse(i): - return sparse_tensor.SparseTensorValue( - indices=array_ops.expand_dims( - math_ops.range(i, dtype=dtypes.int64), 1), - values=array_ops.fill([math_ops.to_int32(i)], i), - dense_shape=[i]) - - iterator = dataset_ops.Dataset.range(10).map(_sparse).apply( - sliding.sliding_window_batch(5, 3)).make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.test_session() as sess: - sess.run(init_op) - num_batches = (10 - 5) // 3 + 1 - for i in range(num_batches): - actual = sess.run(get_next) - expected_indices = [] - expected_values = [] - for j in range(5): - for k in range(i * 3 + j): - expected_indices.append([j, k]) - expected_values.append(i * 3 + j) - expected = sparse_tensor.SparseTensorValue( - indices=expected_indices, - values=expected_values, - dense_shape=[5, i * 3 + 5 - 1]) - self.assertTrue(sparse_tensor.is_sparse(actual)) - self.assertSparseValuesEqual(actual, expected) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testNestedSlideSparse(self): - - def _sparse(i): - return sparse_tensor.SparseTensorValue( - indices=[[0]], values=(i * [1]), dense_shape=[1]) - - iterator = (dataset_ops.Dataset.range(10) - .map(_sparse) - .apply(sliding.sliding_window_batch(4, 2)) - .apply(sliding.sliding_window_batch(3, 1)) - .make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.test_session() as sess: - sess.run(init_op) - # Slide: 1st batch. - actual = sess.run(get_next) - expected = sparse_tensor.SparseTensorValue( - indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], - [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], - [2, 0, 0], [2, 1, 0], [2, 2, 0], [2, 3, 0]], - values=[0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7], - dense_shape=[3, 4, 1]) - self.assertTrue(sparse_tensor.is_sparse(actual)) - self.assertSparseValuesEqual(actual, expected) - # Slide: 2nd batch. - actual = sess.run(get_next) - expected = sparse_tensor.SparseTensorValue( - indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], - [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], - [2, 0, 0], [2, 1, 0], [2, 2, 0], [2, 3, 0]], - values=[2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9], - dense_shape=[3, 4, 1]) - self.assertTrue(sparse_tensor.is_sparse(actual)) - self.assertSparseValuesEqual(actual, expected) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testSlideShapeError(self): - - def generator(): - yield [1.0, 2.0, 3.0] - yield [4.0, 5.0, 6.0] - yield [7.0, 8.0, 9.0, 10.0] - - iterator = (dataset_ops.Dataset.from_generator(generator, dtypes.float32, - output_shapes=[None]) - .apply(sliding.sliding_window_batch(3, 1)) - .make_initializable_iterator()) - next_element = iterator.get_next() - - with self.test_session() as sess: - sess.run(iterator.initializer) - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - r"Cannot batch tensors with different shapes in component 0. " - r"First element had shape \[3\] and element 2 had shape \[4\]."): - sess.run(next_element) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index c3331e9636..f03430c5c5 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -106,7 +106,6 @@ py_library( "interleave_ops.py", "resampling.py", "scan_ops.py", - "sliding.py", "stats_ops.py", "threadpool.py", "unique.py", diff --git a/tensorflow/contrib/data/python/ops/sliding.py b/tensorflow/contrib/data/python/ops/sliding.py deleted file mode 100644 index 19cc3cb89f..0000000000 --- a/tensorflow/contrib/data/python/ops/sliding.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Sliding dataset transformations.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.util import nest -from tensorflow.python.data.util import sparse -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import gen_dataset_ops - - -class _SlideDataset(dataset_ops.Dataset): - """A `Dataset` that passes a sliding window over its input.""" - - def __init__(self, input_dataset, window_size, stride=1): - """See `sliding_window_batch` for details.""" - super(_SlideDataset, self).__init__() - self._input_dataset = input_dataset - self._window_size = ops.convert_to_tensor( - window_size, dtype=dtypes.int64, name="window_size") - self._stride = ops.convert_to_tensor( - stride, dtype=dtypes.int64, name="stride") - - def _as_variant_tensor(self): - return gen_dataset_ops.slide_dataset( - self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - window_size=self._window_size, - stride=self._stride, - output_shapes=nest.flatten( - sparse.as_dense_shapes(self.output_shapes, self.output_classes)), - output_types=nest.flatten( - sparse.as_dense_types(self.output_types, self.output_classes))) - - @property - def output_classes(self): - return self._input_dataset.output_classes - - @property - def output_shapes(self): - input_shapes = self._input_dataset.output_shapes - return nest.pack_sequence_as(input_shapes, [ - tensor_shape.vector(None).concatenate(s) - for s in nest.flatten(self._input_dataset.output_shapes) - ]) - - @property - def output_types(self): - return self._input_dataset.output_types - - -def sliding_window_batch(window_size, stride=1): - """A sliding window with size of `window_size` and step of `stride`. - - This transformation passes a sliding window over this dataset. The - window size is `window_size` and step size is `stride`. If the left - elements cannot fill up the sliding window, this transformation will - drop the final smaller element. For example: - - ```python - # NOTE: The following examples use `{ ... }` to represent the - # contents of a dataset. - a = { [1], [2], [3], [4], [5], [6] } - - a.apply(tf.contrib.data.sliding_window_batch(window_size=3, stride=2)) == - { - [[1], [2], [3]], - [[3], [4], [5]], - } - ``` - - Args: - window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of - elements in the sliding window. - stride: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the - steps moving the sliding window forward for one iteration. The default - is `1`. It must be in `[1, window_size)`. - - Returns: - A `Dataset` transformation function, which can be passed to - @{tf.data.Dataset.apply}. - """ - def _apply_fn(dataset): - return _SlideDataset(dataset, window_size, stride) - - return _apply_fn diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 1bd73ee704..6bd3f5f09b 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -454,7 +454,6 @@ cuda_py_test( "//tensorflow/python:framework_test_lib", "//tensorflow/python:platform_test", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( @@ -1144,7 +1143,6 @@ cuda_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:platform_test", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index eb810e06dd..32aa2c0a4a 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -267,10 +267,7 @@ cuda_py_test( "//tensorflow/python/eager:test", "//tensorflow/python/keras", ], - tags = [ - "no_windows", # TODO: needs investigation on Windows - "notsan", - ], + tags = ["notsan"], ) filegroup( diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD index 2f6cfdf31e..f86331af6f 100644 --- a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD +++ b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD @@ -22,7 +22,6 @@ cuda_py_test( ":linear_regression", "//tensorflow:tensorflow_py", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD index ad8568ad44..90f10f1fa8 100644 --- a/tensorflow/contrib/factorization/BUILD +++ b/tensorflow/contrib/factorization/BUILD @@ -224,10 +224,7 @@ py_test( srcs = ["python/ops/kmeans_test.py"], shard_count = 4, srcs_version = "PY2AND3", - tags = [ - "nomac", # b/73741358 - "notsan", # b/67512932 - ], + tags = ["notsan"], # b/67512932 deps = [ ":factorization_py", ":factorization_py_CYCLIC_DEPENDENCIES_THAT_NEED_TO_GO", diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc index 35341406a0..e61221a6b0 100644 --- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc +++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc @@ -256,9 +256,6 @@ Status ReadInfoFile(const string& filename, uint32* width, uint32* height, if (p != std::string::npos) { string rgb24 = line.substr(p + 9, line.find(" ", p + 9)); rgb24 = rgb24.substr(0, rgb24.find(",")); - // Strip anything after " ", in case the format is - // `640x360 [SAR 1:1 DAR 16:9]` - rgb24 = rgb24.substr(0, rgb24.find(" ")); string rgb24_width = rgb24.substr(0, rgb24.find("x")); string rgb24_height = rgb24.substr(rgb24_width.length() + 1); if (strings::safe_strtou32(rgb24_width, &width_value) && @@ -273,10 +270,8 @@ Status ReadInfoFile(const string& filename, uint32* width, uint32* height, // We only look for the first stream mapping to have the number of the // frames. // Once processed we will not further process stream mapping section. - if (line.find("frame=") == 0) { - // The format might be `frame= 166 ` or `frame=12488 ` - string number = line.substr(6); - number = number.substr(number.find_first_not_of(" ")); + if (line.find("frame= ") == 0) { + string number = line.substr(8, line.find(" ", 8)); number = number.substr(0, number.find(" ")); if (strings::safe_strtou32(number, &frames_value)) { in_mapping = false; diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD index ff6f3b7441..0eb0e3cbe2 100644 --- a/tensorflow/contrib/gan/BUILD +++ b/tensorflow/contrib/gan/BUILD @@ -354,7 +354,6 @@ py_test( name = "classifier_metrics_test", srcs = ["python/eval/python/classifier_metrics_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":classifier_metrics", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/kafka/BUILD b/tensorflow/contrib/kafka/BUILD index 1c3974871c..efb403462a 100644 --- a/tensorflow/contrib/kafka/BUILD +++ b/tensorflow/contrib/kafka/BUILD @@ -1,93 +1,66 @@ -package(default_visibility = ["//tensorflow:internal"]) +package( + default_visibility = ["//visibility:private"], +) licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load( - "//tensorflow:tensorflow.bzl", - "tf_gen_op_wrapper_py", - "tf_kernel_library", - "tf_custom_op_library", - "tf_custom_op_py_library", - "tf_gen_op_libs", - "tf_py_test", -) - -py_library( - name = "kafka", - srcs = ["__init__.py"], - srcs_version = "PY2AND3", - deps = [ - ":dataset_ops", - ], -) - -tf_custom_op_library( - name = "_dataset_ops.so", - srcs = ["ops/dataset_ops.cc"], - deps = [":dataset_kernels"], -) - -tf_gen_op_libs( - op_lib_names = ["dataset_ops"], -) +load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs") +load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") +load("//tensorflow:tensorflow.bzl", "tf_kernel_library") +load("//tensorflow:tensorflow.bzl", "tf_py_test") -cc_library( - name = "dataset_kernels", +tf_kernel_library( + name = "kafka_kernels", srcs = ["kernels/kafka_dataset_ops.cc"], + visibility = ["//visibility:public"], deps = [ - "//tensorflow/core:framework_headers_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/kernels:bounds_check_lib", + "//tensorflow/core/kernels:dataset", "//third_party/eigen3", "@kafka", - "@protobuf_archive//:protobuf_headers", ], - alwayslink = 1, ) -py_library( - name = "dataset_ops", - srcs = [ - "python/ops/kafka_dataset_ops.py", - ], - srcs_version = "PY2AND3", +tf_gen_op_libs( + op_lib_names = ["kafka_ops"], deps = [ - ":kafka_op_loader", - "//tensorflow/python:dataset_ops_gen", - "//tensorflow/python:util", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", + "//tensorflow/core:lib", ], ) tf_gen_op_wrapper_py( - name = "gen_dataset_ops", - out = "python/ops/gen_dataset_ops.py", - deps = ["//tensorflow/contrib/kafka:dataset_ops_op_lib"], -) - -tf_kernel_library( - name = "dataset_ops_kernels", - deps = [ - ":dataset_kernels", - "//tensorflow/core:framework", - ], - alwayslink = 1, + name = "gen_kafka_ops", + out = "python/ops/gen_kafka_ops.py", + require_shape_functions = True, + deps = [":kafka_ops_op_lib"], ) -tf_custom_op_py_library( - name = "kafka_op_loader", - srcs = ["python/ops/kafka_op_loader.py"], - dso = ["//tensorflow/contrib/kafka:_dataset_ops.so"], - kernels = [ - ":dataset_ops_kernels", - "//tensorflow/contrib/kafka:dataset_ops_op_lib", +py_library( + name = "kafka", + srcs = [ + "__init__.py", + "python/ops/kafka_dataset_ops.py", ], srcs_version = "PY2AND3", + visibility = ["//visibility:public"], deps = [ - ":gen_dataset_ops", + ":gen_kafka_ops", "//tensorflow/contrib/util:util_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:platform", + "//tensorflow/python:state_ops", + "//tensorflow/python:training", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/ops:iterator_ops", + "//tensorflow/python/data/ops:readers", ], ) @@ -115,7 +88,6 @@ tf_py_test( ], tags = [ "manual", - "no_windows", "notap", ], ) @@ -123,9 +95,7 @@ tf_py_test( filegroup( name = "all_files", srcs = glob( - include = [ - "**/*", - ], + ["**/*"], exclude = [ "**/METADATA", "**/OWNERS", diff --git a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc index a4cd4a2cc4..88ef5f3571 100644 --- a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc +++ b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/kernels/dataset.h" + +#include "tensorflow/core/framework/tensor.h" #include "src-cpp/rdkafkacpp.h" diff --git a/tensorflow/contrib/kafka/ops/dataset_ops.cc b/tensorflow/contrib/kafka/ops/dataset_ops.cc deleted file mode 100644 index 8cdf16103b..0000000000 --- a/tensorflow/contrib/kafka/ops/dataset_ops.cc +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { - -REGISTER_OP("KafkaDataset") - .Input("topics: string") - .Input("servers: string") - .Input("group: string") - .Input("eof: bool") - .Input("timeout: int64") - .Output("handle: variant") - .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that emits the messages of one or more Kafka topics. - -topics: A `tf.string` tensor containing one or more subscriptions, - in the format of [topic:partition:offset:length], - by default length is -1 for unlimited. -servers: A list of bootstrap servers. -group: The consumer group id. -eof: If True, the kafka reader will stop on EOF. -timeout: The timeout value for the Kafka Consumer to wait - (in millisecond). -)doc"); - -} // namespace tensorflow diff --git a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py index a1624614d1..8e51d27a34 100644 --- a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py +++ b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py @@ -17,9 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.kafka.python.ops import kafka_op_loader # pylint: disable=unused-import -from tensorflow.contrib.kafka.python.ops import gen_dataset_ops -from tensorflow.python.data.ops.dataset_ops import Dataset +from tensorflow.contrib.kafka.python.ops import gen_kafka_ops +from tensorflow.python.data.ops.readers import Dataset from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -59,8 +58,8 @@ class KafkaDataset(Dataset): timeout, dtype=dtypes.int64, name="timeout") def _as_variant_tensor(self): - return gen_dataset_ops.kafka_dataset(self._topics, self._servers, - self._group, self._eof, self._timeout) + return gen_kafka_ops.kafka_dataset(self._topics, self._servers, self._group, + self._eof, self._timeout) @property def output_classes(self): diff --git a/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py b/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py deleted file mode 100644 index ec2fdea962..0000000000 --- a/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Python helper for loading kafka ops and kernels.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.util import loader -from tensorflow.python.platform import resource_loader - -_dataset_ops = loader.load_op_library( - resource_loader.get_path_to_datafile("../../_dataset_ops.so")) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD index d1c449402a..146ae8b7e2 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD +++ b/tensorflow/contrib/kfac/python/kernel_tests/BUILD @@ -114,7 +114,6 @@ py_test( name = "utils_test", srcs = ["utils_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ "//tensorflow/contrib/kfac/python/ops:utils", "//tensorflow/contrib/tpu", diff --git a/tensorflow/contrib/labeled_tensor/BUILD b/tensorflow/contrib/labeled_tensor/BUILD index 544065dac6..894e6f6946 100644 --- a/tensorflow/contrib/labeled_tensor/BUILD +++ b/tensorflow/contrib/labeled_tensor/BUILD @@ -70,7 +70,6 @@ py_test( "python/ops/core_test.py", ], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":_typecheck", ":core", diff --git a/tensorflow/contrib/layers/BUILD b/tensorflow/contrib/layers/BUILD index cc7bbabf21..852d06e1e3 100644 --- a/tensorflow/contrib/layers/BUILD +++ b/tensorflow/contrib/layers/BUILD @@ -188,7 +188,6 @@ py_test( size = "small", srcs = ["python/layers/normalization_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":layers_py", "//tensorflow/contrib/framework:framework_py", @@ -354,7 +353,6 @@ py_test( size = "small", srcs = ["python/ops/sparse_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":layers_py", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index ffa208540d..b62e3050cd 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -470,7 +470,7 @@ def embedding_lookup_unique(params, ids, name=None): ids = ops.convert_to_tensor(ids) shape = array_ops.shape(ids) ids_flat = array_ops.reshape( - ids, math_ops.reduce_prod(shape, keepdims=True)) + ids, math_ops.reduce_prod(shape, keep_dims=True)) unique_ids, idx = array_ops.unique(ids_flat) unique_embeddings = embedding_ops.embedding_lookup(params, unique_ids) embeds_flat = array_ops.gather(unique_embeddings, idx) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index b05f5eeaee..f837ca3265 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -5,8 +5,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load("//tensorflow:tensorflow.bzl", "py_test") - package(default_visibility = [ "//engedu/ml/tf_from_scratch:__pkg__", "//tensorflow:internal", @@ -117,7 +115,6 @@ py_test( size = "small", srcs = ["python/learn/learn_io/data_feeder_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/python:client_testlib", @@ -173,7 +170,6 @@ tf_py_test( "//tensorflow/python:variables", "//tensorflow/python/estimator", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) py_test( @@ -192,7 +188,6 @@ py_test( size = "small", srcs = ["python/learn/graph_actions_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/framework:framework_py", @@ -431,10 +426,7 @@ py_test( size = "medium", srcs = ["python/learn/estimators/kmeans_test.py"], srcs_version = "PY2AND3", - tags = [ - "noasan", # b/73741358 - "nomac", - ], + tags = ["noasan"], deps = [ ":learn", "//tensorflow/python:array_ops", @@ -593,7 +585,6 @@ py_test( size = "small", srcs = ["python/learn/learn_io/io_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/learn/python/learn/datasets", @@ -823,7 +814,6 @@ py_test( size = "small", srcs = ["python/learn/utils/saved_model_export_utils_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/layers:layers_py", diff --git a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py index 8f9811cf25..b3b067b8e1 100644 --- a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py @@ -61,7 +61,7 @@ def embedding_lookup(params, ids, name='embedding_lookup'): ids = ops.convert_to_tensor(ids) shape = array_ops_.shape(ids) ids_flat = array_ops_.reshape( - ids, math_ops.reduce_prod(shape, keepdims=True)) + ids, math_ops.reduce_prod(shape, keep_dims=True)) embeds_flat = nn.embedding_lookup(params, ids_flat, name) embed_shape = array_ops_.concat([shape, [-1]], 0) embeds = array_ops_.reshape(embeds_flat, embed_shape) diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile index b4504f246a..7f31629272 100644 --- a/tensorflow/contrib/lite/Makefile +++ b/tensorflow/contrib/lite/Makefile @@ -27,10 +27,10 @@ LIBDIR := $(MAKEFILE_DIR)/gen/lib/ GENDIR := $(MAKEFILE_DIR)/gen/obj/ # Settings for the host compiler. -CXX := $(CC_PREFIX)gcc +CXX := $(CC_PREFIX) gcc CXXFLAGS := --std=c++11 -O3 -DNDEBUG -CC := $(CC_PREFIX)gcc -CFLAGS := -O3 -DNDEBUG +CC := $(CC_PREFIX) gcc +CFLAGS := LDOPTS := LDOPTS += -L/usr/local/lib ARFLAGS := -r @@ -57,11 +57,10 @@ LIBS := \ # If we're on Linux, also link in the dl library. ifeq ($(HOST_OS),LINUX) - LIBS += -ldl + LIBS += -ldl -lpthread endif include $(MAKEFILE_DIR)/ios_makefile.inc -include $(MAKEFILE_DIR)/rpi_makefile.inc # This library is the main target for this makefile. It will contain a minimal # runtime that can be linked in to other programs. diff --git a/tensorflow/contrib/lite/arena_planner.h b/tensorflow/contrib/lite/arena_planner.h index f84b3dad95..58bc164619 100644 --- a/tensorflow/contrib/lite/arena_planner.h +++ b/tensorflow/contrib/lite/arena_planner.h @@ -33,7 +33,7 @@ class AllocationInfo; // each tensor needs to be allocated and deallocated, and preallocates all the // necessary memory (the PlanAllocations phase). It then assigns portions of // this memory buffer to each tensor (the ExecuteAllocations phase). Tensors may -// share some of the buffer if a tensor B is to be allocated after another tensor +// share some of the bufer if a tensor B is to be allocated after another tensor // A has been deallocated. // // If dynamic tensors are used the planning steps can be repeated during model diff --git a/tensorflow/contrib/lite/build_rpi_lib.sh b/tensorflow/contrib/lite/build_rpi_lib.sh deleted file mode 100755 index 3824b16412..0000000000 --- a/tensorflow/contrib/lite/build_rpi_lib.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -x -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -cd "$SCRIPT_DIR/../../.." - -CC_PREFIX=arm-linux-gnueabihf- make -j 3 -f tensorflow/contrib/lite/Makefile TARGET=RPI TARGET_ARCH=armv7 diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index ea3ae3489e..2218ea8eac 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -24,7 +24,7 @@ extern "C" { #endif // __cplusplus // The enum for builtin operators. -// Note: CUSTOM and DELEGATE are 2 special ops which are not real builtin +// Note: CUSTOM and DELEGATE are 2 special ops which are not real biultin // ops. typedef enum { kTfLiteBuiltinAdd = 0, diff --git a/tensorflow/contrib/lite/error_reporter.h b/tensorflow/contrib/lite/error_reporter.h index 3c5f805f12..da193d2586 100644 --- a/tensorflow/contrib/lite/error_reporter.h +++ b/tensorflow/contrib/lite/error_reporter.h @@ -30,7 +30,7 @@ namespace tflite { // va_list args; // foo.Report("test %d", args); // where args is va_list // -// Subclass ErrorReporter to provide another reporting destination. +// Sublclass ErrorReporter to provide another reporting destination. // For example, if you have a GUI program, you might redirect to a buffer // that drives a GUI error log box. class ErrorReporter { diff --git a/tensorflow/contrib/lite/g3doc/rpi.md b/tensorflow/contrib/lite/g3doc/rpi.md deleted file mode 100644 index 7a3a231626..0000000000 --- a/tensorflow/contrib/lite/g3doc/rpi.md +++ /dev/null @@ -1,50 +0,0 @@ -# TensorFlow Lite for Raspberry Pi - -## Cross compiling -### Installing toolchian -This has been tested on Ubuntu 16.04.3 64bit and Tensorflow devel docker image [tensorflow/tensorflow:nightly-devel](https://hub.docker.com/r/tensorflow/tensorflow/tags/). - -To cross compiling TensorFlow Lite. First you should install the toolchain and libs. -```bash -sudo apt-get update -sudo apt-get install crossbuild-essential-armhf -``` -> If you are using docker, you may not use `sudo` - -### Building -Clone this Tensorflow repository, Run this script at the root of the repository to download all the dependencies: -> The Tensorflow repository is in `/tensorflow` if you are using `tensorflow/tensorflow:nightly-devel` docker image, just try it. -```bash -./tensorflow/contrib/lite/download_dependencies.sh -``` -Note than you only need to to this once. - -You should then be able to compile: -```bash -./tensorflow/contrib/lite/build_rpi_lib.sh -``` - -This should compile a static library in: -`tensorflow/contrib/lite/gen/lib/rpi_armv7/libtensorflow-lite.a`. - -## Native compiling -This has been tested on Raspberry Pi 3b, Raspbian GNU/Linux 9.1 (stretch), gcc version 6.3.0 20170516 (Raspbian 6.3.0-18+rpi1). - -Log in to you RPI, install the toolchain. -```bash -sudo apt-get instal build-essential -``` - -First, clone this TensorFlow repository. Run this at the root of the repository: -```bash -./tensorflow/contrib/lite/download_dependencies.sh -``` -Note than you only need to to this once. - -You should then be able to compile: -```bash -./tensorflow/contrib/lite/build_rpi_lib.sh -``` - -This should compile a static library in: -`tensorflow/contrib/lite/gen/lib/rpi_armv7/libtensorflow-lite.a`. diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index 3749869f58..af143370ee 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -481,7 +481,7 @@ class Interpreter { // During Invoke(), Interpreter will allocate input tensors first, which are // known to be fixed size. Then it will allocate outputs from nodes as many // as possible. When there is a node that produces dynamic sized tensor. - // Interpreter will stop allocating tensors, set the value of next allocate + // Intepreter will stop allocating tensors, set the value of next allocate // node id, and execute the node to generate the output tensor before continue // to allocate successors. This process repeats until all nodes are executed. // NOTE: this relies on the order of nodes that is in topological order. diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 72d4acedbe..7a029c7df8 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -40,7 +40,7 @@ TEST(BasicInterpreter, InvokeInvalidModel) { ASSERT_EQ(interpreter.Invoke(), kTfLiteOk); } -// Test size accessor functions. +// Test size accesser functions. TEST(BasicInterpreter, TestSizeFunctions) { Interpreter interpreter; int base_index; diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc index e0cd12f1b4..b91ba1a03d 100644 --- a/tensorflow/contrib/lite/kernels/conv.cc +++ b/tensorflow/contrib/lite/kernels/conv.cc @@ -64,7 +64,7 @@ struct OpData { TfLitePaddingValues padding; // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multiplier plus a left shift. + // be represented as a fixed point multipler plus a left shift. int32_t output_multiplier; int output_shift; // The range of the fused activation layer. For example for kNone and diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc index cad9ce114c..15dbfe08c8 100644 --- a/tensorflow/contrib/lite/kernels/depthwise_conv.cc +++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc @@ -52,7 +52,7 @@ enum KernelType { struct OpData { TfLitePaddingValues padding; // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multiplier plus a left shift. + // be represented as a fixed point multipler plus a left shift. int32_t output_multiplier; int output_shift; // The range of the fused activation layer. For example for kNone and diff --git a/tensorflow/contrib/lite/kernels/fully_connected.cc b/tensorflow/contrib/lite/kernels/fully_connected.cc index 888e67966c..a77fe94e49 100644 --- a/tensorflow/contrib/lite/kernels/fully_connected.cc +++ b/tensorflow/contrib/lite/kernels/fully_connected.cc @@ -48,7 +48,7 @@ enum KernelType { struct OpData { // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multiplier plus a left shift. + // be represented as a fixed point multipler plus a left shift. int32_t output_multiplier; int output_shift; // The range of the fused activation layer. For example for kNone and diff --git a/tensorflow/contrib/lite/kernels/kernel_util.h b/tensorflow/contrib/lite/kernels/kernel_util.h index 21da1daff7..28f53b9fbb 100644 --- a/tensorflow/contrib/lite/kernels/kernel_util.h +++ b/tensorflow/contrib/lite/kernels/kernel_util.h @@ -58,7 +58,7 @@ inline bool IsConstantTensor(TfLiteTensor* tensor) { } // Determines whether tensor is dynamic. Note that a tensor can be non-const and -// not dynamic. This function specifically checks for a dynamic tensor. +// not dynamic. This function specificially checks for a dynamic tensor. inline bool IsDynamicTensor(TfLiteTensor* tensor) { return tensor->allocation_type == kTfLiteDynamic; } diff --git a/tensorflow/contrib/lite/kernels/lsh_projection.cc b/tensorflow/contrib/lite/kernels/lsh_projection.cc index 0ee35775d5..5f73b56ed9 100644 --- a/tensorflow/contrib/lite/kernels/lsh_projection.cc +++ b/tensorflow/contrib/lite/kernels/lsh_projection.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// LSH Projection projects an input to a bit vector via locality sensitive +// LSH Projection projects an input to a bit vector via locality senstive // hashing. // // Options: diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc index 8cf1165135..b9255b23a5 100644 --- a/tensorflow/contrib/lite/kernels/lstm.cc +++ b/tensorflow/contrib/lite/kernels/lstm.cc @@ -213,9 +213,9 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, // present. // 2) If projection weight is present, then projection bias is optional. // TODO(ghodrat): make sure this is correct. - const bool projection_tensors_consistent = + const bool projecton_tensors_consistent = ((projection_weights != nullptr) || (projection_bias == nullptr)); - TF_LITE_ENSURE(context, projection_tensors_consistent == true); + TF_LITE_ENSURE(context, projecton_tensors_consistent == true); return kTfLiteOk; } @@ -357,7 +357,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const int n_output = recurrent_to_output_weights->dims->data[1]; // Since we have already checked that weights are all there or none, we can - // check the existence of only one to get the condition. + // check the existense of only one to the get the condition. const bool use_cifg = (input_to_input_weights == nullptr); const bool use_peephole = (cell_to_output_weights != nullptr); diff --git a/tensorflow/contrib/lite/kernels/reshape.cc b/tensorflow/contrib/lite/kernels/reshape.cc index 438f70d311..f3e6ddc9f4 100644 --- a/tensorflow/contrib/lite/kernels/reshape.cc +++ b/tensorflow/contrib/lite/kernels/reshape.cc @@ -49,20 +49,20 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteIntArray* output_size = TfLiteIntArrayCreate(params->num_dimensions); int num_output_elements = 1; - int stretch_dim = -1; + int strech_dim = -1; for (int i = 0; i < params->num_dimensions; ++i) { int value = params->shape[i]; if (value == -1) { - TF_LITE_ENSURE_EQ(context, stretch_dim, -1); - stretch_dim = i; + TF_LITE_ENSURE_EQ(context, strech_dim, -1); + strech_dim = i; } else { num_output_elements *= value; output_size->data[i] = value; } } - if (stretch_dim != -1) { - output_size->data[stretch_dim] = num_input_elements / num_output_elements; - num_output_elements *= output_size->data[stretch_dim]; + if (strech_dim != -1) { + output_size->data[strech_dim] = num_input_elements / num_output_elements; + num_output_elements *= output_size->data[strech_dim]; } TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements); diff --git a/tensorflow/contrib/lite/kernels/reshape_test.cc b/tensorflow/contrib/lite/kernels/reshape_test.cc index aecbd0399f..0fbcf6e6aa 100644 --- a/tensorflow/contrib/lite/kernels/reshape_test.cc +++ b/tensorflow/contrib/lite/kernels/reshape_test.cc @@ -60,7 +60,7 @@ TEST(ReshapeOpTest, TooManyDimensions) { TEST(ReshapeOpTest, TooManySpecialDimensions) { EXPECT_DEATH(ReshapeOpModel({1, 2, 4, 1}, {-1, -1, 2, 4}), - "stretch_dim != -1"); + "strech_dim != -1"); } TEST(ReshapeOpTest, SimpleTest) { diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc index 0bb28b50b2..373310bd87 100644 --- a/tensorflow/contrib/lite/kernels/test_util.cc +++ b/tensorflow/contrib/lite/kernels/test_util.cc @@ -141,8 +141,8 @@ void SingleOpModel::SetBuiltinOp(BuiltinOperator type, void SingleOpModel::SetCustomOp( const string& name, const std::vector<uint8_t>& custom_option, - const std::function<TfLiteRegistration*()>& registration) { - custom_registrations_[name] = registration; + const std::function<TfLiteRegistration*()>& registeration) { + custom_registrations_[name] = registeration; opcodes_.push_back( CreateOperatorCodeDirect(builder_, BuiltinOperator_CUSTOM, name.data())); operators_.push_back(CreateOperator( diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc index 42941a97db..508a570e2e 100644 --- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc +++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc @@ -360,7 +360,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const int n_output = recurrent_to_output_weights->dims->data[1]; // Since we have already checked that weights are all there or none, we can - // check the existence of only one to get the condition. + // check the existense of only one to the get the condition. const bool use_cifg = (input_to_input_weights == nullptr); const bool use_peephole = (cell_to_output_weights != nullptr); diff --git a/tensorflow/contrib/lite/memory_planner.h b/tensorflow/contrib/lite/memory_planner.h index 0294ec815c..5cd6c20850 100644 --- a/tensorflow/contrib/lite/memory_planner.h +++ b/tensorflow/contrib/lite/memory_planner.h @@ -34,8 +34,8 @@ class MemoryPlanner { // [first_node, last_node]. virtual TfLiteStatus ExecuteAllocations(int first_node, int last_node) = 0; - // Invalidates allocations made earlier. This is called when tensors sizes - // have changed. All planned allocations remain, but can't be used until + // Invalidates allocations made earliers. This is called when tensors sizes + // have change. All planned allocations remain, but can't be used until // ExecuteAllocations() is called. virtual TfLiteStatus ResetAllocations() = 0; }; diff --git a/tensorflow/contrib/lite/model.h b/tensorflow/contrib/lite/model.h index 38eea0e26b..51a622a28d 100644 --- a/tensorflow/contrib/lite/model.h +++ b/tensorflow/contrib/lite/model.h @@ -81,7 +81,7 @@ class FlatBufferModel { const tflite::Model* model_spec, ErrorReporter* error_reporter = DefaultErrorReporter()); - // Releases memory or unmaps mmaped memory. + // Releases memory or unmaps mmaped meory. ~FlatBufferModel(); // Copying or assignment is disallowed to simplify ownership semantics. diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h index bd49d327c9..76032771af 100644 --- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h +++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h @@ -569,7 +569,7 @@ enum { ANEURALNETWORKS_LOGISTIC = 14, /** - * Projects an input to a bit vector via locality sensitive hashing. + * Projects an input to a bit vector via locality senstive hashing. * * Inputs: * * 0: Hash functions. Dim.size == 2, DataType: Float. diff --git a/tensorflow/contrib/lite/rpi_makefile.inc b/tensorflow/contrib/lite/rpi_makefile.inc deleted file mode 100644 index 832ef5824b..0000000000 --- a/tensorflow/contrib/lite/rpi_makefile.inc +++ /dev/null @@ -1,33 +0,0 @@ -# Settings for Raspberry Pi. -ifeq ($(TARGET), RPI) - ifeq ($(TARGET_ARCH), armv7) - CXXFLAGS += \ - -march=armv7-a \ - -mfpu=neon-vfpv4 \ - -funsafe-math-optimizations \ - -ftree-vectorize - - CCFLAGS += \ - -march=armv7-a \ - -mfpu=neon-vfpv4 \ - -funsafe-math-optimizations \ - -ftree-vectorize - - LDFLAGS := \ - -Wl,--no-export-dynamic \ - -Wl,--exclude-libs,ALL \ - -Wl,--gc-sections \ - -Wl,--as-needed - endif - - LIBS := \ - -lstdc++ \ - -lpthread \ - -lm \ - -ldl - - OBJDIR := $(OBJDIR)rpi_$(TARGET_ARCH)/ - LIBDIR := $(LIBDIR)rpi_$(TARGET_ARCH)/ - BINDIR := $(BINDIR)rpi_$(TARGET_ARCH)/ - DEPDIR := $(DEPDIR)rpi_$(TARGET_ARCH)/ -endif diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc index 640972de77..08bcfe4516 100644 --- a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc @@ -46,7 +46,7 @@ extern "C" { #endif // __cplusplus // The enum for builtin operators. -// Note: CUSTOM and DELEGATE are 2 special ops which are not real builtin +// Note: CUSTOM and DELEGATE are 2 special ops which are not real biultin // ops. typedef enum { )"; diff --git a/tensorflow/contrib/lite/simple_memory_arena.cc b/tensorflow/contrib/lite/simple_memory_arena.cc index 2f2004f56b..4aab244989 100644 --- a/tensorflow/contrib/lite/simple_memory_arena.cc +++ b/tensorflow/contrib/lite/simple_memory_arena.cc @@ -113,21 +113,21 @@ TfLiteStatus SimpleMemoryArena::Commit(TfLiteContext* context) { underlying_buffer_size_ = required_size; underlying_buffer_aligned_ptr_ = new_underlying_buffer_aligned_ptr; } - committed_ = true; + commited_ = true; return underlying_buffer_ != nullptr ? kTfLiteOk : kTfLiteError; } TfLiteStatus SimpleMemoryArena::ResolveAlloc(TfLiteContext* context, const ArenaAlloc& alloc, char** output_ptr) { - TF_LITE_ENSURE(context, committed_); + TF_LITE_ENSURE(context, commited_); TF_LITE_ENSURE(context, output_ptr != nullptr); *output_ptr = underlying_buffer_aligned_ptr_ + alloc.offset; return kTfLiteOk; } TfLiteStatus SimpleMemoryArena::Clear() { - committed_ = false; + commited_ = false; high_water_mark_ = 0; allocs_.clear(); return kTfLiteOk; diff --git a/tensorflow/contrib/lite/simple_memory_arena.h b/tensorflow/contrib/lite/simple_memory_arena.h index 5faf78b59e..0535522374 100644 --- a/tensorflow/contrib/lite/simple_memory_arena.h +++ b/tensorflow/contrib/lite/simple_memory_arena.h @@ -22,7 +22,7 @@ limitations under the License. namespace tflite { // This little structure holds the offset and the size for a dynamic memory -// allocation in the memory arena. When the arena is committed and the +// allocation in the memory arena. When the arena is commited and the // underlying buffer is set, the alloc can be resolved into an actual memory // pointer. struct ArenaAlloc { @@ -43,7 +43,7 @@ struct ArenaAlloc { class SimpleMemoryArena { public: explicit SimpleMemoryArena(size_t arena_alignment) - : committed_(false), + : commited_(false), arena_alignment_(arena_alignment), high_water_mark_(0), underlying_buffer_size_(0), @@ -73,7 +73,7 @@ class SimpleMemoryArena { } private: - bool committed_; + bool commited_; size_t arena_alignment_; size_t high_water_mark_; std::unique_ptr<char[]> underlying_buffer_; diff --git a/tensorflow/contrib/lookup/BUILD b/tensorflow/contrib/lookup/BUILD index 0a6edc33c5..8ca03f4193 100644 --- a/tensorflow/contrib/lookup/BUILD +++ b/tensorflow/contrib/lookup/BUILD @@ -46,7 +46,6 @@ tf_py_test( "//tensorflow/python:variables", ], grpc_enabled = True, - tags = ["no_windows"], # TODO: needs investigation on Windows ) filegroup( diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 6c3b02e12b..995230dfa8 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -194,8 +194,6 @@ with: srcs = glob(["libs/arm64-v8a/*.so"]), ``` -If you are building for Android TV (Shield TV devices), replace "portrait" with "landscape" for android:screenOrientation in all four activities in tensorflow/examples/android/AndroidManifest.xml - Then run: ```bash # Create dir for native libs diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh index 9b148688c4..2d99791839 100755 --- a/tensorflow/contrib/makefile/build_all_ios.sh +++ b/tensorflow/contrib/makefile/build_all_ios.sh @@ -80,9 +80,10 @@ if [[ ! -z "${OPTIMIZE_FOR_GRAPH}" ]]; then fi else echo "${PRNT_SLCTV_BIN} found. Using it" + ${PRNT_SLCTV_BIN} --graphs=${OPTIMIZE_FOR_GRAPH} > ${TOP_SRCDIR}/tensorflow/core/framework/ops_to_register.h + fi - ${PRNT_SLCTV_BIN} --graphs=${OPTIMIZE_FOR_GRAPH} > ${TOP_SRCDIR}/tensorflow/core/framework/ops_to_register.h fi if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then diff --git a/tensorflow/contrib/mpi/mpi_utils.h b/tensorflow/contrib/mpi/mpi_utils.h index df055ff567..fa297c28cb 100644 --- a/tensorflow/contrib/mpi/mpi_utils.h +++ b/tensorflow/contrib/mpi/mpi_utils.h @@ -24,8 +24,6 @@ limitations under the License. #include "tensorflow/core/lib/strings/str_util.h" -// Skip MPI C++ bindings support, this matches the usage in other places -#define OMPI_SKIP_MPICXX #include "third_party/mpi/mpi.h" #define MPI_CHECK(cmd) \ do { \ diff --git a/tensorflow/contrib/predictor/predictor_factories.py b/tensorflow/contrib/predictor/predictor_factories.py index 6e77e934fe..04b5d5bdf1 100644 --- a/tensorflow/contrib/predictor/predictor_factories.py +++ b/tensorflow/contrib/predictor/predictor_factories.py @@ -53,7 +53,7 @@ def from_contrib_estimator(estimator, `Estimator`. """ if isinstance(estimator, core_estimator.Estimator): - raise TypeError('Expected estimator to be of type ' + raise TypeError('Espected estimator to be of type ' 'tf.contrib.learn.Estimator, but got type ' 'tf.python.estimator.Estimator. You likely want to call ' 'from_estimator.') @@ -88,7 +88,7 @@ def from_estimator(estimator, `Estimator`. """ if isinstance(estimator, contrib_estimator.Estimator): - raise TypeError('Expected estimator to be of type ' + raise TypeError('Espected estimator to be of type ' 'tf.python.estimator.Estimator, but got type ' 'tf.contrib.learn.Estimator. You likely want to call ' 'from_contrib_estimator.') diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index 4bb6f76019..f624c42686 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -81,7 +81,6 @@ py_test( name = "builtin_functions_test", srcs = ["builtin_functions_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":test_lib", "//tensorflow/python:client_testlib", @@ -92,7 +91,6 @@ py_test( name = "call_trees_test", srcs = ["call_trees_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":test_lib", "//tensorflow/contrib/py2tf/impl", diff --git a/tensorflow/contrib/py2tf/converters/single_return.py b/tensorflow/contrib/py2tf/converters/single_return.py index 1194b98f5e..90bc22008f 100644 --- a/tensorflow/contrib/py2tf/converters/single_return.py +++ b/tensorflow/contrib/py2tf/converters/single_return.py @@ -212,7 +212,7 @@ class DetectReturnInUnsupportedControlFlow(gast.NodeVisitor): def __init__(self): self.cant_return = False - super(DetectReturnInUnsupportedControlFlow, self).__init__() + super(gast.NodeVisitor, self).__init__() def visit_While(self, node): self.cant_return = True diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index 8bc338e801..d029289f5a 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -83,7 +83,6 @@ py_test( name = "py_func_test", srcs = ["py_func_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":utils", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index 1afcbb8504..b278265639 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -237,7 +237,7 @@ def _FindFusedBatchNorms(graph): # The batch variance used during forward and backward prop is biased, # i.e it is calculated as: V=sum(x(k)-mu)^2/N. For the moving average # calculation, the variance is corrected by the term N/N-1 (Bessel's - # correction). The variance tensor read from FuseBatchNorm has Bessel's + # correction). The variance tensor read from FuseBatchNorm has bessel's # correction applied, so we undo it here. scope, sep, _ = bn_op.name.rpartition('/') g = ops.get_default_graph() @@ -306,7 +306,7 @@ def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay, Args: context: The scope under which we look for batch norm params - match: Object containing required batch norm tensors for correction + match: Object containg required batch norm tensors for correction computation. freeze_batch_norm_delay: Delay in steps at which computation switches from regular batch norm to frozen mean and variance. diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py index a4f7b1b221..0a8e35080c 100644 --- a/tensorflow/contrib/quantize/python/quant_ops.py +++ b/tensorflow/contrib/quantize/python/quant_ops.py @@ -282,8 +282,8 @@ def _FakeQuantWithMinMaxVars(inputs, min_var, max_var, per_channel, num_bits, Args: inputs: a tensor containing values to be quantized. min_var: a variable containing quantization range lower end(s). - max_var: a variable containing quantization range upper end(s). - per_channel: a boolean specifying whether to use per-channel quantization. + max_var: a variable containing quantization range lupper end(s). + per_channel: a boolean specifying whether to use per-channel quantizatioh. num_bits: Number of bits to use for quantization, must be between 2 and 8. narrow_range: Whether to use the narrow quantization range [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1]. diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index ec721afbc8..0608ab9302 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -267,7 +267,7 @@ def _InsertQuantOp(context, """Inserts a quant op between a producer op and (multiple) consumer ops. Args: - context: Context where producer and consumer operations are nested. + context: Context w,here producer and consumer operations are nested. name: Name for the new quantization op within the context. producer: Producer operation of the pairs where quantization will be inserted. diff --git a/tensorflow/contrib/quantize/python/quantize_graph.py b/tensorflow/contrib/quantize/python/quantize_graph.py index 5abdcd2475..5a3a74cec4 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph.py +++ b/tensorflow/contrib/quantize/python/quantize_graph.py @@ -158,7 +158,7 @@ def experimental_create_training_graph(input_graph=None, often fail. Args: - input_graph: The tf.Graph to be transformed, if None then defaults to the + input_graph: The tf.Graph to be transformed,if None then defaults to the default graph. weight_bits: Number of bits to use for quantizing weights. activation_bits: Number of bits to use for quantizing activations. diff --git a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py index db745aa562..0624cc878b 100644 --- a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py +++ b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py @@ -419,7 +419,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optional) and an activation. + # Manually add a bypass (optionaly) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -470,7 +470,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optional) and an activation. + # Manually add a bypass (optionaly) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -526,7 +526,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optional) and an activation. + # Manually add a bypass (optionaly) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -565,7 +565,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): stddev: Standard deviation of normal variable. Returns: - An initialized that initializes with a truncated normal variable. + An initialized that initialzes with a truncated normal variable. """ return init_ops.truncated_normal_initializer(stddev=stddev) diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py index b2e5707a6d..ef59475167 100644 --- a/tensorflow/contrib/quantize/python/quantize_test.py +++ b/tensorflow/contrib/quantize/python/quantize_test.py @@ -144,7 +144,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): stddev: Standard deviation of normal variable. Returns: - An initialized that initializes with a truncated normal variable. + An initialized that initialzes with a truncated normal variable. """ return init_ops.truncated_normal_initializer(stddev=stddev) diff --git a/tensorflow/contrib/remote_fused_graph/pylib/BUILD b/tensorflow/contrib/remote_fused_graph/pylib/BUILD index 54c66271cd..27f0a7f58f 100644 --- a/tensorflow/contrib/remote_fused_graph/pylib/BUILD +++ b/tensorflow/contrib/remote_fused_graph/pylib/BUILD @@ -38,6 +38,7 @@ py_test( size = "small", srcs = ["python/ops/remote_fused_graph_ops_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":remote_fused_graph_ops_py", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 358b2eb02b..73f2607d84 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -2133,7 +2133,7 @@ class Conv1DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_1d_lstm_cell", **kwargs): """Construct Conv1DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv1DLSTMCell, self).__init__(conv_ndims=1, name=name, **kwargs) + super(Conv1DLSTMCell, self).__init__(conv_ndims=1, **kwargs) class Conv2DLSTMCell(ConvLSTMCell): @@ -2144,7 +2144,7 @@ class Conv2DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_2d_lstm_cell", **kwargs): """Construct Conv2DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv2DLSTMCell, self).__init__(conv_ndims=2, name=name, **kwargs) + super(Conv2DLSTMCell, self).__init__(conv_ndims=2, **kwargs) class Conv3DLSTMCell(ConvLSTMCell): @@ -2155,7 +2155,7 @@ class Conv3DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_3d_lstm_cell", **kwargs): """Construct Conv3DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv3DLSTMCell, self).__init__(conv_ndims=3, name=name, **kwargs) + super(Conv3DLSTMCell, self).__init__(conv_ndims=3, **kwargs) def _conv(args, filter_size, num_features, bias, bias_start=0.0): diff --git a/tensorflow/contrib/saved_model/BUILD b/tensorflow/contrib/saved_model/BUILD index b10757df47..245fe07f2b 100644 --- a/tensorflow/contrib/saved_model/BUILD +++ b/tensorflow/contrib/saved_model/BUILD @@ -53,7 +53,6 @@ py_test( size = "small", srcs = ["python/saved_model/reader_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows visibility = ["//visibility:private"], deps = [ ":saved_model_py", diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py index 6adbb8be40..03fe31abf7 100644 --- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py @@ -299,13 +299,12 @@ class BeamSearchDecoder(decoder.Decoder): """ finished, start_inputs = self._finished, self._start_inputs - dtype = nest.flatten(self._initial_cell_state)[0].dtype log_probs = array_ops.one_hot( # shape(batch_sz, beam_sz) array_ops.zeros([self._batch_size], dtype=dtypes.int32), depth=self._beam_width, - on_value=ops.convert_to_tensor(0.0, dtype=dtype), - off_value=ops.convert_to_tensor(-np.Inf, dtype=dtype), - dtype=dtype) + on_value=0.0, + off_value=-np.Inf, + dtype=nest.flatten(self._initial_cell_state)[0].dtype) initial_state = BeamSearchDecoderState( cell_state=self._initial_cell_state, diff --git a/tensorflow/contrib/session_bundle/BUILD b/tensorflow/contrib/session_bundle/BUILD index 3ad88a8a22..67011c8fef 100644 --- a/tensorflow/contrib/session_bundle/BUILD +++ b/tensorflow/contrib/session_bundle/BUILD @@ -165,7 +165,6 @@ py_test( name = "gc_test", srcs = ["gc_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows visibility = ["//visibility:private"], deps = [ ":gc", diff --git a/tensorflow/contrib/slim/python/slim/data/BUILD b/tensorflow/contrib/slim/python/slim/data/BUILD index 7aa1684839..5daabbd62e 100644 --- a/tensorflow/contrib/slim/python/slim/data/BUILD +++ b/tensorflow/contrib/slim/python/slim/data/BUILD @@ -61,7 +61,6 @@ py_test( name = "dataset_data_provider_test", srcs = ["dataset_data_provider_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":dataset", ":dataset_data_provider", diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD index 07b6b1f142..1e4cc3f095 100644 --- a/tensorflow/contrib/tensor_forest/BUILD +++ b/tensorflow/contrib/tensor_forest/BUILD @@ -553,6 +553,7 @@ py_test( srcs = ["client/random_forest_test.py"], srcs_version = "PY2AND3", tags = [ + "no_windows", "nomac", # b/63258195 "notsan", ], diff --git a/tensorflow/contrib/tensorboard/BUILD b/tensorflow/contrib/tensorboard/BUILD index db2e000ef8..d833744d0c 100644 --- a/tensorflow/contrib/tensorboard/BUILD +++ b/tensorflow/contrib/tensorboard/BUILD @@ -9,7 +9,6 @@ exports_files(["LICENSE"]) # For platform specific build config load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") -load("//tensorflow:tensorflow.bzl", "py_test") tf_proto_library( name = "protos_all", diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 906cc3f034..c832c6f2e0 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -83,7 +83,6 @@ cc_library( "kernels/trt_engine_op.h", ], copts = tf_copts(), - visibility = ["//visibility:public"], deps = [ ":trt_logging", ":trt_resources", @@ -155,7 +154,6 @@ py_library( deps = [ ":trt_convert_py", ":trt_ops_py", - "//tensorflow/python:errors", ], ) diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md index 461e627e99..dfcce0fd00 100644 --- a/tensorflow/contrib/tensorrt/README.md +++ b/tensorflow/contrib/tensorrt/README.md @@ -2,8 +2,7 @@ Using TensorRT in TensorFlow ============================ This module provides necessary bindings and introduces TRT_engine_op -operator that wraps a subgraph in TensorRT. This is still a work in progress -but should be useable with most common graphs. +operator that wraps a subgraph in TensorRT. Compilation ----------- @@ -16,10 +15,26 @@ configure script should find the necessary components from the system automatically. If installed from tar packages, user has to set path to location where the library is installed during configuration. -```shell + +``` bazel build --config=cuda --config=opt //tensorflow/tools/pip_package:build_pip_package bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/ ``` After the installation of tensorflow package, TensorRT transformation -will be available. An example use can be found in test/test_tftrt.py directory +will be available. An example use is shown below. + +```python +import tensorflow as tf +import tensorflow.contrib.tensorrt as trt +#... create and train or load model +gdef = sess.graph.as_graph_def() +trt_gdef = trt.create_inference_graph( + gdef, #original graph_def + ["output"], #name of output node(s) + max_batch_size, #maximum batch size to run the inference + max_workspace_size_bytes) # max memory for TensorRT to use +tf.reset_default_graph() +tf.import_graph_def(graph_def=trt_gdef) +#...... run inference +``` diff --git a/tensorflow/contrib/tensorrt/__init__.py b/tensorflow/contrib/tensorrt/__init__.py index 140ad48282..fd551d70b4 100644 --- a/tensorflow/contrib/tensorrt/__init__.py +++ b/tensorflow/contrib/tensorrt/__init__.py @@ -18,18 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.framework import errors - -# pylint: disable=unused-import,wildcard-import,g-import-not-at-top -try: - from tensorflow.contrib.tensorrt.python import * -except errors.NotFoundError as e: - no_trt_message = ( - '**** Failed to initialize TensorRT. This is either because the TensorRT' - ' installation path is not in LD_LIBRARY_PATH, or because you do not have' - ' it installed. If not installed, please go to' - ' https://developer.nvidia.com/tensorrt to download and install' - ' TensorRT ****') - print(no_trt_message) - raise e -# pylint: enable=unused-import,wildcard-import,g-import-not-at-top +# pylint: disable=unused-import,wildcard-import +from tensorflow.contrib.tensorrt.python import * +# pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index eea8c8efa2..970f810473 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -15,7 +15,6 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/convert/convert_graph.h" -#include <list> #include <map> #include <set> #include <unordered_map> @@ -49,29 +48,13 @@ namespace tensorrt { namespace convert { namespace { -bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { +static bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { // LINT.IfChange // TODO(jie): Segmentation shouldn't associated with op name. // Split it into a registration for each kernel. static const std::set<string> candidate_ops = { - "Identity", - "Const", - "Conv2D", - "MaxPool", - "BiasAdd", - "Relu", - "Add", - "Mul", - "Sub", - "Rsqrt", - "Pad", - "Mean", - "AvgPool", - "ConcatV2", - "DepthwiseConv2dNative", - "FusedBatchNorm", - "FusedBatchNormV2", - // TODO(ben,jie): ... + "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", + "Add", "Mul", "Sub", "Rsqrt", "Pad" // "Placeholder" ,"Mean" }; // LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h) return candidate_ops.count(node_def.op()); @@ -86,8 +69,6 @@ void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, if (!subgraph_node_ids.count(edge->src()->id()) && !edge->src()->IsSource()) { incoming_edges->insert(edge); - } else { - VLOG(2) << edge->src()->name() << " N, "; } } } @@ -101,10 +82,7 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph, for (const tensorflow::Edge* edge : node->out_edges()) { if (!subgraph_node_ids.count(edge->dst()->id()) && !edge->dst()->IsSink()) { - VLOG(2) << edge->dst()->name() << " Y, "; outgoing_edges->insert(edge); - } else { - VLOG(2) << edge->dst()->name() << " N, "; } } } @@ -131,150 +109,74 @@ std::unordered_map<string, std::vector<int>> BuildTensorNameMap( } return result; } -// TODO(sami): convert references to pointers -struct ConvertGraphParams { - ConvertGraphParams( - tensorflow::Graph& inp_graph, - const std::vector<string>& output_node_names, - const std::set<int>& subgraph_node_id_numbers, - size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& current_graph_properties, - std::unordered_map<string, std::pair<int, string>>* output_edges, - int engine_precision_mode) - : graph(inp_graph), - output_names(output_node_names), - subgraph_node_ids(subgraph_node_id_numbers), - max_batch_size(max_supported_batch_size), - max_workspace_size_bytes(max_consumed_workspace_size_bytes), - graph_properties(current_graph_properties), - output_edge_map(output_edges), - precision_mode(engine_precision_mode) {} - tensorflow::Graph& graph; - const std::vector<string>& output_names; - const std::set<int>& subgraph_node_ids; - size_t max_batch_size; - size_t max_workspace_size_bytes; - const tensorflow::grappler::GraphProperties& graph_properties; - std::unordered_map<string, std::pair<int, string>>* output_edge_map; - int precision_mode; - std::vector<std::pair<int, int>> subgraph_inputs; - std::vector<std::pair<int, int>> subgraph_outputs; + +tensorflow::Status ConvertSubGraphToTensorRT( + const std::vector<string>& output_names, + const std::set<int>& subgraph_node_ids, + size_t max_batch_size, // Max batch size that engine will be created for + // Max amount of memory that engine will be allowed to consume, in bytes + size_t max_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& graph_properties, + tensorflow::Graph* graph) { tensorflow::EdgeSet subgraph_incoming_edges; - tensorflow::EdgeSet subgraph_outgoing_edges; -}; + GetSubGraphIncomingEdges(*graph, subgraph_node_ids, &subgraph_incoming_edges); + + std::vector<std::pair<int, int>> subgraph_inputs; -static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) { - GetSubGraphIncomingEdges(p->graph, p->subgraph_node_ids, - &p->subgraph_incoming_edges); - for (const tensorflow::Edge* edge : p->subgraph_incoming_edges) { - p->subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); + // Collect inputs by looking for incoming edges + for (const tensorflow::Edge* edge : subgraph_incoming_edges) { + subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); } - auto output_name_to_index_map = BuildTensorNameMap(p->output_names); std::set<std::pair<int, int>> subgraph_outputs_set; // Collect outputs referenced from output_names - for (int node_id : p->subgraph_node_ids) { - tensorflow::Node* node = p->graph.FindNodeId(node_id); + auto output_name_to_index_map = BuildTensorNameMap(output_names); + for (int node_id : subgraph_node_ids) { + tensorflow::Node* node = graph->FindNodeId(node_id); if (output_name_to_index_map.count(node->name())) { for (int index : output_name_to_index_map.at(node->name())) { subgraph_outputs_set.insert({node_id, index}); } } } - GetSubGraphOutgoingEdges(p->graph, p->subgraph_node_ids, - &p->subgraph_outgoing_edges); - for (const tensorflow::Edge* edge : p->subgraph_outgoing_edges) { + // Collect outputs referenced from outgoing edges + tensorflow::EdgeSet subgraph_outgoing_edges; + GetSubGraphOutgoingEdges(*graph, subgraph_node_ids, &subgraph_outgoing_edges); + for (const tensorflow::Edge* edge : subgraph_outgoing_edges) { subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); } - p->subgraph_outputs.reserve(subgraph_outputs_set.size()); - p->subgraph_outputs.insert(p->subgraph_outputs.begin(), - subgraph_outputs_set.begin(), - subgraph_outputs_set.end()); - return tensorflow::Status::OK(); -}; - -tensorflow::Status GetCalibNode(ConvertGraphParams* params) { - TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); + // Impose an ordering on the outputs + std::vector<std::pair<int, int>> subgraph_outputs( + subgraph_outputs_set.begin(), subgraph_outputs_set.end()); + // Build TensorRT node and add it to the graph tensorflow::NodeDef trt_node_def; - SubGraphParams s(params->graph, params->subgraph_node_ids, - params->subgraph_inputs, params->subgraph_outputs, - params->max_batch_size, params->max_workspace_size_bytes, - params->graph_properties, params->output_edge_map, - &trt_node_def, params->precision_mode); - TF_RETURN_IF_ERROR(InjectCalibrationNode(s)); + TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef( + *graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs, + max_batch_size, max_workspace_size_bytes, graph_properties, + &trt_node_def)); tensorflow::Status status; - tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); - - TF_RETURN_IF_ERROR(status); - - for (auto in_edge : - params->subgraph_incoming_edges) { // loop over incoming edges and - // attach them to calib node - // tensorflow::Node* src_node = in_edge->src(); - auto src_output = in_edge->src_output(); - auto dst_node = in_edge->dst(); - auto dst_input = in_edge->dst_input(); - VLOG(1) << " update edge " << trt_node->name() << ":" << src_output - << " -> " << dst_node->name() << ":" << dst_input; - TF_RETURN_IF_ERROR( - params->graph.UpdateEdge(trt_node, src_output, dst_node, dst_input)); - } - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { - TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); - tensorflow::NodeDef trt_node_def; - - SubGraphParams s(params->graph, params->subgraph_node_ids, - params->subgraph_inputs, params->subgraph_outputs, - params->max_batch_size, params->max_workspace_size_bytes, - params->graph_properties, params->output_edge_map, - &trt_node_def, params->precision_mode); - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(s)); - tensorflow::Status status; - tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); - - // AddNode does not wire edges. - // Re-map incoming edges to use the new TRT node instead of the orig subgraph - std::map<std::pair<int, int>, int> subgraph_edge_to_input_map; - for (size_t i = 0; i < params->subgraph_inputs.size(); ++i) { - subgraph_edge_to_input_map.insert({params->subgraph_inputs.at(i), i}); - } - for (const tensorflow::Edge* edge : params->subgraph_incoming_edges) { - std::pair<int, int> old_src = {edge->src()->id(), edge->src_output()}; - int new_src_output = subgraph_edge_to_input_map.at(old_src); - params->graph.AddEdge(edge->src(), edge->src_output(), trt_node, - new_src_output); - params->graph.RemoveEdge(edge); - } - - VLOG(2) << "new wiring edges: " << trt_node->in_edges().size(); - for (const tensorflow::Edge* edge : trt_node->in_edges()) { - VLOG(2) << edge->src()->name() << " port: " << edge->src_output(); - } - + tensorflow::Node* trt_node = graph->AddNode(trt_node_def, &status); TF_RETURN_IF_ERROR(status); // Re-map outgoing edges to use the new TRT node instead of the orig subgraph std::map<std::pair<int, int>, int> subgraph_edge_to_output_map; - for (size_t i = 0; i < params->subgraph_outputs.size(); ++i) { - subgraph_edge_to_output_map.insert({params->subgraph_outputs.at(i), i}); + for (size_t i = 0; i < subgraph_outputs.size(); ++i) { + subgraph_edge_to_output_map.insert({subgraph_outputs.at(i), i}); } TF_RETURN_IF_ERROR(status); - for (const tensorflow::Edge* edge : params->subgraph_outgoing_edges) { + for (const tensorflow::Edge* edge : subgraph_outgoing_edges) { std::pair<int, int> old_src = {edge->src()->id(), edge->src_output()}; int new_src_output = subgraph_edge_to_output_map.at(old_src); - TF_RETURN_IF_ERROR(params->graph.UpdateEdge( - trt_node, new_src_output, edge->dst(), edge->dst_input())); + TF_RETURN_IF_ERROR(graph->UpdateEdge(trt_node, new_src_output, edge->dst(), + edge->dst_input())); } // Remove the original subgraph - for (int node_id : params->subgraph_node_ids) { - tensorflow::Node* node = params->graph.FindNodeId(node_id); + for (int node_id : subgraph_node_ids) { + tensorflow::Node* node = graph->FindNodeId(node_id); // Don't remove the input placeholders if (node->type_string() == "Placeholder") { continue; } - params->graph.RemoveNode(node); + graph->RemoveNode(node); } return tensorflow::Status::OK(); } @@ -292,39 +194,12 @@ tensorflow::Status BuildNodeMap( } } // namespace -tensorflow::Status ConvertCalibGraphToInferGraph( - const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* infer_graph) { - VLOG(0) << "Starting Calib Conversion"; - tensorflow::Graph graph(tensorflow::OpRegistry::Global()); - TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( - tensorflow::GraphConstructorOptions(), graph_def, &graph)); - // get calib nodes - std::vector<tensorflow::Node*> calib_nodes; - for (auto node : graph.op_nodes()) { - if (node->type_string() == "TRTCalibOp") { - VLOG(1) << "Found Calib Node"; - calib_nodes.push_back(node); - } - } - VLOG(0) << "Num Calib nodes in graph= " << calib_nodes.size(); - if (calib_nodes.size() == 0) - return tensorflow::errors::FailedPrecondition( - "Graph doesn't contain any calibration nodes!." - " Please generate calibration graph and run calibration first"); - for (auto n : calib_nodes) { - TF_RETURN_IF_ERROR( - tensorrt::convert::ConvertCalibrationNodeToEngineNode(graph, n)); - } - graph.ToGraphDef(infer_graph); - return tensorflow::Status::OK(); -} tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector<string>& output_names, size_t max_batch_size, - size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, - int precision_mode = FP32MODE, int minimum_segment_size = 3) { - // optimization pass + size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def) { + // Optimization pass tensorflow::grappler::GrapplerItem item; item.fetch = output_names; tensorflow::GraphDef gdef; @@ -334,23 +209,16 @@ tensorflow::Status ConvertGraphDefToTensorRT( tensorflow::grappler::LayoutOptimizer optimizer; tensorflow::grappler::Cluster* cluster; - // virtual cluster + // Virtual cluster tensorflow::DeviceProperties device_properties; - device_properties.set_type("GPU"); device_properties.mutable_environment()->insert({"architecture", "6"}); cluster = new tensorflow::grappler::VirtualCluster({{"/GPU:0", device_properties}}); - // single machine - int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores(); - int num_gpus = tensorflow::grappler::GetNumAvailableGPUs(); - VLOG(2) << "cpu_cores: " << num_cpu_cores; - VLOG(2) << "gpus: " << num_gpus; - TF_RETURN_IF_ERROR(optimizer.Optimize(cluster, item, &gdef)); - // constant folding + // Constant folding item.graph = gdef; tensorflow::grappler::ConstantFolding fold(nullptr); TF_RETURN_IF_ERROR(fold.Optimize(nullptr, item, &gdef)); @@ -358,6 +226,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( // AJ refactoring shape inference through grappler/GraphProperties. tensorflow::grappler::GraphProperties static_graph_properties(item); TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(false)); + // Build full graph tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), gdef.library()); @@ -374,7 +243,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( } // TODO(sami): this should be passed as a knob!!!! - segment_options.minimum_segment_size = minimum_segment_size; + segment_options.minimum_segment_size = 2; tensorflow::tensorrt::segment::SegmentNodesVector segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( gdef, IsTensorRTCandidate, segment_options, &segments)); @@ -383,37 +252,14 @@ tensorflow::Status ConvertGraphDefToTensorRT( } std::unordered_map<string, tensorflow::Node*> node_map; TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); - std::unordered_map<string, std::pair<int, string>> output_edge_map; - int count = 0; - float total_num_nodes_in_segments = 0.; - for (auto s : segments) { - total_num_nodes_in_segments += s.size(); - } for (const std::set<string>& subgraph_node_names : segments) { std::set<int> subgraph_node_ids; - size_t max_mem_per_engine = - max_workspace_size_bytes * - ((float)subgraph_node_names.size() / total_num_nodes_in_segments); - std::stringstream oss; for (const string& node_name : subgraph_node_names) { - oss << " " << node_name; subgraph_node_ids.insert(node_map.at(node_name)->id()); } - VLOG(2) << "Subgraph nodes" << oss.str(); - ConvertGraphParams p(graph, output_names, subgraph_node_ids, max_batch_size, - max_mem_per_engine, static_graph_properties, - &output_edge_map, precision_mode); - if (precision_mode == INT8MODE) { - TF_RETURN_IF_ERROR(GetCalibNode(&p)); - } else { - tensorflow::Status status = ConvertSubGraphToTensorRT(&p); - if (status != tensorflow::Status::OK()) { - LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count - << " due to: \n" - << status.ToString() << " SKIPPING......"; - } - count++; - } + TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT( + output_names, subgraph_node_ids, max_batch_size, + max_workspace_size_bytes, static_graph_properties, &graph)); } graph.ToGraphDef(new_graph_def); return tensorflow::Status::OK(); diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index e1596e89e2..154ad3f2e8 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -28,11 +28,6 @@ namespace tensorflow { namespace tensorrt { namespace convert { -// This method converts an already generated calibration graph which was used in -// calibration runs to an inference graph -tensorflow::Status ConvertCalibGraphToInferGraph( - const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* new_graph_def); - // max_batch_size: maximum batch size which can be used for inference for // optimization targets inference run with max batch size. // max_workspace_size_bytes: The upper bound of memory allowence for @@ -40,8 +35,7 @@ tensorflow::Status ConvertCalibGraphToInferGraph( tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector<string>& output_names, size_t max_batch_size, - size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, - int precision_mode, int minimum_segment_size); + size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def); } // namespace convert } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 75a3c3d034..9ee717dd7f 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -24,10 +24,6 @@ limitations under the License. #include <utility> #include <vector> -#include "tensorflow/contrib/tensorrt/log/trt_logger.h" -#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" -#include "tensorflow/contrib/tensorrt/resources/trt_resources.h" -#include "tensorflow/core/framework/node_def.pb.h" // NOLINT #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/tensor_shape.pb.h" // NOLINT #include "tensorflow/core/framework/types.h" @@ -36,7 +32,6 @@ limitations under the License. #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/tensor_coding.h" @@ -44,6 +39,7 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT +#include "tensorflow/contrib/tensorrt/log/trt_logger.h" #include "tensorrt/include/NvInfer.h" // Check if the types are equal. Cast to int first so that failure log message @@ -53,7 +49,6 @@ limitations under the License. namespace tensorflow { namespace tensorrt { namespace convert { -using ::tensorflow::strings::StrCat; namespace { @@ -70,8 +65,7 @@ inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype, *trt_dtype = nvinfer1::DataType::kHALF; break; default: - return tensorflow::errors::InvalidArgument( - "Unsupported data type " + tensorflow::DataTypeString(tf_dtype)); + return tensorflow::errors::InvalidArgument("Unsupported data type"); } return tensorflow::Status::OK(); } @@ -118,18 +112,6 @@ static std::vector<std::pair<int, int>> CreateSamePadding( return padding; } -string GetCommonNameScope(const string& op_name_a, const string& op_name_b) { - size_t last_scope_separator = 0; - for (size_t i = 0; i < std::min(op_name_a.size(), op_name_b.size()); ++i) { - if (op_name_a[i] != op_name_b[i]) { - break; - } else if (op_name_a[i] == '/') { - last_scope_separator = i + 1; - } - } - return op_name_a.substr(0, last_scope_separator); -} - class TRT_ShapedWeights { public: TRT_ShapedWeights(tensorflow::DataType type, const void* values, @@ -263,11 +245,6 @@ std::vector<int> TFAttrs::get<std::vector<int>>(string key) const { } template <> -std::vector<string> TFAttrs::get<std::vector<string>>(string key) const { - auto attr = this->at(key)->list().s(); - return std::vector<string>(attr.begin(), attr.end()); -} -template <> nvinfer1::Dims TFAttrs::get<nvinfer1::Dims>(string key) const { auto values = this->get<std::vector<int>>(key); nvinfer1::Dims dims; @@ -289,17 +266,6 @@ tensorflow::DataType TFAttrs::get<tensorflow::DataType>(string key) const { return this->at(key)->type(); } -template <> -float TFAttrs::get<float>(string key) const { - return this->at(key)->f(); -} - -template <> -bool TFAttrs::get<bool>(string key) const { - return this->at(key)->b(); -} - -// TODO(jie): reorder4 & reorder2 should be merged? template <typename T> void Reorder4(nvinfer1::DimsNCHW shape, const T* idata, nvinfer1::DimsNCHW istrides, T* odata, @@ -317,87 +283,29 @@ void Reorder4(nvinfer1::DimsNCHW shape, const T* idata, } } -template <typename T> -void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides, - T* odata, nvinfer1::DimsHW ostrides) { - for (int h = 0; h < shape.h(); ++h) { - for (int w = 0; w < shape.w(); ++w) { - odata[h * ostrides.h() + w * ostrides.w()] = - idata[h * ostrides.h() + w * ostrides.w()]; - } - } -} - -// TODO(jie): fallback to tensorflow!! -void ReorderCKtoKC(const TRT_ShapedWeights& iweights, - TRT_ShapedWeights* oweights) { - int c = iweights.shape_.d[0]; - int k = iweights.shape_.d[1]; - oweights->shape_.d[0] = k; - oweights->shape_.d[1] = c; - nvinfer1::DimsHW istrides = {1, k}; - nvinfer1::DimsHW ostrides = {c, 1}; - switch (iweights.type_) { - case tensorflow::DataType::DT_FLOAT: { - Reorder2({k, c}, static_cast<float const*>(iweights.GetValues()), - istrides, - static_cast<float*>(const_cast<void*>(oweights->GetValues())), - ostrides); - break; - } - case tensorflow::DataType::DT_HALF: { - Reorder2( - {k, c}, static_cast<Eigen::half const*>(iweights.GetValues()), - istrides, - static_cast<Eigen::half*>(const_cast<void*>(oweights->GetValues())), - ostrides); - break; - } - default: - LOG(FATAL) << "Unsupported type in reorder expected fp32 or fp16 but got " - << DataTypeString(iweights.type_); - } -} - void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights, - TRT_ShapedWeights* oweights, int num_groups) { + TRT_ShapedWeights* oweights) { CHECK_EQ(iweights.type_, oweights->type_); CHECK_EQ(iweights.size_bytes(), oweights->size_bytes()); int r = iweights.shape_.d[0]; int s = iweights.shape_.d[1]; - // TRT requires GKcRS, while TF depthwise has RSCK - // where c=1, C=G - VLOG(2) << "num_groups: " << num_groups; - int c = iweights.shape_.d[2] / num_groups; - VLOG(2) << "c" << iweights.shape_.d[2] << " then " << c; - int k = iweights.shape_.d[3] * num_groups; - VLOG(2) << "k" << iweights.shape_.d[3] << " then " << k; - oweights->shape_.d[0] = k / num_groups; - oweights->shape_.d[1] = c * num_groups; + int c = iweights.shape_.d[2]; + int k = iweights.shape_.d[3]; + oweights->shape_.d[0] = k; + oweights->shape_.d[1] = c; oweights->shape_.d[2] = r; oweights->shape_.d[3] = s; nvinfer1::DimsNCHW istrides = {1, k, s * k * c, c * k}; nvinfer1::DimsNCHW ostrides = {c * r * s, r * s, s, 1}; switch (iweights.type_) { - case tensorflow::DataType::DT_FLOAT: { + case tensorflow::DataType::DT_FLOAT: Reorder4({k, c, r, s}, static_cast<float const*>(iweights.GetValues()), istrides, static_cast<float*>(const_cast<void*>(oweights->GetValues())), ostrides); break; - } - case tensorflow::DataType::DT_HALF: { - Reorder4( - {k, c, r, s}, static_cast<Eigen::half const*>(iweights.GetValues()), - istrides, - static_cast<Eigen::half*>(const_cast<void*>(oweights->GetValues())), - ostrides); - break; - } - default: - LOG(FATAL) << "Unsupported type, expected fp32 or fp16 but got " - << DataTypeString(iweights.type_); + LOG(FATAL) << "!!!!!!!!!!!!!!!!!!!!!!!!broke!!!!!!!!!!!!"; } } @@ -415,11 +323,12 @@ inline std::shared_ptr<T> infer_object(T* obj) { return std::shared_ptr<T>(obj, InferDeleter()); } +// Logger for GIE info/warning/errors class Converter; using OpConverter = std::function<tensorflow::Status(Converter&, const tensorflow::NodeDef&, - const std::vector<TRT_TensorOrWeights>&, + std::vector<TRT_TensorOrWeights> const&, std::vector<TRT_TensorOrWeights>*)>; class Converter { @@ -427,57 +336,34 @@ class Converter { std::unordered_map<string, OpConverter> op_registry_; nvinfer1::INetworkDefinition* trt_network_; std::list<std::vector<uint8_t>> temp_bufs_; - tensorflow::tensorrt::TRTWeightStore* weight_store_; - bool fp16_; + void register_op_converters(); + std::vector<TRT_TensorOrWeights> get_inputs( const tensorflow::NodeDef& node_def) { std::vector<TRT_TensorOrWeights> inputs; - for (auto const& input_name : node_def.input()) { - /************************************************************************* - * TODO(jie) handle case 1) here - * Normalizes the inputs and extracts associated metadata: - * 1) Inputs can contain a colon followed by a suffix of characters. - * That suffix may be a single number (e.g. inputName:1) or several - * word characters separated from a number by a colon - * (e.g. inputName:foo:1). The - * latter case is used to denote inputs and outputs of functions. - * 2) Control dependency inputs contain caret at the beginning and we - * remove this and annotate the edge as a control dependency. - ************************************************************************/ - string name = input_name[0] == '^' ? input_name.substr(1) : input_name; - auto first = name.find_first_of(':'); - if (first != string::npos && first + 2 == name.size() && - name[first + 1] == '0') - name.erase(first); - - VLOG(2) << "retrieve input: " << name; - if (trt_tensors_.count(name)) { - inputs.push_back(trt_tensors_.at(name)); - } else { - LOG(FATAL) << "input: " << name << " not availabled for node at, " - << node_def.name(); - } + for (const auto& input_name : node_def.input()) { + VLOG(2) << "Retrieve input: " << input_name; + inputs.push_back(trt_tensors_.at(input_name)); } return inputs; } public: - explicit Converter(nvinfer1::INetworkDefinition* trt_network, - tensorflow::tensorrt::TRTWeightStore* ws, bool fp16) - : trt_network_(trt_network), weight_store_(ws), fp16_(fp16) { + explicit Converter(nvinfer1::INetworkDefinition* trt_network) + : trt_network_(trt_network) { this->register_op_converters(); } - tensorflow::tensorrt::TRTWeightStore* weight_store() { return weight_store_; } + TRT_ShapedWeights get_temp_weights(tensorflow::DataType type, nvinfer1::Dims shape) { TRT_ShapedWeights weights(type, nullptr, shape); // TODO(jie): check weights size_bytes. 0 means type error - weight_store_->store_.push_back(std::vector<uint8_t>(weights.size_bytes())); - weights.SetValues(weight_store_->store_.back().data()); + temp_bufs_.push_back(std::vector<uint8_t>(weights.size_bytes())); + weights.SetValues(temp_bufs_.back().data()); return weights; } - bool isFP16() { return fp16_; }; + TRT_ShapedWeights get_temp_weights_like(const TRT_ShapedWeights& weights) { return this->get_temp_weights(weights.type_, weights.shape_); } @@ -496,7 +382,7 @@ class Converter { TRT_TensorOrWeights output = outputs.at(i); // TODO(jie): tf protobuf seems to be omitting the :0 suffix string output_name = node_def.name(); - if (i != 0) output_name = StrCat(output_name, ":", i); + if (i != 0) output_name = output_name + ":" + std::to_string(i); if (output.is_tensor()) { output.tensor()->setName(output_name.c_str()); } @@ -562,7 +448,7 @@ struct LambdaFactory { switch (op) { case OP_CATEGORY::RSQRT: { VLOG(2) << "RSQRT GETS DONE"; - return [](T t) -> T { return 1.0 / sqrt(t); }; + return [](T t) -> T { return 1.0 / std::sqrt(t); }; } case OP_CATEGORY::NEG: return [](T t) -> T { return -t; }; @@ -648,22 +534,6 @@ struct LambdaFactory { } }; -template <> -std::function<Eigen::half(Eigen::half)> LambdaFactory::unary<Eigen::half>() { - switch (op) { - case OP_CATEGORY::RSQRT: { - VLOG(2) << "RSQRT GETS DONE"; - return [](Eigen::half t) -> Eigen::half { - return Eigen::half(1.0 / sqrt(float(t))); - }; - } - case OP_CATEGORY::NEG: - return [](Eigen::half t) -> Eigen::half { return -t; }; - default: - VLOG(2) << "Not supported op for unary: " << static_cast<int>(op); - return nullptr; - } -} tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights, TRT_ShapedWeights* oweights, LambdaFactory unary_op) { @@ -675,14 +545,6 @@ tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights, std::transform(inp, inp + iweights.count(), oup, unary_op.unary<float>()); break; } - case tensorflow::DataType::DT_HALF: { - auto inp = static_cast<Eigen::half const*>(iweights.GetValues()); - auto oup = - static_cast<Eigen::half*>(const_cast<void*>(oweights->GetValues())); - std::transform(inp, inp + iweights.count(), oup, - unary_op.unary<Eigen::half>()); - break; - } default: return tensorflow::errors::Unimplemented( "Data type not supported: " + @@ -726,32 +588,6 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l, } break; } - case tensorflow::DataType::DT_HALF: { - auto inp_l = static_cast<const Eigen::half*>(iweights_l.GetValues()); - auto inp_r = static_cast<const Eigen::half*>(iweights_r.GetValues()); - auto oup = - static_cast<Eigen::half*>(const_cast<void*>(oweights->GetValues())); - - if (iweights_l.count() != iweights_r.count()) { - // We only supports broadcast of RankZero - if (iweights_l.count() == 1) { - VLOG(2) << "I bet it is not working!" << (*inp_l); - std::transform(inp_r, inp_r + iweights_r.count(), oup, - binary_op.broadcast_l<Eigen::half>(*inp_l)); - } else if (iweights_r.count() == 1) { - VLOG(2) << "I bet it is not working!" << (*inp_r); - std::transform(inp_l, inp_l + iweights_l.count(), oup, - binary_op.broadcast_r<Eigen::half>(*inp_r)); - } else { - return tensorflow::errors::Unimplemented( - "Binary op with non-rankZero broadcast not supported"); - } - } else { - std::transform(inp_l, inp_l + iweights_l.count(), inp_r, oup, - binary_op.binary<Eigen::half>()); - } - break; - } default: return tensorflow::errors::Unimplemented( "Data type not supported: " + @@ -763,7 +599,7 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l, tensorflow::Status ConstantFoldUnary( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, + std::vector<TRT_TensorOrWeights> const& inputs, std::vector<TRT_TensorOrWeights>* outputs) { TRT_ShapedWeights weights_input = inputs.at(0).weights(); @@ -777,12 +613,13 @@ tensorflow::Status ConstantFoldUnary( CHECK_EQ(weights_input.type_, TFAttrs(node_def).get<tensorflow::DataType>("T")); + // Maybe I should do a switch LambdaFactory unary_op; if (node_def.op() == "Rsqrt") { // Compute rsqrt unary_op.op = LambdaFactory::OP_CATEGORY::RSQRT; auto ret = UnaryCompute(weights_input, &weights_output, unary_op); - // Pass the output + // PAss the output if (ret == tensorflow::Status::OK()) { outputs->push_back(TRT_TensorOrWeights(weights_output)); } @@ -794,11 +631,11 @@ tensorflow::Status ConstantFoldUnary( } // TODO(jie,ben) broadcast is needed yet not implemented -// Let's get the simple stuff working first. Maybe we should fall back to TF +// Let's get the simple stuff working first. Maybe we should fall bakc to TF // approach for constant folding tensorflow::Status ConstantFoldBinary( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, + std::vector<TRT_TensorOrWeights> const& inputs, std::vector<TRT_TensorOrWeights>* outputs) { TRT_ShapedWeights weights_input_l = inputs.at(0).weights(); TRT_ShapedWeights weights_input_r = inputs.at(1).weights(); @@ -811,12 +648,12 @@ tensorflow::Status ConstantFoldBinary( "Binary op implicit broadcast not supported: " + node_def.op()); // TODO(jie): constant fold should really fall back to TF. - int num_dims = weights_input_l.shape_.nbDims; + int nb_dims = weights_input_l.shape_.nbDims; nvinfer1::Dims output_shape; - output_shape.nbDims = num_dims; - VLOG(2) << "nb_dims: " << num_dims + output_shape.nbDims = nb_dims; + VLOG(2) << "nb_dims: " << nb_dims << ", the other: " << weights_input_r.shape_.nbDims; - for (int i = 0; i < num_dims; i++) { + for (int i = 0; i < nb_dims; i++) { if (weights_input_l.shape_.d[i] == weights_input_r.shape_.d[i]) { output_shape.d[i] = weights_input_l.shape_.d[i]; } else if (weights_input_l.shape_.d[i] == 1 || @@ -841,6 +678,7 @@ tensorflow::Status ConstantFoldBinary( // Allocate output weights TRT_ShapedWeights weights_output = ctx.get_temp_weights(dtype, output_shape); + // Maybe I should do a switch LambdaFactory binary_op; if (node_def.op() == "Sub") { binary_op.op = LambdaFactory::OP_CATEGORY::SUB; @@ -874,90 +712,48 @@ tensorflow::Status BinaryTensorOpWeight( // Maybe this part has to be moved into the block of rsqrt later // Check type consistency + auto dtype = TFAttrs(node_def).get<nvinfer1::DataType>("T"); + CHECK_EQ_TYPE(tensor->getType(), dtype); // Cast to int for error messages nvinfer1::DataType ttype; TF_CHECK_OK(ConvertDType(weights.type_, &ttype)); + CHECK_EQ_TYPE(ttype, dtype); // Cast to int for error message // Check scale mode auto dims_w = weights.shape_; auto dims_t = tensor->getDimensions(); - // default to element-wise + // Default to channel-wise auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; - // TODO(jie): maybe use a permuatation instead to support more cases; - bool permutation_flag = false; - if (weights.count() == 1) { VLOG(2) << "UNIFORM"; scale_mode = nvinfer1::ScaleMode::kUNIFORM; } else { - // no broadcasting on Batch dimension; - VLOG(2) << "WEIGHTS DIM: " << dims_w.nbDims - << " tensor DIM: " << dims_t.nbDims; - if (dims_w.nbDims == dims_t.nbDims + 1) { - if (dims_w.d[0] == 1) { - for (int i = 1; i < dims_w.nbDims; i++) { - dims_w.d[i - 1] = dims_w.d[i]; - } - dims_w.nbDims--; - } else { - return tensorflow::errors::InvalidArgument( - "Binary op cannot operate on batch, " + node_def.name()); - } - } + // No broadcasting on Batch dimension; + assert(dims_w.d[0] == 1); - if (dims_w.nbDims == dims_t.nbDims && dims_w.d[0] == dims_t.d[0]) { - scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; - // default is element; - for (int i = 1; i < dims_w.nbDims; i++) { - if (dims_w.d[i] != dims_t.d[i]) { - // if dimension does not match, switch back to channel; - VLOG(2) << "channel"; - scale_mode = nvinfer1::ScaleMode::kCHANNEL; - break; - } - } - // if channel as candidate, validate it - if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) { - for (int i = 1; i < dims_w.nbDims; i++) { - if (dims_w.d[i] != 1) - return tensorflow::errors::InvalidArgument( - "Weight shape not compatible at, " + node_def.name()); - } - } else { - VLOG(2) << "elementwise"; + // Broadcasting on Channel dimension only allowed in kUNIFORM + assert(dims_w.d[1] == dims_t.d[0]); + assert(dims_w.nbDims == dims_t.nbDims); + + // Default is element; + for (int i = 2; i < dims_w.nbDims; i++) { + if (dims_w.d[i] != dims_t.d[i - 1]) { + scale_mode = nvinfer1::ScaleMode::kCHANNEL; + break; } - } else if (dims_w.nbDims == 1 && - dims_w.d[0] == dims_t.d[dims_t.nbDims - 1]) { - // channel wise and broadcast required; - permutation_flag = true; - scale_mode = nvinfer1::ScaleMode::kCHANNEL; - } else { - return tensorflow::errors::InvalidArgument( - "Weight shape not compatible at, " + node_def.name()); } - } - - // transpose last dimension - std::vector<int> permutation(dims_t.nbDims + 1); - if (permutation_flag) { - if (scale_mode == nvinfer1::ScaleMode::kCHANNEL && dims_t.nbDims > 1) { - // we swap the last dimension into channel for trt. - // because of tensorflow default broadcasting rules. - for (int i = 0; i < static_cast<int>(permutation.size()); i++) { - permutation[i] = i; + if (scale_mode == nvinfer1::ScaleMode::kELEMENTWISE) { + scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; + for (int i = 2; i < dims_w.nbDims; i++) { + if (dims_w.d[i] != 1) + return tensorflow::errors::InvalidArgument( + "Weight shape not compatible at, " + node_def.name()); } - permutation[1] = dims_t.nbDims; - permutation[dims_t.nbDims] = 1; - tensor = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor), - permutation); - } else { - return tensorflow::errors::InvalidArgument( - "Transpose cannot be applied, " + node_def.name()); } } - // prepare weights + // Prepare weights TRT_ShapedWeights shift_weights(weights.type_); TRT_ShapedWeights scale_weights(weights.type_); TRT_ShapedWeights power_weights(weights.type_); @@ -983,26 +779,88 @@ tensorflow::Status BinaryTensorOpWeight( scale_weights, power_weights); nvinfer1::ITensor* output_tensor = layer->getOutput(0); - // transpose back dimension - if (permutation_flag) { - output_tensor = ctx.TransposeTensor(output_tensor, permutation); - } // Pass the output outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); } -enum class ConvolutionType { DEFAULT, DEPTHWISE_CONV }; +tensorflow::Status BinaryTensorOpTensor( + Converter& ctx, const tensorflow::NodeDef& node_def, + const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, + std::vector<TRT_TensorOrWeights>* outputs) { + static const std::unordered_map<string, nvinfer1::ElementWiseOperation> ops{ + {"Add", nvinfer1::ElementWiseOperation::kSUM}, + {"Mul", nvinfer1::ElementWiseOperation::kPROD}, + // {"max", nvinfer1::ElementWiseOperation::kMAX}, + // {"min", nvinfer1::ElementWiseOperation::kMIN}, + {"Sub", nvinfer1::ElementWiseOperation::kSUB}, + {"Div", nvinfer1::ElementWiseOperation::kDIV}, + }; + + // FIXME assume type matches input weights + // Get trt type & shape + TFAttrs attrs(node_def); + // Maybe this part has to be moved into the block of rsqrt later + nvinfer1::DataType dtype = attrs.get<nvinfer1::DataType>("T"); + + // Check type consistency + CHECK_EQ_TYPE(tensor_l->getType(), dtype); + CHECK_EQ_TYPE(tensor_r->getType(), dtype); + auto op_pair = ops.find(node_def.op()); + if (op_pair == ops.end()) + return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + + " not supported at: " + + node_def.name()); -tensorflow::Status ConvertConv2DHelper( + nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( + *const_cast<nvinfer1::ITensor*>(tensor_l), + *const_cast<nvinfer1::ITensor*>(tensor_r), op_pair->second); + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + + // Pass the output + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertPlaceholder( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, - std::vector<TRT_TensorOrWeights>* outputs, - int group // group ==0 specifies depthwise conv -) { - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + std::vector<TRT_TensorOrWeights> const& inputs, + std::vector<TRT_TensorOrWeights>* outputs) { + VLOG(2) << "Placeholder should have been replace already"; + return tensorflow::errors::Unimplemented(", cannot convert Placeholder op"); + // OK this make sense since we are supposed to replace it with input + TFAttrs attrs(node_def); + nvinfer1::DataType dtype = attrs.get<nvinfer1::DataType>("dtype"); + nvinfer1::Dims dims = attrs.get<nvinfer1::Dims>("shape"); + + dims.nbDims--; + for (int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1]; + + nvinfer1::ITensor* output = + ctx.network()->addInput(node_def.name().c_str(), dtype, dims); + if (!output) { + return tensorflow::errors::InvalidArgument("Failed to create Input layer"); + } + outputs->push_back(TRT_TensorOrWeights(output)); + return tensorflow::Status::OK(); +} +tensorflow::Status ConvertConv2D(Converter& ctx, + const tensorflow::NodeDef& node_def, + const std::vector<TRT_TensorOrWeights>& inputs, + std::vector<TRT_TensorOrWeights>* outputs) { + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + // TODO(jie): handle NHWC/NCHW transpose; + TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); + TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); + ReorderRSCKToKCRS(weights_rsck, &weights); + TRT_ShapedWeights biases(weights.type_); + int noutput = weights.shape_.d[0]; + nvinfer1::DimsHW kernel_size; + kernel_size.h() = weights.shape_.d[2]; + kernel_size.w() = weights.shape_.d[3]; TFAttrs attrs(node_def); int h_index = 2; @@ -1016,31 +874,11 @@ tensorflow::Status ConvertConv2DHelper( // TODO(jie): transpose it } - // tensor after transpose (NCHW) - auto tensor_dim = tensor->getDimensions(); - - int num_groups = group; - if (num_groups == 0) // depthwise convolution - num_groups = tensor_dim.d[0]; - VLOG(2) << "groups count: " << num_groups; - - TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); - TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); - ReorderRSCKToKCRS(weights_rsck, &weights, num_groups); - TRT_ShapedWeights biases(weights.type_); - int noutput = weights.shape_.d[0] * num_groups; - nvinfer1::DimsHW kernel_size; - kernel_size.h() = weights.shape_.d[2]; - kernel_size.w() = weights.shape_.d[3]; - VLOG(2) << "kernel size: " << kernel_size.h() << ", " << kernel_size.w(); - // TODO(jie): stride. (NHWC/NCHW) auto tf_stride = attrs.get<std::vector<int>>("strides"); - VLOG(2) << "h_INDEX" << h_index << ", w_index " << w_index; - VLOG(2) << "stride!!!: " << tf_stride[0] << tf_stride[1] << tf_stride[2] - << tf_stride[3]; nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]); + auto tensor_dim = tensor->getDimensions(); std::vector<std::pair<int, int>> padding; // TODO(jie): padding. if (attrs.get<string>("padding") == "SAME") { @@ -1081,11 +919,10 @@ tensorflow::Status ConvertConv2DHelper( layer->setStride(stride); layer->setPadding({padding[0].first, padding[1].first}); layer->setName(node_def.name().c_str()); - layer->setNbGroups(num_groups); nvinfer1::ITensor* output_tensor = layer->getOutput(0); auto dim_after = output_tensor->getDimensions(); - VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1] << ", " + VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1] << dim_after.d[2] << ", " << dim_after.d[3]; if (data_format == "NHWC") { @@ -1098,101 +935,11 @@ tensorflow::Status ConvertConv2DHelper( return tensorflow::Status::OK(); } -tensorflow::Status ConvertConv2DHelper( - Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, - std::vector<TRT_TensorOrWeights>* outputs, ConvolutionType type) { - switch (type) { - case ConvolutionType::DEFAULT: - return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 1); - case ConvolutionType::DEPTHWISE_CONV: - return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 0); - } - return tensorflow::errors::Unimplemented("unsupported convolution type at, " + - node_def.name()); -} - -tensorflow::Status BinaryTensorOpTensor( - Converter& ctx, const tensorflow::NodeDef& node_def, - const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, - std::vector<TRT_TensorOrWeights>* outputs) { - static const std::unordered_map<string, nvinfer1::ElementWiseOperation> ops{ - {"Add", nvinfer1::ElementWiseOperation::kSUM}, - {"Mul", nvinfer1::ElementWiseOperation::kPROD}, - {"Sub", nvinfer1::ElementWiseOperation::kSUB}, - {"Div", nvinfer1::ElementWiseOperation::kDIV}, - }; - - // FIXME assume type matches input weights - // get trt type & shape - TFAttrs attrs(node_def); - // maybe this part has to be moved into the block of rsqrt later - nvinfer1::DataType dtype = attrs.get<nvinfer1::DataType>("T"); - - // check type consistency - CHECK_EQ_TYPE(tensor_l->getType(), dtype); - CHECK_EQ_TYPE(tensor_r->getType(), dtype); - auto op_pair = ops.find(node_def.op()); - if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + - " not supported at: " + - node_def.name()); - - nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( - *const_cast<nvinfer1::ITensor*>(tensor_l), - *const_cast<nvinfer1::ITensor*>(tensor_r), op_pair->second); - - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - - // pass the output - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertPlaceholder( - Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, - std::vector<TRT_TensorOrWeights>* outputs) { - VLOG(2) << "Placeholder should have been replace already"; - return tensorflow::errors::Unimplemented("cannot convert Placeholder op"); - // OK this make sense since we are supposed to replace it with input - TFAttrs attrs(node_def); - nvinfer1::DataType dtype = attrs.get<nvinfer1::DataType>("dtype"); - nvinfer1::Dims dims = attrs.get<nvinfer1::Dims>("shape"); - - dims.nbDims--; - for (int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1]; - - nvinfer1::ITensor* output = - ctx.network()->addInput(node_def.name().c_str(), dtype, dims); - if (!output) { - return tensorflow::errors::InvalidArgument("Failed to create Input layer"); - } - outputs->push_back(TRT_TensorOrWeights(output)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertConv2D(Converter& ctx, - const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, - std::vector<TRT_TensorOrWeights>* outputs) { - return ConvertConv2DHelper(ctx, node_def, inputs, outputs, - ConvolutionType::DEFAULT); -} - -tensorflow::Status ConvertConv2DDepthwise( - Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, - std::vector<TRT_TensorOrWeights>* outputs) { - return ConvertConv2DHelper(ctx, node_def, inputs, outputs, - ConvolutionType::DEPTHWISE_CONV); -} - tensorflow::Status ConvertPool(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, + std::vector<TRT_TensorOrWeights> const& inputs, std::vector<TRT_TensorOrWeights>* outputs) { - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); TFAttrs attrs(node_def); int h_index = 2; @@ -1210,8 +957,6 @@ tensorflow::Status ConvertPool(Converter& ctx, // TODO(jie): support other pooling type if (node_def.op() == "MaxPool") type = nvinfer1::PoolingType::kMAX; - else if (node_def.op() == "AvgPool") - type = nvinfer1::PoolingType::kAVERAGE; else return tensorflow::errors::Unimplemented("Only supports Max pool"); @@ -1274,9 +1019,9 @@ tensorflow::Status ConvertPool(Converter& ctx, tensorflow::Status ConvertActivation( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, + std::vector<TRT_TensorOrWeights> const& inputs, std::vector<TRT_TensorOrWeights>* outputs) { - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); nvinfer1::IActivationLayer* layer = ctx.network()->addActivation( *const_cast<nvinfer1::ITensor*>(tensor), nvinfer1::ActivationType::kRELU); nvinfer1::ITensor* output_tensor = layer->getOutput(0); @@ -1286,14 +1031,14 @@ tensorflow::Status ConvertActivation( tensorflow::Status ConvertScale(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, + std::vector<TRT_TensorOrWeights> const& inputs, std::vector<TRT_TensorOrWeights>* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) return tensorflow::errors::Unimplemented( "Only supports tensor op weight for now, at " + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); // TODO(jie): handle NHWC/NCHW transpose; TRT_ShapedWeights weights = inputs.at(1).weights(); @@ -1310,26 +1055,9 @@ tensorflow::Status ConvertScale(Converter& ctx, } else { VLOG(2) << "NCHW !!!!"; } - - auto dims = tensor->getDimensions(); - VLOG(2) << "tensor dimensions: " << dims.nbDims; - for (int i = 0; i < dims.nbDims; i++) { - VLOG(2) << "i: " << dims.d[i]; - } - dims = weights.shape_; - VLOG(2) << "tensor dimensions: " << dims.nbDims; - for (int i = 0; i < dims.nbDims; i++) { - VLOG(2) << "i: " << dims.d[i]; - } - - nvinfer1::ScaleMode mode = nvinfer1::ScaleMode::kCHANNEL; - if (weights.shape_.d[0] == 1) { - mode = nvinfer1::ScaleMode::kUNIFORM; - } - - nvinfer1::IScaleLayer* layer = - ctx.network()->addScale(*const_cast<nvinfer1::ITensor*>(tensor), mode, - weights, empty_weights, empty_weights); + nvinfer1::IScaleLayer* layer = ctx.network()->addScale( + *const_cast<nvinfer1::ITensor*>(tensor), nvinfer1::ScaleMode::kCHANNEL, + weights, empty_weights, empty_weights); nvinfer1::ITensor* output_tensor = layer->getOutput(0); if (data_format == "NHWC") { @@ -1344,7 +1072,7 @@ tensorflow::Status ConvertScale(Converter& ctx, tensorflow::Status ConvertConst(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, + std::vector<TRT_TensorOrWeights> const& inputs, std::vector<TRT_TensorOrWeights>* outputs) { const auto& weights_tensor = node_def.attr().at("value").tensor(); @@ -1363,144 +1091,20 @@ tensorflow::Status ConvertConst(Converter& ctx, VLOG(2) << "SCALAR!!!" << node_def.name(); nvinfer1::Dims scalar_shape; if (tensor.dims() > 0) { - VLOG(2) << "dimensions: " << tensor.dims(); - VLOG(2) << "size: " << weights_tensor.float_val_size(); - scalar_shape = GetTensorShape(tensor); - for (int i = 0; i < scalar_shape.nbDims; i++) - VLOG(2) << scalar_shape.d[i]; - if (GetShapeSize(scalar_shape) != weights_tensor.float_val_size()) { - if (weights_tensor.float_val_size() == 1 || - scalar_shape.d[0] == weights_tensor.float_val_size()) { - scalar_shape.nbDims = 1; - // no dimension provided. flatten it - scalar_shape.d[0] = weights_tensor.float_val_size(); - scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; - } else { - LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" - << " kUNIFORM, at: " << node_def.name(); - } - } - } else { VLOG(2) << "Dimensions: " << tensor.dims(); - scalar_shape.nbDims = 1; - // no dimension provided. flatten it - scalar_shape.d[0] = weights_tensor.float_val_size(); - scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; - for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { - scalar_shape.d[i] = 0; - scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; - } - } - if (ctx.isFP16()) { - auto dtype_new = tensorflow::DataType::DT_HALF; - size_t len_data = tensorflow::DataTypeSize(dtype_new); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector<uint8_t>(len_data)); - void* dst = static_cast<void*>(&(ctx.weight_store()->store_.back()[0])); - tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); - auto half_tensor = temp_tensor.flat<Eigen::half>(); - Eigen::DefaultDevice defd; - half_tensor.device(defd) = - tensor.flat<float>().template cast<Eigen::half>(); - memcpy(dst, half_tensor.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); + weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), + GetTensorShape(tensor)); } else { - size_t len_data = tensorflow::DataTypeSize(dtype); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector<uint8_t>(len_data)); - void* dst = static_cast<void*>(&(ctx.weight_store()->store_.back()[0])); - std::vector<float> tensor_data( - weights_tensor.float_val().begin(), - weights_tensor.float_val() - .end()); // make a local copy first to flatten - memcpy(dst, tensor_data.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype, dst, scalar_shape); - } - } else if (!weights_tensor.int_val().empty()) { - VLOG(2) << "int!!!" << node_def.name(); - nvinfer1::Dims scalar_shape; - if (tensor.dims() > 0) { - VLOG(2) << "dimensions: " << tensor.dims(); - scalar_shape = GetTensorShape(tensor); - if (GetShapeSize(scalar_shape) != weights_tensor.int_val_size()) { - if (weights_tensor.int_val_size() == 1 || - scalar_shape.d[0] == weights_tensor.int_val_size()) { - scalar_shape.nbDims = 1; - // no dimension provided. flatten it - scalar_shape.d[0] = weights_tensor.int_val_size(); - scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; - } else { - LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" - << " kUNIFORM, at: " << node_def.name(); - } - } - } else { - VLOG(2) << "dimensions: " << tensor.dims(); + VLOG(2) << "Dimensions: " << tensor.dims(); scalar_shape.nbDims = 1; - // no dimension provided. flatten it - scalar_shape.d[0] = weights_tensor.int_val_size(); + scalar_shape.d[0] = 1; scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { scalar_shape.d[i] = 0; scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } - } - if (ctx.isFP16()) { - auto dtype_new = tensorflow::DataType::DT_HALF; - size_t len_data = tensorflow::DataTypeSize(dtype_new); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector<uint8_t>(len_data)); - void* dst = static_cast<void*>(&(ctx.weight_store()->store_.back()[0])); - tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); - TTypes<Eigen::half>::Flat half_tensor = temp_tensor.flat<Eigen::half>(); - Eigen::DefaultDevice defd; - switch (dtype) { - case (tensorflow::DT_INT32): { - half_tensor.device(defd) = - tensor.flat<int32>().template cast<Eigen::half>(); - break; - } - case (tensorflow::DT_INT16): { - half_tensor.device(defd) = - tensor.flat<int16>().template cast<Eigen::half>(); - break; - } - case (tensorflow::DT_INT8): { - half_tensor.device(defd) = - tensor.flat<int8>().template cast<Eigen::half>(); - break; - } - case (tensorflow::DT_UINT8): { - half_tensor.device(defd) = - tensor.flat<uint8>().template cast<Eigen::half>(); - break; - } - default: - return tensorflow::errors::InvalidArgument( - "Datatype " + tensorflow::DataTypeString(dtype) + - " for FP16 conversion"); - break; - }; - memcpy(dst, half_tensor.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); - } else { - size_t len_data = tensorflow::DataTypeSize(dtype); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - size_t len_tensor = weights_tensor.int_val_size() * sizeof(int32); - len_data = std::max(len_data, len_tensor); - ctx.weight_store()->store_.push_back(std::vector<uint8_t>(len_data)); - void* dst = static_cast<void*>(&(ctx.weight_store()->store_.back()[0])); - std::vector<int32> tensor_data( - weights_tensor.int_val().begin(), - weights_tensor.int_val() - .end()); // make a local copy first to flatten - // doesn't have to be contigous - memcpy(dst, tensor_data.data(), len_tensor); // store into weight store - weights = TRT_ShapedWeights(dtype, dst, scalar_shape); + weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), + scalar_shape); } } else if (!weights_tensor.tensor_content().empty()) { VLOG(2) << "TENSOR!!!" << node_def.name(); @@ -1526,7 +1130,7 @@ tensorflow::Status ConvertConst(Converter& ctx, tensorflow::Status ConvertIdentity( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, + std::vector<TRT_TensorOrWeights> const& inputs, std::vector<TRT_TensorOrWeights>* outputs) { outputs->push_back(inputs.at(0)); return tensorflow::Status::OK(); @@ -1534,7 +1138,7 @@ tensorflow::Status ConvertIdentity( tensorflow::Status ConvertBinary(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, + std::vector<TRT_TensorOrWeights> const& inputs, std::vector<TRT_TensorOrWeights>* outputs) { if (inputs.size() != 2) return tensorflow::errors::FailedPrecondition( @@ -1561,7 +1165,7 @@ tensorflow::Status ConvertBinary(Converter& ctx, tensorflow::Status ConvertUnary(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, + std::vector<TRT_TensorOrWeights> const& inputs, std::vector<TRT_TensorOrWeights>* outputs) { if (inputs.size() != 1) return tensorflow::errors::FailedPrecondition( @@ -1579,7 +1183,7 @@ tensorflow::Status ConvertUnary(Converter& ctx, tensorflow::Status ConvertReduce(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, + std::vector<TRT_TensorOrWeights> const& inputs, std::vector<TRT_TensorOrWeights>* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) @@ -1587,7 +1191,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, "Input expects tensor and weights, at" + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); auto dims = tensor->getDimensions(); // Restore implicit batch dimension int nb_dims = dims.nbDims + 1; @@ -1625,7 +1229,6 @@ tensorflow::Status ConvertReduce(Converter& ctx, return tensorflow::errors::InvalidArgument("TRT cannot reduce at 0, at" + node_def.name()); if (index_list_data[i] == 1) permuted_index = 1; - idx_set.emplace(index_list_data[i]); } @@ -1633,7 +1236,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, nvinfer1::DimsHW pool_kernel; if (permuted_index == 1) { for (int i = 2; i < nb_dims; i++) { - if (idx_set.count(i) == 0) { + if (idx_set.count(i)) { permuted_index = i; break; } @@ -1668,13 +1271,12 @@ tensorflow::Status ConvertReduce(Converter& ctx, output_tensor = ctx.TransposeTensor( const_cast<nvinfer1::ITensor*>(output_tensor), permutation_order); } - outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); } tensorflow::Status ConvertPad(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, + std::vector<TRT_TensorOrWeights> const& inputs, std::vector<TRT_TensorOrWeights>* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) @@ -1682,7 +1284,7 @@ tensorflow::Status ConvertPad(Converter& ctx, "Input expects tensor and weights, at" + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); auto dims = tensor->getDimensions(); // Restore implicit batch dimension int nb_dims = dims.nbDims + 1; @@ -1769,287 +1371,19 @@ tensorflow::Status ConvertPad(Converter& ctx, return tensorflow::Status::OK(); } -tensorflow::Status ConvertConcat(Converter& ctx, - const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, - std::vector<TRT_TensorOrWeights>* outputs) { - // not including the last input (axis) here - int input_size = static_cast<int>(inputs.size()) - 1; - - if (!inputs.at(0).is_tensor()) - return tensorflow::errors::InvalidArgument( - "Concat in TRT support only Tensor input, at " + node_def.name()); - - // We are retrieving the axis - TRT_ShapedWeights axis = inputs.at(input_size).weights(); - - TFAttrs attrs(node_def); - // auto attr_size = attrs.at("N")->i(); - // auto data_type = attrs.get<nvinfer1::DataType>("T"); - auto index_type = attrs.get<tensorflow::DataType>("Tidx"); - - // TODO(jie): handle data type - // Only expect to handle INT32 as index attributes for now - if (index_type != tensorflow::DataType::DT_INT32) - return tensorflow::errors::Unimplemented( - "Tidx supports only DT_INT32, at " + node_def.name()); - - int index = *(static_cast<int*>(const_cast<void*>(axis.GetValues()))); - - // TODO(jie): early termination with no-op (attr_size==1) - - auto dim = inputs.at(0).tensor()->getDimensions(); - // dimension check - if (index > dim.nbDims + 1) - return tensorflow::errors::InvalidArgument( - "Concatenate on axis out of dimension range, at " + node_def.name()); - - if (index == 0) - return tensorflow::errors::InvalidArgument( - "Concatenate on batch dimension not supported, at " + node_def.name()); - - // incase we need permutation; - std::vector<int> permutation_order(dim.nbDims + 1); - - for (int i = 0; i < dim.nbDims + 1; i++) permutation_order[i] = i; - - if (index != 1) { - permutation_order[1] = index - 1; - permutation_order[index - 1] = 1; - } - - std::vector<nvinfer1::ITensor const*> inputs_vec; - // Shap chack (all input tensor should have same shape) - // starting from 0 since we are probably also doing transpose here; - for (int i = 0; i < input_size; i++) { - auto tensor_i = inputs.at(i).tensor(); - auto dim_i = tensor_i->getDimensions(); - if (dim_i.nbDims != dim.nbDims) - return tensorflow::errors::InvalidArgument( - "Concatenate receives inputs with inconsistent dimensions, at " + - node_def.name()); - - for (int j = 0; j < dim.nbDims; j++) { - // check dimension consistency on non-concatenate axis - if (j != index - 1 && dim_i.d[j] != dim.d[j]) - return tensorflow::errors::InvalidArgument( - "Concatenate receives inputs with inconsistent shape, at" + - node_def.name()); - } - - // TRT does concatenation only on channel! - if (index != 1) - tensor_i = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor_i), - permutation_order); - - inputs_vec.push_back(tensor_i); - } - - // nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); - nvinfer1::IConcatenationLayer* layer = ctx.network()->addConcatenation( - const_cast<nvinfer1::ITensor* const*>(inputs_vec.data()), - inputs_vec.size()); - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - - if (index != 1) { - output_tensor = ctx.TransposeTensor(output_tensor, permutation_order); - } - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertFusedBatchNorm( - Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, - std::vector<TRT_TensorOrWeights>* outputs) { - TFAttrs attrs(node_def); - float epsilon = attrs.get<float>("epsilon"); - auto data_format = attrs.get<string>("data_format"); - if (data_format != "NCHW") { - return tensorflow::errors::Unimplemented( - "only data_format=NCHW is supported, at " + node_def.name()); - } - bool is_training = attrs.get<bool>("is_training"); - if (is_training) { - return tensorflow::errors::Unimplemented( - "only is_training=false is supported, at " + node_def.name()); - } - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); - TRT_ShapedWeights scale_weights = inputs.at(1).weights(); - TRT_ShapedWeights offset_weights = inputs.at(2).weights(); - TRT_ShapedWeights mean_weights = inputs.at(3).weights(); - TRT_ShapedWeights variance_weights = inputs.at(4).weights(); - TRT_ShapedWeights dummy_power_weights(scale_weights.type_); - TRT_ShapedWeights combined_scale_weights = - ctx.get_temp_weights_like(scale_weights); - TRT_ShapedWeights combined_offset_weights = - ctx.get_temp_weights_like(offset_weights); - size_t nweight = scale_weights.count(); - if ((scale_weights.type_ == offset_weights.type_) && - (mean_weights.type_ == variance_weights.type_) && - (scale_weights.type_ == variance_weights.type_)) { - if ((scale_weights.type_ != tensorflow::DataType::DT_FLOAT) && - (scale_weights.type_ != tensorflow::DataType::DT_HALF)) { - return tensorflow::errors::Unimplemented( - "only float32 or float16 weight data type is supported, for node " + - node_def.name() + " got " + - tensorflow::DataTypeString(scale_weights.type_)); - } - if (scale_weights.type_ == tensorflow::DT_FLOAT) { - for (size_t i = 0; i < nweight; ++i) { - float scale = (static_cast<float const*>(scale_weights.GetValues()))[i]; - float offset = - (static_cast<float const*>(offset_weights.GetValues()))[i]; - float mean = (static_cast<float const*>(mean_weights.GetValues()))[i]; - float variance = - (static_cast<float const*>(variance_weights.GetValues()))[i]; - float& combined_scale_ref = const_cast<float*>( - static_cast<float const*>(combined_scale_weights.GetValues()))[i]; - float& combined_offset_ref = const_cast<float*>( - static_cast<float const*>(combined_offset_weights.GetValues()))[i]; - combined_scale_ref = scale / sqrtf(variance + epsilon); - combined_offset_ref = offset - mean * combined_scale_ref; - } - } else { - const Eigen::half* scale_vals = - (static_cast<Eigen::half const*>(scale_weights.GetValues())); - const Eigen::half* off_vals = - (static_cast<Eigen::half const*>(offset_weights.GetValues())); - const Eigen::half* mean_vals = - (static_cast<Eigen::half const*>(mean_weights.GetValues())); - const Eigen::half* variance_vals = - (static_cast<Eigen::half const*>(variance_weights.GetValues())); - Eigen::half* comb_scale_vals = const_cast<Eigen::half*>( - static_cast<Eigen::half const*>(combined_scale_weights.GetValues())); - Eigen::half* comb_off_vals = const_cast<Eigen::half*>( - static_cast<Eigen::half const*>(combined_offset_weights.GetValues())); - for (size_t i = 0; i < nweight; ++i) { - float scale(scale_vals[i]); - float offset(off_vals[i]); - float mean(mean_vals[i]); - float variance(variance_vals[i]); - float combined_scale_ref = scale / sqrtf(variance + epsilon); - comb_scale_vals[i] = Eigen::half(combined_scale_ref); - float combined_offset_ref = offset - mean * combined_scale_ref; - comb_off_vals[i] = Eigen::half(combined_offset_ref); - } - } - } - nvinfer1::IScaleLayer* layer = ctx.network()->addScale( - *const_cast<nvinfer1::ITensor*>(tensor), nvinfer1::ScaleMode::kCHANNEL, - combined_offset_weights.GetWeightsForTRT(), - combined_scale_weights.GetWeightsForTRT(), - dummy_power_weights.GetWeightsForTRT()); - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertMatMul(Converter& ctx, - const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, - std::vector<TRT_TensorOrWeights>* outputs) { - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); - - // TODO(jie): transpose! - TFAttrs attrs(node_def); - - TRT_ShapedWeights weights_ck = inputs.at(1).weights(); - TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_ck); - ReorderCKtoKC(weights_ck, &weights); - TRT_ShapedWeights biases(weights.type_); - - int noutput = weights.shape_.d[0]; - - nvinfer1::IFullyConnectedLayer* layer = ctx.network()->addFullyConnected( - *const_cast<nvinfer1::ITensor*>(tensor), noutput, weights, biases); - - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertReshape( - Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector<TRT_TensorOrWeights>& inputs, - std::vector<TRT_TensorOrWeights>* outputs) { - if (inputs.size() != 2 || !inputs.at(0).is_tensor() || - !inputs.at(1).is_weights()) - return tensorflow::errors::InvalidArgument( - "Input expects tensor and weights, at" + node_def.name()); - - // implement tensor binaryOp weight [channel wise] for now; - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); - auto dims = tensor->getDimensions(); - // restore implicit batch dimension - - TRT_ShapedWeights shape = inputs.at(1).weights(); - - TFAttrs attrs(node_def); - - auto padding_type = attrs.get<tensorflow::DataType>("Tshape"); - - if (shape.shape_.nbDims != 1) - return tensorflow::errors::InvalidArgument( - "reshape new shape is not 1 dimensional, at " + node_def.name()); - - // Only expect to handle INT32 as attributes for now - if (padding_type != tensorflow::DataType::DT_INT32) - return tensorflow::errors::Unimplemented( - "reshape new shape supports only DT_INT32, at " + node_def.name()); - - auto shape_data = static_cast<int*>(const_cast<void*>(shape.GetValues())); - - if (shape_data[0] != -1) - return tensorflow::errors::InvalidArgument( - "reshape new shape first dimension is not -1, at " + node_def.name()); - - auto shape_num_dims = shape.shape_.d[0]; - VLOG(2) << "shape dimensions: " << shape_num_dims; - int volume_w = 1; - for (int i = 1; i < shape.shape_.d[0]; i++) volume_w *= shape_data[i]; - - int volume_t = 1; - for (int i = 0; i < dims.nbDims; i++) volume_t *= dims.d[i]; - - VLOG(2) << "volume: " << volume_t << " volume weights: " << volume_w; - if (volume_w != volume_t) - return tensorflow::errors::InvalidArgument( - "volume does not agree between tensor and new shape, at " + - node_def.name()); - - nvinfer1::IShuffleLayer* layer = - ctx.network()->addShuffle(*const_cast<nvinfer1::ITensor*>(tensor)); - - nvinfer1::Dims reshape_dims; - VLOG(2) << "new dimension: " << shape_num_dims - 1; - reshape_dims.nbDims = shape_num_dims - 1; - for (int32_t i = 0; i < reshape_dims.nbDims; ++i) { - reshape_dims.d[i] = shape_data[i + 1]; - } - layer->setReshapeDimensions(reshape_dims); - VLOG(2) << "new dimension: " << shape_num_dims - 1; - - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - auto dims_output = output_tensor->getDimensions(); - VLOG(2) << "output tensor dimension:" << dims_output.nbDims; - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - void Converter::register_op_converters() { // vgg_16 slim implementation op_registry_["Placeholder"] = ConvertPlaceholder; op_registry_["Conv2D"] = ConvertConv2D; - op_registry_["DepthwiseConv2dNative"] = ConvertConv2DDepthwise; op_registry_["Relu"] = ConvertActivation; op_registry_["MaxPool"] = ConvertPool; - op_registry_["AvgPool"] = ConvertPool; // This could be really handled as ConvertBinary op_registry_["BiasAdd"] = ConvertScale; op_registry_["Const"] = ConvertConst; + // op_registry_["MatMul"] = ConvertFullyConnected; // Not used in vgg // TODO(ben,jie): this is a temp hack. op_registry_["Identity"] = ConvertIdentity; // Identity should be removed + // op_registry_["AvgPool"] = ConvertPool; // resnet_50_v1 slim implementation op_registry_["Add"] = ConvertBinary; @@ -2059,364 +1393,26 @@ void Converter::register_op_converters() { op_registry_["Mean"] = ConvertReduce; op_registry_["Pad"] = ConvertPad; // TODO(ben,jie): Add more ops - - op_registry_["ConcatV2"] = ConvertConcat; - op_registry_["MatMul"] = ConvertMatMul; - op_registry_["Reshape"] = ConvertReshape; - op_registry_["FusedBatchNorm"] = ConvertFusedBatchNorm; - op_registry_["FusedBatchNormV2"] = ConvertFusedBatchNorm; } } // namespace -tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams& s) { - return tensorflow::errors::Unimplemented("Not implemented yet"); -} -tensorflow::Status ConvertCalibrationNodeToEngineNode( - tensorflow::Graph& graph, tensorflow::Node* c_node) { - const auto ndef = c_node->def(); - - TFAttrs attrs(ndef); - std::vector<string> segment_nodes( - attrs.get<std::vector<string>>("segment_nodes")); - std::vector<string> output_nodes( - attrs.get<std::vector<string>>("segment_output_names")); - std::vector<string> input_names( - attrs.get<std::vector<string>>("input_names")); - string res_name = attrs.get<string>("resource_name"); - VLOG(1) << "Node name " << c_node->name() << " res_name " << res_name; - string engine_name = "my_trt_op"; - { - const auto node_id = tensorflow::str_util::Split(res_name, "_"); - engine_name += node_id.back(); - } - std::map<string, tensorflow::Node*> node_maps; - - for (auto n : graph.op_nodes()) { - node_maps.insert({n->name(), n}); - } - VLOG(1) << "Output Nodes:"; - std::vector<tensorflow::DataType> out_types; - std::vector<const tensorflow::Edge*> out_edges; - for (auto& i : output_nodes) { - auto node_port = tensorflow::str_util::Split(i, ":"); - VLOG(1) << " " << i << " in graph " << node_maps.count(i); - auto out_node_name = node_port.at(0); - if (node_port.size() > 1) { - VLOG(1) << "Multi port output" << node_port.at(0) << " " - << node_port.at(1) << " size=" << node_port.size(); - } - auto node_it = node_maps.find(out_node_name); - if (node_it != node_maps.end()) { - tensorflow::Node* out_node = node_it->second; - int port = 0; - if (node_port.size() == 2) { - port = std::strtoul(node_port.at(1).c_str(), nullptr, 10); - out_types.push_back(out_node->output_type(port)); - } else { - out_types.push_back(out_node->output_type(0)); - } - for (auto out_edge : out_node->out_edges()) { - if (out_edge->src_output() == port) { - out_edges.push_back(out_edge); - break; - } - } - } else { - LOG(WARNING) << " couldn't find output node " << out_node_name; - } - } - VLOG(1) << "Input Nodes:"; - for (auto& i : input_names) { - VLOG(1) << " " << i << " in graph " << node_maps.count(i); - } - auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance(); - auto resmgr = trt_rm->getManager("TRTCalibOps"); - tensorflow::tensorrt::TRTCalibrationResource* calib_res = nullptr; - auto status = resmgr->Lookup(res_name, res_name, &calib_res); - if (!status.ok() || !calib_res->calibrator_) { - return tensorflow::errors::FailedPrecondition( - "You must run calibration" - " and inference conversion in the same proces"); - } - - calib_res->calibrator_->setDone(); - calib_res->thr_->join(); - delete calib_res->thr_; - if (!calib_res->engine_) { - LOG(FATAL) << "Calibration failed!, engine is nullptr. Did you run " - "calibration graph?"; - } - auto weight_rmgr = trt_rm->getManager("WeightStore"); - TF_CHECK_OK(weight_rmgr->Delete<tensorflow::tensorrt::TRTWeightStore>( - res_name, res_name)); - auto engine_plan = calib_res->engine_->serialize(); - calib_res->engine_->destroy(); - calib_res->network_->destroy(); - calib_res->builder_->destroy(); - calib_res->thr_ = nullptr; - calib_res->engine_ = nullptr; - calib_res->builder_ = nullptr; - tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); - std::vector<tensorflow::NodeDefBuilder::NodeOut> income_edges; - for (const auto in_edge : c_node->in_edges()) { - auto src = in_edge->src(); - int dest_port = in_edge->dst_input(); - income_edges.emplace_back(src->name(), in_edge->src_output(), - c_node->input_type(dest_port)); - } - tensorflow::gtl::ArraySlice<tensorflow::NodeDefBuilder::NodeOut> input_list( - income_edges); - op_builder.Input(input_list); - tensorflow::NodeDef engine_node; - const char* engine_plan_data = static_cast<const char*>(engine_plan->data()); - string engine_plan_string(engine_plan_data, - engine_plan_data + engine_plan->size()); - status = op_builder.Attr("serialized_engine", engine_plan_string) - .Attr("input_nodes", input_names) - .Attr("output_nodes", output_nodes) - .Attr("OutT", out_types) - .Finalize(&engine_node); - if (!status.ok()) { - LOG(ERROR) << "Engine Node creation failed"; - return status; - } - auto trt_engine_node = graph.AddNode(engine_node, &status); - TF_CHECK_OK(status); - for (size_t i = 0; i < out_edges.size(); i++) { - VLOG(1) << "Connecting trt_engine_node output " << i << " with " - << out_edges.at(i)->dst()->name() << " port " - << out_edges.at(i)->dst_input(); - TF_RETURN_IF_ERROR(graph.UpdateEdge(trt_engine_node, i, - out_edges.at(i)->dst(), - out_edges.at(i)->dst_input())); - } - VLOG(1) << "Segment nodes:"; - for (auto& i : segment_nodes) { - VLOG(1) << " " << i << " in graph " << node_maps.count(i); - auto it = node_maps.find(i); - if (it != node_maps.end()) { - graph.RemoveNode(it->second); - } - } - graph.RemoveNode(c_node); - return tensorflow::Status::OK(); -} - -tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { - // Visit nodes in reverse topological order and construct the TRT network. - - // Toposort - std::vector<tensorflow::Node*> order_vec; - tensorflow::GetPostOrder(s.graph, &order_vec); - // Select just the subgraph - std::list<tensorflow::Node*> order; - for (tensorflow::Node* node : order_vec) { - if (s.subgraph_node_ids.count(node->id())) { - order.push_front(node); // we want topological order to contstruct the - // network layer by layer - } - } - // topological order is needed to build TRT network - static int static_id = 0; - string subgraph_name_scope; - if (!order.empty()) { - subgraph_name_scope = order.front()->name(); - } - for (const tensorflow::Node* node : order) { - subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); - } - // TODO(sami,ben,jie): proper naming! - string calib_op_name = - StrCat(subgraph_name_scope, "my_trt_calib_op_", static_id); - string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id); - static_id++; - auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); - auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); - auto op_res = new tensorflow::tensorrt::TRTCalibrationResource(); - TF_CHECK_OK(op_rmgr->Create(calib_op_name, calib_op_name, op_res)); - op_res->logger_ = new tensorflow::tensorrt::Logger(); - op_res->builder_ = nvinfer1::createInferBuilder(*(op_res->logger_)); - - if (!op_res->builder_) { - return tensorflow::errors::Internal( - "failed to create TensorRT builder object"); - } - - op_res->network_ = op_res->builder_->createNetwork(); - if (!op_res->network_) { - return tensorflow::errors::Internal( - "failed to create TensorRT network object"); - } - - // Build the network - auto weight_rmgr = trt_rmgr->getManager("WeightStore"); - auto ws = new tensorflow::tensorrt::TRTWeightStore(); - TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); - Converter converter(op_res->network_, ws, s.precision_mode == FP16MODE); - std::vector<string> input_names; - std::vector<tensorflow::DataType> input_dtypes; - for (const std::pair<int, int>& input : s.input_inds) { - VLOG(2) << "parsing input. Node id= " << input.first; - int node_id = input.first; - int output_idx = input.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); - auto node_name = node->name(); - input_names.push_back(node_name); // insert original node name without port - // TODO(jie): alternative :) - if (!s.graph_properties.HasOutputProperties(node_name)) - return tensorflow::errors::Internal("failed to find input node: " + - node_name); - - auto op_info_vec = s.graph_properties.GetOutputProperties(node_name); - if (static_cast<int>(op_info_vec.size()) < output_idx) - return tensorflow::errors::Internal( - "accessing output index of: ", output_idx, ", at node: ", node_name, - "with output entry from shape_map: ", op_info_vec.size()); - - auto op_info = op_info_vec.at(output_idx); - - tensorflow::DataType tf_dtype = op_info.dtype(); - input_dtypes.push_back(tf_dtype); - - nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); - TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); - - VLOG(2) << "accessing output index of: " << output_idx - << ", at node: " << node_name - << "with output entry from shape_map: " << op_info_vec.size(); - - // TODO(ben,jie): update TRT input format/dimension - nvinfer1::DimsCHW input_dim_psuedo_chw; - for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; - - for (int i = 1; i < op_info.shape().dim_size(); i++) { - VLOG(2) << "dimension: " << i - << " , size: " << op_info.shape().dim(i).size(); - input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); - } - - // TODO(ben,jie): proper way to restore input tensor name? - auto input_tensor_name = node_name; - if (output_idx != 0) input_tensor_name = StrCat(node_name, ":", output_idx); - - nvinfer1::ITensor* input_tensor = converter.network()->addInput( - input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); - - if (!input_tensor) - return tensorflow::errors::InvalidArgument( - "Failed to create Input layer"); - VLOG(2) << "input tensor name :" << input_tensor_name; - - if (!converter.insert_input_tensor(input_tensor_name, input_tensor)) - return tensorflow::errors::AlreadyExists( - "output tensor already exists for op: " + input_tensor_name); - } - - VLOG(2) << "finished sorting"; - - for (const tensorflow::Node* node : order) { - const tensorflow::NodeDef& node_def = node->def(); - VLOG(2) << "converting node: " << node_def.name() << " , " << node_def.op(); - TF_RETURN_IF_ERROR(converter.convert_node(node_def)); - } - - VLOG(2) << "finished conversion"; - - // Gather output metadata - std::vector<string> output_names; - std::vector<tensorflow::DataType> output_dtypes; - int trt_engine_op_output_idx = 0; - for (const std::pair<int, int>& output : s.output_inds) { - int node_id = output.first; - int output_idx = output.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); - string op_name = node->name(); - string tensor_name = op_name; - - s.output_edge_map->insert( - {trt_engine_op_output_idx == 0 - ? engine_name - : StrCat(engine_name, ":", trt_engine_op_output_idx), - {output_idx, tensor_name}}); - trt_engine_op_output_idx++; - if (output_idx != 0) { - tensor_name = StrCat(tensor_name, ":", output_idx); - } - VLOG(1) << "output tensor name: " << tensor_name; - output_names.push_back(tensor_name); - auto tensor_or_weights = converter.get_tensor(tensor_name); - if (!tensor_or_weights.is_tensor()) { - return tensorflow::errors::InvalidArgument( - "Output node is weights not tensor"); - } - nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); - if (!tensor) { - return tensorflow::errors::NotFound("Output tensor not found: " + - tensor_name); - } - converter.network()->markOutput(*tensor); - tensorflow::DataType tf_dtype = node->output_type(output_idx); - output_dtypes.push_back(tf_dtype); - nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT; - TF_RETURN_IF_ERROR(ConvertDType(tf_dtype, &trt_dtype)); - tensor->setType(trt_dtype); - } - - VLOG(2) << "finished output"; - - // Build the engine - op_res->builder_->setMaxBatchSize(s.max_batch_size); - op_res->builder_->setMaxWorkspaceSize(s.max_workspace_size_bytes); - - // Build the TRT op - // TODO(sami,ben,jie): proper naming! - tensorflow::NodeDefBuilder op_builder(calib_op_name, "TRTCalibOp"); - std::vector<tensorflow::NodeDefBuilder::NodeOut> income_edges; - for (size_t i = 0; i < input_names.size(); ++i) { - int output_idx = s.input_inds.at(i).second; - // we wired up the input here already, it is redundant to do it again in - // ConvertSubGraphToTensorRT(convert_graph.cc) - auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( - input_names.at(i), output_idx, input_dtypes.at(i)); - VLOG(1) << calib_op_name << " input " << i << " = " << input_names.at(i) - << ":" << output_idx - << " dType= " << tensorflow::DataTypeString(input_dtypes.at(i)); - income_edges.push_back(incoming_edge); - } - tensorflow::gtl::ArraySlice<tensorflow::NodeDefBuilder::NodeOut> input_list( - income_edges); - op_builder.Input(input_list); - std::vector<string> segment_names; - segment_names.reserve(s.subgraph_node_ids.size()); - for (int i : s.subgraph_node_ids) { - auto node = s.graph.FindNodeId(i); - segment_names.push_back(node->name()); - } - LOG(INFO) << "finished op preparation"; - - auto status = op_builder.Attr("segment_nodes", segment_names) - .Attr("input_names", input_names) - .Attr("segment_output_names", output_names) - .Attr("resource_name", calib_op_name) - .Finalize(s.trt_node); - - LOG(INFO) << status.ToString(); - LOG(INFO) << "finished op building"; - - return tensorflow::Status::OK(); -} tensorflow::Status ConvertSubGraphToTensorRTNodeDef( - tensorrt::convert::SubGraphParams& s) { + const tensorflow::Graph& graph, const std::set<int>& subgraph_node_ids, + const std::vector<std::pair<int, int>>& input_inds, + const std::vector<std::pair<int, int>>& output_inds, size_t max_batch_size, + size_t max_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& graph_properties, + tensorflow::NodeDef* trt_node) { // Visit nodes in reverse topological order and construct the TRT network. // Toposort std::vector<tensorflow::Node*> order_vec; - tensorflow::GetPostOrder(s.graph, &order_vec); + tensorflow::GetPostOrder(graph, &order_vec); // Select just the subgraph std::list<tensorflow::Node*> order; for (tensorflow::Node* node : order_vec) { - if (s.subgraph_node_ids.count(node->id())) { + if (subgraph_node_ids.count(node->id())) { // We want topological order to contstruct the // network layer by layer order.push_front(node); @@ -2438,86 +1434,46 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( "Failed to create TensorRT network object"); } - string subgraph_name_scope; - if (!order.empty()) { - subgraph_name_scope = order.front()->name(); - } - for (const tensorflow::Node* node : order) { - subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); - } - static int static_id = 0; - // TODO(sami,ben,jie): proper naming! - string engine_name = StrCat(subgraph_name_scope, "my_trt_op"); - engine_name = StrCat(engine_name, static_id++); - auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); - auto weight_rmgr = trt_rmgr->getManager("WeightStore"); - auto ws = new tensorflow::tensorrt::TRTWeightStore(); - TF_CHECK_OK(weight_rmgr->Create(engine_name, engine_name, ws)); - // Build the network - Converter converter(trt_network.get(), ws, s.precision_mode == FP16MODE); + Converter converter(trt_network.get()); std::vector<string> input_names; std::vector<tensorflow::DataType> input_dtypes; - for (const std::pair<int, int>& input : s.input_inds) { - VLOG(2) << "parsing input!!!!!"; + for (std::pair<int, int> const& input : input_inds) { int node_id = input.first; int output_idx = input.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); + tensorflow::Node* node = graph.FindNodeId(node_id); auto node_name = node->name(); - // input_names should use the node name in the graph - // here it should be the input tensor name -> matching the binding - // insert original node name without port - auto tensor_name = node_name; - if (output_idx != 0) { - tensor_name = StrCat(tensor_name, ":", output_idx); - } - - VLOG(2) << "input name: " << node_name << " tensor_name: " << tensor_name - << " idx: " << output_idx; - - auto shape_inference_node_name = node_name; - auto shape_inference_output_idx = output_idx; - // rewire the shape inference to original node in the graph - if (s.output_edge_map->count(tensor_name)) { - shape_inference_node_name = s.output_edge_map->at(tensor_name).second; - shape_inference_output_idx = s.output_edge_map->at(tensor_name).first; - } - if (shape_inference_output_idx < 0) continue; - VLOG(2) << "shapeinference name: " << shape_inference_node_name - << " idx: " << shape_inference_output_idx; - - if (!s.graph_properties.HasOutputProperties(shape_inference_node_name)) - return tensorflow::errors::Internal("failed to find input node: " + - shape_inference_node_name); + input_names.push_back(node_name); // Insert original node name without port + // TODO(jie): alternative :) + if (!graph_properties.HasOutputProperties(node_name)) + return tensorflow::errors::Internal("Failed to find input node: " + + node_name); - auto op_info_vec = - s.graph_properties.GetOutputProperties(shape_inference_node_name); - if (static_cast<int>(op_info_vec.size()) <= shape_inference_output_idx) + auto op_info_vec = graph_properties.GetOutputProperties(node_name); + if (static_cast<int>(op_info_vec.size()) < output_idx) return tensorflow::errors::Internal( - "accessing output index of: ", shape_inference_output_idx, - ", at node: ", shape_inference_node_name, - " with output entry from shape_map: ", op_info_vec.size()); + "Accessing output index of: " + std::to_string(output_idx) + + ", at node: " + node_name + " with output entry from shape_map: " + + std::to_string(op_info_vec.size())); + + auto op_info = op_info_vec.at(output_idx); - auto op_info = op_info_vec.at(shape_inference_output_idx); tensorflow::DataType tf_dtype = op_info.dtype(); input_dtypes.push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); - VLOG(2) << "Accessing output index of: " << output_idx + VLOG(2) << "Accessing output index of: " << std::to_string(output_idx) << ", at node: " << node_name - << " with output entry from shape_map: " << op_info_vec.size(); + << " with output entry from shape_map: " + << std::to_string(op_info_vec.size()); + // TODO(ben,jie): update TRT input format/dimension nvinfer1::DimsCHW input_dim_psuedo_chw; for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; - // TODO(jie): TRT 3.x only support 4 dimensional input tensor. - // update the code once TRT 4.0 comes out. - if (op_info.shape().dim_size() != 4) - return tensorflow::errors::Unimplemented("require 4 dimensional input"); - for (int i = 1; i < op_info.shape().dim_size(); i++) { VLOG(2) << "dimension: " << i << " , size: " << op_info.shape().dim(i).size(); @@ -2526,11 +1482,9 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // TODO(ben,jie): proper way to restore input tensor name? auto input_tensor_name = node_name; - if (output_idx != 0) { - input_tensor_name = StrCat(node_name, ":", output_idx); - } + if (output_idx != 0) + input_tensor_name = node_name + ":" + std::to_string(output_idx); - input_names.push_back(input_tensor_name); nvinfer1::ITensor* input_tensor = converter.network()->addInput( input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); @@ -2557,22 +1511,14 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // Gather output metadata std::vector<string> output_names; std::vector<tensorflow::DataType> output_dtypes; - int trt_engine_op_output_idx = 0; - for (const std::pair<int, int>& output : s.output_inds) { + for (std::pair<int, int> const& output : output_inds) { int node_id = output.first; int output_idx = output.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); + tensorflow::Node* node = graph.FindNodeId(node_id); string op_name = node->name(); string tensor_name = op_name; - - s.output_edge_map->insert( - {trt_engine_op_output_idx == 0 - ? engine_name - : StrCat(engine_name, ":", trt_engine_op_output_idx), - {output_idx, tensor_name}}); - trt_engine_op_output_idx++; if (output_idx != 0) - tensorflow::strings::StrAppend(&tensor_name, ":", output_idx); + tensor_name = tensor_name + ":" + std::to_string(output_idx); VLOG(2) << "Output tensor name: " << tensor_name; output_names.push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); @@ -2594,25 +1540,19 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( } VLOG(2) << "Finished output"; + // TODO(jie): static_id is not thread safe. + static int static_id = 0; // Build the engine - trt_builder->setMaxBatchSize(s.max_batch_size); - trt_builder->setMaxWorkspaceSize(s.max_workspace_size_bytes); - VLOG(0) << "Max batch size= " << s.max_batch_size - << " max workspace size= " << s.max_workspace_size_bytes; - if (s.precision_mode == FP16MODE) { - trt_builder->setHalf2Mode(true); - VLOG(0) << "Using FP16 precision mode"; - } - LOG(INFO) << "starting build engine"; + trt_builder->setMaxBatchSize(max_batch_size); + trt_builder->setMaxWorkspaceSize(max_workspace_size_bytes); + VLOG(0) << "Starting build engine " << static_id; + // TODO(ben,jie): half2 and int8 mode support string engine_plan_string; { auto trt_engine = infer_object(trt_builder->buildCudaEngine(*converter.network())); VLOG(0) << "Built network"; - if (trt_engine.get() == nullptr) { - return tensorflow::errors::Internal("Engine building failure"); - } auto engine_plan = infer_object(trt_engine->serialize()); VLOG(0) << "Serialized engine"; const char* engine_plan_data = @@ -2620,19 +1560,18 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( engine_plan_string = string(engine_plan_data, engine_plan_data + engine_plan->size()); } - TF_RETURN_IF_ERROR(weight_rmgr->Delete<tensorflow::tensorrt::TRTWeightStore>( - engine_name, engine_name)); - LOG(INFO) << "finished engine " << engine_name; + + VLOG(0) << "Finished engine"; // Build the TRT op - tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); + // TODO(sami,ben,jie): proper naming! + tensorflow::NodeDefBuilder op_builder( + tensorflow::strings::StrCat("my_trt_op", static_id++), "TRTEngineOp"); std::vector<tensorflow::NodeDefBuilder::NodeOut> income_edges; - VLOG(2) << "input edge size: " << input_names.size(); for (size_t i = 0; i < input_names.size(); ++i) { - VLOG(2) << "input edges: " << i << " " << input_names.at(i); - int output_idx = s.input_inds.at(i).second; - // we wired up the input here already, it is redundant to do it again in - // ConvertSubGraphToTensorRT(convert_graph.cc) + int output_idx = input_inds.at(i).second; + // We wired up the input here already, it is redundant to do it again in + // ConvertSubGraphToTensorRT(convert_graph.cc) auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( input_names.at(i), output_idx, input_dtypes.at(i)); income_edges.push_back(incoming_edge); @@ -2647,7 +1586,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( .Attr("input_nodes", input_names) .Attr("output_nodes", output_names) .Attr("OutT", output_dtypes) - .Finalize(s.trt_node); + .Finalize(trt_node); VLOG(0) << status.ToString() << " finished op building"; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 954a1e72f8..2e7fd19566 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -17,8 +17,6 @@ limitations under the License. #define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_NODES_H_ #include <set> -#include <string> -#include <unordered_map> #include <utility> #include <vector> @@ -34,49 +32,16 @@ namespace tensorflow { namespace tensorrt { namespace convert { -const int FP32MODE = 0; -const int FP16MODE = 1; -const int INT8MODE = 2; +tensorflow::Status ConvertSubGraphToTensorRTNodeDef( + const tensorflow::Graph& graph, const std::set<int>& subgraph_node_ids, + const std::vector<std::pair<int, int>>& + input_inds, // {node_id, output_idx} + const std::vector<std::pair<int, int>>& + output_inds, // {node_id, output_idx} + size_t max_batch_size, size_t max_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& graph_prop, + tensorflow::NodeDef* trt_node); -struct SubGraphParams { - SubGraphParams( - tensorflow::Graph& inp_graph, - const std::set<int>& subgraph_node_id_numbers, - const std::vector<std::pair<int, int>>& input_indices, - const std::vector<std::pair<int, int>>& output_indices, - size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& current_graph_properties, - std::unordered_map<string, std::pair<int, string>>* output_edges, - tensorflow::NodeDef* constructed_trt_node, - int engine_precision_mode = FP32MODE) - : graph(inp_graph), - subgraph_node_ids(subgraph_node_id_numbers), - input_inds(input_indices), - output_inds(output_indices), - max_batch_size(max_supported_batch_size), - max_workspace_size_bytes(max_consumed_workspace_size_bytes), - graph_properties(current_graph_properties), - output_edge_map(output_edges), - trt_node(constructed_trt_node), - precision_mode(engine_precision_mode) {} - - tensorflow::Graph& graph; - const std::set<int>& subgraph_node_ids; - const std::vector<std::pair<int, int>>& input_inds; // {node_id, output_idx} - const std::vector<std::pair<int, int>>& output_inds; // {node_id, output_idx} - size_t max_batch_size; - size_t max_workspace_size_bytes; - const tensorflow::grappler::GraphProperties& graph_properties; - std::unordered_map<string, std::pair<int, string>>* output_edge_map; - tensorflow::NodeDef* trt_node; - const int precision_mode; -}; - -// TODO(sami): Replace references with const reference or pointers -tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams& params); -tensorflow::Status InjectCalibrationNode(SubGraphParams& params); -tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph& graph, - tensorflow::Node* c_node); } // namespace convert } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc index aea44fd8a2..1dcb87e768 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -21,11 +21,10 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/platform/stream_executor.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "cuda/include/cuda_runtime_api.h" +#include "cuda_runtime_api.h" #include "tensorrt/include/NvInfer.h" namespace tensorflow { @@ -114,13 +113,7 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { ctx->set_output(i, t); } VLOG(2) << "Filled map for sending"; - // copied from cuda_kernel_helper since it seems only valid in *.cu.cc files - const cudaStream_t* stream = CHECK_NOTNULL( - reinterpret_cast<const cudaStream_t*>(ctx->op_device_context() - ->stream() - ->implementation() - ->CudaStreamMemberHack())); - calib_res->calibrator_->setBatch(input_data, *stream); + calib_res->calibrator_->setBatch(input_data); VLOG(2) << "Passed calibration data"; // TODO(aaroey): make sure we wait for the completion of calibration on the // last batch in future PR. diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index b32371b642..8efdf63ebe 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -24,12 +24,8 @@ limitations under the License. #include "cuda/include/cuda_runtime_api.h" namespace tensorflow { -static ::tensorflow::tensorrt::Logger logger; -namespace gpu = ::perftools::gputools; -using IRuntime = nvinfer1::IRuntime; -using Dims = nvinfer1::Dims; - namespace tensorrt { +static ::tensorflow::tensorrt::Logger logger; TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // read serialized_engine @@ -44,21 +40,10 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // TODO(samikama) runtime should be taken from a resourcemanager as well. // Only engine should be in the op and context and runtime should be taken // from resourcemanager - // TODO(jie): cudaSetDevice make sure trt engine is allocated on the same - // gpu where the input/output is also located. - int gpu_id = context->device()->tensorflow_gpu_device_info()->gpu_id; - cudaSetDevice(gpu_id); - int device; - cudaGetDevice(&device); - if (gpu_id != device) LOG(FATAL) << "set device failed!"; - - // TODO(samikama) runtime should be taken from a resourcemanager as well. - // Only engine should be in the op and context and runtime should be taken - // from resourcemanager - - IRuntime* infer = nvinfer1::createInferRuntime(logger); + nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(logger); trt_engine_ptr_.reset(infer->deserializeCudaEngine( serialized_engine.c_str(), serialized_engine.size(), nullptr)); + trt_execution_context_ptr_.reset(trt_engine_ptr_->createExecutionContext()); // Runtime is safe to delete after engine creation infer->destroy(); @@ -70,6 +55,7 @@ void TRTEngineOp::Compute(OpKernelContext* context) { size_t binding_index; int num_batch = 0; + bool valid = true; for (int i = 0; i < context->num_inputs(); i++) { // Grab the input tensor binding_index = trt_engine_ptr_->getBindingIndex(input_nodes_[i].c_str()); @@ -78,12 +64,8 @@ void TRTEngineOp::Compute(OpKernelContext* context) { const TensorShape& input_shape = input_tensor.shape(); if (i == 0) { num_batch = input_shape.dim_size(0); - if (num_batch > trt_engine_ptr_->getMaxBatchSize()) { - LOG(FATAL) << "input tensor batch larger than max_batch_size: " - << trt_engine_ptr_->getMaxBatchSize(); - } } else if (num_batch != input_shape.dim_size(0)) { - LOG(FATAL) << "input data inconsistent batch size"; + valid = false; break; } switch (trt_engine_ptr_->getBindingDataType(binding_index)) { @@ -99,6 +81,9 @@ void TRTEngineOp::Compute(OpKernelContext* context) { } } + // Might want a different way to inform the user of batch size inconsistency + if (!valid) LOG(WARNING) << "input data inconsistent batch size"; + for (int i = 0; i < static_cast<int>(output_nodes_.size()); i++) { // This is bad that we have to reallocate output buffer every run. // Create an output tensor @@ -141,11 +126,9 @@ void TRTEngineOp::Compute(OpKernelContext* context) { ->implementation() ->CudaStreamMemberHack())); - // TODO(jie): trt enqueue does not return error - auto ret = trt_execution_context_ptr_->enqueue(num_batch, &buffers[0], - *stream, nullptr); - VLOG(2) << "enqueue returns: " << ret; - // sync should be done by TF. + // execution handled by TF since we are getting stream from TF. + // it is safe for CPU pointer array (buffers) to go out of scope after enqueue + trt_execution_context_ptr_->enqueue(num_batch, &buffers[0], *stream, nullptr); } REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp); diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.cc b/tensorflow/contrib/tensorrt/log/trt_logger.cc index dda0dc9e71..7add8cb8b3 100644 --- a/tensorflow/contrib/tensorrt/log/trt_logger.cc +++ b/tensorflow/contrib/tensorrt/log/trt_logger.cc @@ -27,19 +27,19 @@ void Logger::log(Severity severity, const char* msg) { // Suppress info-level messages switch (severity) { case Severity::kINFO: { // Mark TRT info messages as debug! - VLOG(2) << name_ << " " << msg; + VLOG(2) << msg; break; } case Severity::kWARNING: { - LOG(WARNING) << name_ << " " << msg; + LOG(WARNING) << msg; break; } case Severity::kERROR: { - LOG(ERROR) << name_ << " " << msg; + LOG(ERROR) << msg; break; } case Severity::kINTERNAL_ERROR: { - LOG(FATAL) << name_ << " " << msg; + LOG(FATAL) << msg; break; } // This is useless for now. But would catch it in future if enum changes. It diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.h b/tensorflow/contrib/tensorrt/log/trt_logger.h index 7f3544f8cf..d71f66b933 100644 --- a/tensorflow/contrib/tensorrt/log/trt_logger.h +++ b/tensorflow/contrib/tensorrt/log/trt_logger.h @@ -27,11 +27,9 @@ namespace tensorrt { // Logger for GIE info/warning/errors class Logger : public nvinfer1::ILogger { - public: - Logger(string name = "DefaultLogger") : name_(name){}; + private: void log(nvinfer1::ILogger::Severity severity, const char* msg) override; - private: string name_; }; diff --git a/tensorflow/contrib/tensorrt/python/__init__.py b/tensorflow/contrib/tensorrt/python/__init__.py index 0b2321b5fc..7e050a768c 100644 --- a/tensorflow/contrib/tensorrt/python/__init__.py +++ b/tensorflow/contrib/tensorrt/python/__init__.py @@ -20,6 +20,5 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long from tensorflow.contrib.tensorrt.python.ops import trt_engine_op -from tensorflow.contrib.tensorrt.python.trt_convert import calib_graph_to_infer_graph from tensorflow.contrib.tensorrt.python.trt_convert import create_inference_graph # pylint: enable=unused-import,line-too-long diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 666220d78c..9454862f85 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -20,17 +20,11 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long import six as _six -from tensorflow.contrib.tensorrt.wrap_conversion import calib_convert from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert from tensorflow.core.framework import graph_pb2 -from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.framework import errors from tensorflow.python.framework import errors_impl as _impl -from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops -from tensorflow.python.grappler import tf_optimizer -from tensorflow.python.util import compat -# pylint: enable=unused-import,line-too-long # TODO(skama): get outputs from session when implemented as c++ @@ -38,33 +32,22 @@ from tensorflow.python.util import compat def create_inference_graph(input_graph_def, outputs, max_batch_size=1, - max_workspace_size_bytes=2 << 20, - precision_mode="FP32", - minimum_segment_size=3): + max_workspace_size_bytes=2 << 20): """Python wrapper for the TRT transormation. + Args: input_graph_def: GraphDef object containing a model to be transformed. - outputs: list of tensors or node names for the model outputs. + outputs: List of tensors or node names for the model outputs. max_batch_size: max size for the input batch max_workspace_size_bytes: parameter to control memory allocation (in Bytes) - precision_mode: one of 'FP32', 'FP16' and 'INT8' - minimum_segment_size: the minimum number of nodes required for a subgraph to - be replaced by TRTEngineOp. Returns: New GraphDef with TRTEngineOps placed in graph replacing subgraphs. Raises: - ValueError: if the provided precision mode is invalid. RuntimeError: if the returned status message is malformed. """ - supported_precision_modes = {"FP32": 0, "FP16": 1, "INT8": 2} - if precision_mode.upper() not in supported_precision_modes: - raise ValueError(("precision mode '{}' is not supported." - "It should be one of {}").format( - precision_mode, "{'FP32', 'FP16', 'INT8'}")) - mode = supported_precision_modes[precision_mode.upper()] def py2bytes(inp): return inp @@ -100,7 +83,7 @@ def create_inference_graph(input_graph_def, # pair or strings where first one is encoded status and the second # one is the transformed graphs protobuf string. out = trt_convert(input_graph_def_str, out_names, max_batch_size, - max_workspace_size_bytes, mode, minimum_segment_size) + max_workspace_size_bytes) status = to_string(out[0]) output_graph_def_string = out[1] del input_graph_def_str # Save some memory @@ -118,46 +101,3 @@ def create_inference_graph(input_graph_def, output_graph_def.ParseFromString(output_graph_def_string) del output_graph_def_string # Save some memory return output_graph_def - - -def calib_graph_to_infer_graph(calibration_graph_def): - """Convert an existing calibration graph to inference graph. - - Args: - calibration_graph_def: the calibration GraphDef object with calibration data - Returns: - New GraphDef with TRTEngineOps placed in graph replacing calibration nodes. - Raises: - RuntimeError: if the returned status message is malformed. - """ - - def py2string(inp): - return inp - - def py3string(inp): - return inp.decode("utf-8") - - if _six.PY2: - to_string = py2string - else: - to_string = py3string - - graph_str = calibration_graph_def.SerializeToString() - out = calib_convert(graph_str) - status = to_string(out[0]) - output_graph_def_string = out[1] - del graph_str # Save some memory - if len(status) < 2: - raise _impl.UnknownError(None, None, status) - if status[:2] != "OK": - msg = status.split(";") - if len(msg) == 1: - raise RuntimeError("Status message is malformed {}".format(status)) - # pylint: disable=protected-access - raise _impl._make_specific_exception(None, None, ";".join(msg[1:]), - int(msg[0])) - # pylint: enable=protected-access - output_graph_def = graph_pb2.GraphDef() - output_graph_def.ParseFromString(output_graph_def_string) - del output_graph_def_string # Save some memory - return output_graph_def diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc index 74df75902e..3d5cc76c42 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc @@ -23,7 +23,7 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "cuda/include/cuda_runtime_api.h" +#include "cuda_runtime_api.h" namespace tensorflow { namespace tensorrt { @@ -38,18 +38,22 @@ TRTInt8Calibrator::TRTInt8Calibrator( done_(false), dev_buffers_(dev_buffers), calib_running_(false), - batch_is_set_(false), engine_name_(engine_name) {} -bool TRTInt8Calibrator::setBatch(const std::unordered_map<string, void*>& data, - const cudaStream_t stream) { - tensorflow::mutex_lock lock(cond_mtx_); - while ((calib_running_ || batch_is_set_) && - !done_) { // wait while calibration is running - cond_.wait(lock); - } +bool TRTInt8Calibrator::setBatch( + const std::unordered_map<string, void*>& data) { + // TODO(aaroey): make sure that in future PR: + // 1. the mutex_lock is outside of the loop + // 2. wait() is used instead of wait_for() + // 3. done_ is to be protected by the mutex + // 4. the first batch is not missed if (done_) return false; - CHECK(!calib_running_ && !batch_is_set_); + while (calib_running_.load( + std::memory_order_acquire)) { // wait while calibration is running + tensorflow::mutex_lock l(cond_mtx_); + cond_.wait_for(l, std::chrono::milliseconds(50)); + if (done_) return false; + } VLOG(1) << "Set Batch Waiting finished"; for (const auto it : data) { auto devptr = dev_buffers_.find(it.first); @@ -61,32 +65,27 @@ bool TRTInt8Calibrator::setBatch(const std::unordered_map<string, void*>& data, // TODO(aaroey): we should not use sync copy on default stream. Make sure // stream->ThenMemcpy() is used in future PRs. - // TODO(sami,aaroey): Need to figureout a way to ensure synchronization - // between stream, perhaps using a tensor? - auto status = cudaMemcpyAsync(d.first, it.second, d.second, - cudaMemcpyDeviceToDevice, stream); + auto status = + cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice); if (status != cudaSuccess) { LOG(FATAL) << "cudaMemcpy " << engine_name_ << " for '" << it.first << "' failed with " << status; } } - - // TODO(Sami, aaorey): Find an alternative way! - cudaStreamSynchronize( - stream); // we have to wait for the stream before returning! - batch_is_set_ = true; + calib_running_.store(true, std::memory_order_release); // release builder cond_.notify_all(); return true; } bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, int num_bindings) { - tensorflow::mutex_lock lock(cond_mtx_); - calib_running_ = false; + calib_running_.store(false, std::memory_order_release); // wait for new batch cond_.notify_all(); - while ((!batch_is_set_ && !done_)) { // wait until new batch arrives - cond_.wait(lock); - + while (!calib_running_.load( + std::memory_order_acquire)) { // wait until new batch arrives + tensorflow::mutex_lock l(cond_mtx_); + cond_.wait_for(l, std::chrono::milliseconds(50)); + if (done_) return false; } if (done_) { return false; @@ -101,8 +100,6 @@ bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, bindings[i] = it->second.first; } - batch_is_set_ = false; - calib_running_ = true; return true; } @@ -110,12 +107,6 @@ const void* TRTInt8Calibrator::readCalibrationCache(std::size_t& length) { return nullptr; } -void TRTInt8Calibrator::setDone() { - tensorflow::mutex_lock lock(cond_mtx_); - done_ = true; - cond_.notify_all(); -} - void TRTInt8Calibrator::writeCalibrationCache(const void* ptr, std::size_t length) {} TRTInt8Calibrator::~TRTInt8Calibrator() { @@ -124,6 +115,5 @@ TRTInt8Calibrator::~TRTInt8Calibrator() { } // namespace tensorrt } // namespace tensorflow - #endif #endif diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h index d77aa2c5ab..8830f7efe7 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h @@ -24,10 +24,7 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT - -#include "cuda/include/cuda_runtime_api.h" #include "tensorrt/include/NvInfer.h" - namespace tensorflow { namespace tensorrt { // This class provides a 1 element queue to match TFs push model to @@ -42,9 +39,8 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { int getBatchSize() const override; bool getBatch(void* bindings[], const char* names[], int num_bindings) override; - bool setBatch(const std::unordered_map<string, void*>& data, - const cudaStream_t stream); - void setDone(); + bool setBatch(const std::unordered_map<string, void*>& data); + void setDone() { done_ = true; } const void* readCalibrationCache(std::size_t& length) override; void writeCalibrationCache(const void* ptr, std::size_t length) override; ~TRTInt8Calibrator(); @@ -59,14 +55,11 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { const std::unordered_map<string, std::pair<void*, size_t>> dev_buffers_; // map to keep tensorrt input buffers and sizes keyed with // buffer names - bool calib_running_; - bool batch_is_set_; + std::atomic_bool calib_running_; string engine_name_; }; - } // namespace tensorrt } // namespace tensorflow - +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ #endif #endif -#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py index 0b661bd536..c78f6f2224 100644 --- a/tensorflow/contrib/tensorrt/test/test_tftrt.py +++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py @@ -60,7 +60,6 @@ def get_simple_graph_def(): def run_graph(gdef, dumm_inp): - """Run given graphdef once.""" gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) ops.reset_default_graph() g = ops.Graph() @@ -75,65 +74,15 @@ def run_graph(gdef, dumm_inp): return val -# Use real data that is representatitive of the inference dataset -# for calibration. For this test script it is random data. -def run_calibration(gdef, dumm_inp): - """Run given calibration graph multiple times.""" - gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) - ops.reset_default_graph() - g = ops.Graph() - with g.as_default(): - inp, out = importer.import_graph_def( - graph_def=gdef, return_elements=["input", "output"]) - inp = inp.outputs[0] - out = out.outputs[0] - with csess.Session( - config=cpb2.ConfigProto(gpu_options=gpu_options), graph=g) as sess: - # run over real calibration data here, we are mimicking a calibration set of - # 30 different batches. Use as much calibration data as you want - for _ in range(30): - val = sess.run(out, {inp: dumm_inp}) - return val - - if "__main__" in __name__: inp_dims = (100, 24, 24, 2) dummy_input = np.random.random_sample(inp_dims) - orig_graph = get_simple_graph_def() # use a frozen graph for inference + gdef = get_simple_graph_def() # Get optimized graph - trt_graph = trt.create_inference_graph( - input_graph_def=orig_graph, - outputs=["output"], - max_batch_size=inp_dims[0], - max_workspace_size_bytes=1 << 25, - precision_mode="FP32", # TRT Engine precision "FP32","FP16" or "INT8" - minimum_segment_size=2 # minimum number of nodes in an engine - ) - o1 = run_graph(orig_graph, dummy_input) + trt_graph = trt.create_inference_graph(gdef, ["output"], inp_dims[0]) + o1 = run_graph(gdef, dummy_input) o2 = run_graph(trt_graph, dummy_input) o3 = run_graph(trt_graph, dummy_input) assert np.array_equal(o1, o2) assert np.array_equal(o3, o2) # sanity check - fp16_graph = trt.create_inference_graph( - input_graph_def=orig_graph, - outputs=["output"], - max_batch_size=inp_dims[0], - max_workspace_size_bytes=1 << 25, - precision_mode="FP16", # TRT Engine precision "FP32","FP16" or "INT8" - minimum_segment_size=2 # minimum number of nodes in an engine - ) - int8_calib_gdef = trt.create_inference_graph( - input_graph_def=orig_graph, - outputs=["output"], - max_batch_size=inp_dims[0], - max_workspace_size_bytes=1 << 25, - precision_mode="INT8", # TRT Engine precision "FP32","FP16" or "INT8" - minimum_segment_size=2 # minimum number of nodes in an engine - ) - o4 = run_graph(fp16_graph, dummy_input) - _ = run_calibration(int8_calib_gdef, dummy_input) - int8_graph = trt.calib_graph_to_infer_graph(int8_calib_gdef) - o5 = run_graph(int8_graph, dummy_input) - assert np.allclose(o1, o4) - assert np.allclose(o1, o5) print("Pass") diff --git a/tensorflow/contrib/tensorrt/trt_conversion.i b/tensorflow/contrib/tensorrt/trt_conversion.i index 46480e99a1..d679945d56 100644 --- a/tensorflow/contrib/tensorrt/trt_conversion.i +++ b/tensorflow/contrib/tensorrt/trt_conversion.i @@ -64,17 +64,13 @@ PyObject* pair_helper(std::pair<string, string>* in) { %ignoreall %unignore tensorflow; %unignore trt_convert; -%unignore calib_convert; %{ - std::pair<string, string> trt_convert( string graph_def_string, // The serialized GraphDef string. std::vector<string> output_names, size_t max_batch_size, - size_t max_workspace_size_bytes, - int precision_mode, - int minimum_segment_size + size_t max_workspace_size_bytes // Unfortunately we can't use TF_Status here since it // is in c/c_api and brings in a lot of other libraries // which in turn declare ops. These ops are included @@ -94,64 +90,16 @@ std::pair<string, string> trt_convert( return std::pair<string, string>{out_status, ""}; } - if(precision_mode < 0 || precision_mode > 2){ - out_status = "InvalidArgument;Invalid precision_mode"; - return std::pair<string, string>{out_status, ""}; - } if (!output_names.size()) { out_status = "InvalidArgument;Size of the output_names vector is 0"; return std::pair<string, string>{out_status, ""}; + // return ""; } tensorflow::GraphDef outGraph; tensorflow::Status conversion_status = tensorflow::tensorrt::convert::ConvertGraphDefToTensorRT( graph_def, output_names, max_batch_size, max_workspace_size_bytes, - &outGraph, precision_mode, minimum_segment_size); - if (!conversion_status.ok()) { - auto retCode = (int)conversion_status.code(); - char buff[2000]; - snprintf(buff, 2000, "%d;%s", retCode, - conversion_status.error_message().c_str()); - out_status = buff; - return std::pair<string, string>{out_status, ""}; - } - string result; - if (!outGraph.SerializeToString(&result)) { - out_status = "InvalidArgument;Couldn't serialize output as a GraphDef"; - return std::pair<string, string>{out_status, ""}; - } - out_status = "OK;All good!"; - return std::pair<string, string>{out_status, result}; -#else - // Returns FAILED_PRECONDITION. - return std::pair<string, string>{"9;TensorRT is not enabled!", ""}; -#endif // GOOGLE_CUDA && GOOGLE_TENSORRT -} - -std::pair<string, string> calib_convert(string graph_def_string // const tensorflow::GraphDef& - // unfortunately we can't use TF_Status here since it - // is in c/c_api and brings in a lot of other libraries - // which in turn declare ops. These ops are included - // statically in our library and cause an abort when - // module is loaded due to double registration - // until Tensorflow properly exposes these headers - // we have to work around this by returning a string - // and converting it to exception on python side. - //,TF_Status* out_status) { -) { -#if GOOGLE_CUDA && GOOGLE_TENSORRT - string out_status; - - tensorflow::GraphDef graph_def; - if (!graph_def.ParseFromString(graph_def_string)) { - out_status = "InvalidArgument;Couldn't interpret input as a GraphDef"; - return std::pair<string, string>{out_status, ""}; - } - - tensorflow::GraphDef outGraph; - tensorflow::Status conversion_status = - tensorflow::tensorrt::convert::ConvertCalibGraphToInferGraph(graph_def, - &outGraph); + &outGraph); if (!conversion_status.ok()) { auto retCode = (int)conversion_status.code(); char buff[2000]; @@ -174,13 +122,10 @@ std::pair<string, string> calib_convert(string graph_def_string // const tenso } %} -std::pair<string, string> calib_convert(string graph_def_string); - std::pair<string, string> trt_convert(string graph_def_string, std::vector<string> output_names, size_t max_batch_size, - size_t max_workspace_size_bytes, - int precision_mode, int minimum_segment_size); + size_t max_workspace_size_bytes); %unignoreall diff --git a/tensorflow/contrib/timeseries/examples/BUILD b/tensorflow/contrib/timeseries/examples/BUILD index 70bf67c779..bb86ecb220 100644 --- a/tensorflow/contrib/timeseries/examples/BUILD +++ b/tensorflow/contrib/timeseries/examples/BUILD @@ -25,10 +25,7 @@ py_test( srcs = ["predict_test.py"], data = ["data/period_trend.csv"], srcs_version = "PY2AND3", - tags = [ - "no_windows", # TODO: needs investigation on Windows - "notsan", # b/67513579 - ], + tags = ["notsan"], # b/67513579 deps = [ ":predict", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD index 64f5cd8357..ed3ed4c0e1 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD @@ -156,7 +156,9 @@ py_test( "head_test.py", ], srcs_version = "PY2AND3", - tags = ["no_pip_gpu"], # b/63391119 + tags = [ + "no_pip_gpu", # b/63391119 + ], deps = [ ":feature_keys", ":head", @@ -425,7 +427,6 @@ py_test( srcs_version = "PY2AND3", tags = [ "no_pip_gpu", # b/63391119 - "no_windows", # TODO: needs investigation on Windows ], deps = [ ":feature_keys", diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD index 07df7bc9a5..c86d06e923 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD @@ -40,7 +40,6 @@ py_test( timeout = "long", # Moderate but for asan srcs = ["state_space_model_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":state_space_model", "//tensorflow/contrib/layers:layers_py", diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index f9d433a45b..ed930e44e8 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -225,7 +225,6 @@ tf_py_test( "//tensorflow/python:framework", "//tensorflow/python:layers", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) tf_py_test( diff --git a/tensorflow/contrib/util/loader.py b/tensorflow/contrib/util/loader.py index dca01d26f4..f4283cd9ed 100644 --- a/tensorflow/contrib/util/loader.py +++ b/tensorflow/contrib/util/loader.py @@ -42,10 +42,9 @@ def load_op_library(path): plugin. """ if os.name == 'nt': - # To avoid making every user_ops aware of windows, re-write - # the file extension from .so to .dll if .so file doesn't exist. - if not os.path.exists(path): - path = re.sub(r'\.so$', '.dll', path) + # To avoid makeing every user_ops aware of windows, re-write + # the file extension from .so to .dll. + path = re.sub(r'\.so$', '.dll', path) # Currently we have only some user_ops as dlls on windows - don't try # to load them if the dll is not found. |