Diffstat (limited to 'tensorflow/contrib')
-rw-r--r--  tensorflow/contrib/BUILD | 10
-rw-r--r--  tensorflow/contrib/__init__.py | 6
-rw-r--r--  tensorflow/contrib/boosted_trees/lib/utils/batch_features.h | 6
-rw-r--r--  tensorflow/contrib/cmake/README.md | 12
-rw-r--r--  tensorflow/contrib/cmake/external/grpc.cmake | 1
-rw-r--r--  tensorflow/contrib/cmake/external/protobuf.cmake | 2
-rw-r--r--  tensorflow/contrib/cmake/tf_tests.cmake | 4
-rw-r--r--  tensorflow/contrib/data/__init__.py | 4
-rw-r--r--  tensorflow/contrib/data/python/kernel_tests/BUILD | 17
-rw-r--r--  tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py | 242
-rw-r--r--  tensorflow/contrib/data/python/ops/BUILD | 1
-rw-r--r--  tensorflow/contrib/data/python/ops/sliding.py | 102
-rw-r--r--  tensorflow/contrib/distributions/BUILD | 2
-rw-r--r--  tensorflow/contrib/eager/python/BUILD | 5
-rw-r--r--  tensorflow/contrib/eager/python/examples/linear_regression/BUILD | 1
-rw-r--r--  tensorflow/contrib/factorization/BUILD | 5
-rw-r--r--  tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc | 9
-rw-r--r--  tensorflow/contrib/gan/BUILD | 1
-rw-r--r--  tensorflow/contrib/kafka/BUILD | 108
-rw-r--r--  tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc | 4
-rw-r--r--  tensorflow/contrib/kafka/ops/dataset_ops.cc | 44
-rw-r--r--  tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py | 9
-rw-r--r--  tensorflow/contrib/kafka/python/ops/kafka_op_loader.py | 24
-rw-r--r--  tensorflow/contrib/kfac/python/kernel_tests/BUILD | 1
-rw-r--r--  tensorflow/contrib/labeled_tensor/BUILD | 1
-rw-r--r--  tensorflow/contrib/layers/BUILD | 2
-rw-r--r--  tensorflow/contrib/layers/python/layers/embedding_ops.py | 2
-rw-r--r--  tensorflow/contrib/learn/BUILD | 12
-rw-r--r--  tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py | 2
-rw-r--r--  tensorflow/contrib/lite/Makefile | 9
-rw-r--r--  tensorflow/contrib/lite/arena_planner.h | 2
-rwxr-xr-x  tensorflow/contrib/lite/build_rpi_lib.sh | 22
-rw-r--r--  tensorflow/contrib/lite/builtin_ops.h | 2
-rw-r--r--  tensorflow/contrib/lite/error_reporter.h | 2
-rw-r--r--  tensorflow/contrib/lite/g3doc/rpi.md | 50
-rw-r--r--  tensorflow/contrib/lite/interpreter.h | 2
-rw-r--r--  tensorflow/contrib/lite/interpreter_test.cc | 2
-rw-r--r--  tensorflow/contrib/lite/kernels/conv.cc | 2
-rw-r--r--  tensorflow/contrib/lite/kernels/depthwise_conv.cc | 2
-rw-r--r--  tensorflow/contrib/lite/kernels/fully_connected.cc | 2
-rw-r--r--  tensorflow/contrib/lite/kernels/kernel_util.h | 2
-rw-r--r--  tensorflow/contrib/lite/kernels/lsh_projection.cc | 2
-rw-r--r--  tensorflow/contrib/lite/kernels/lstm.cc | 6
-rw-r--r--  tensorflow/contrib/lite/kernels/reshape.cc | 12
-rw-r--r--  tensorflow/contrib/lite/kernels/reshape_test.cc | 2
-rw-r--r--  tensorflow/contrib/lite/kernels/test_util.cc | 4
-rw-r--r--  tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc | 2
-rw-r--r--  tensorflow/contrib/lite/memory_planner.h | 4
-rw-r--r--  tensorflow/contrib/lite/model.h | 2
-rw-r--r--  tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h | 2
-rw-r--r--  tensorflow/contrib/lite/rpi_makefile.inc | 33
-rw-r--r--  tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc | 2
-rw-r--r--  tensorflow/contrib/lite/simple_memory_arena.cc | 6
-rw-r--r--  tensorflow/contrib/lite/simple_memory_arena.h | 6
-rw-r--r--  tensorflow/contrib/lookup/BUILD | 1
-rw-r--r--  tensorflow/contrib/makefile/README.md | 2
-rwxr-xr-x  tensorflow/contrib/makefile/build_all_ios.sh | 3
-rw-r--r--  tensorflow/contrib/mpi/mpi_utils.h | 2
-rw-r--r--  tensorflow/contrib/predictor/predictor_factories.py | 4
-rw-r--r--  tensorflow/contrib/py2tf/converters/BUILD | 2
-rw-r--r--  tensorflow/contrib/py2tf/converters/single_return.py | 2
-rw-r--r--  tensorflow/contrib/py2tf/utils/BUILD | 1
-rw-r--r--  tensorflow/contrib/quantize/python/fold_batch_norms.py | 4
-rw-r--r--  tensorflow/contrib/quantize/python/quant_ops.py | 4
-rw-r--r--  tensorflow/contrib/quantize/python/quantize.py | 2
-rw-r--r--  tensorflow/contrib/quantize/python/quantize_graph.py | 2
-rw-r--r--  tensorflow/contrib/quantize/python/quantize_parameterized_test.py | 8
-rw-r--r--  tensorflow/contrib/quantize/python/quantize_test.py | 2
-rw-r--r--  tensorflow/contrib/remote_fused_graph/pylib/BUILD | 1
-rw-r--r--  tensorflow/contrib/rnn/python/ops/rnn_cell.py | 6
-rw-r--r--  tensorflow/contrib/saved_model/BUILD | 1
-rw-r--r--  tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py | 7
-rw-r--r--  tensorflow/contrib/session_bundle/BUILD | 1
-rw-r--r--  tensorflow/contrib/slim/python/slim/data/BUILD | 1
-rw-r--r--  tensorflow/contrib/tensor_forest/BUILD | 1
-rw-r--r--  tensorflow/contrib/tensorboard/BUILD | 1
-rw-r--r--  tensorflow/contrib/tensorrt/BUILD | 2
-rw-r--r--  tensorflow/contrib/tensorrt/README.md | 23
-rw-r--r--  tensorflow/contrib/tensorrt/__init__.py | 18
-rw-r--r--  tensorflow/contrib/tensorrt/convert/convert_graph.cc | 256
-rw-r--r--  tensorflow/contrib/tensorrt/convert/convert_graph.h | 8
-rw-r--r--  tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 1469
-rw-r--r--  tensorflow/contrib/tensorrt/convert/convert_nodes.h | 53
-rw-r--r--  tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc | 11
-rw-r--r--  tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc | 39
-rw-r--r--  tensorflow/contrib/tensorrt/log/trt_logger.cc | 8
-rw-r--r--  tensorflow/contrib/tensorrt/log/trt_logger.h | 4
-rw-r--r--  tensorflow/contrib/tensorrt/python/__init__.py | 1
-rw-r--r--  tensorflow/contrib/tensorrt/python/trt_convert.py | 68
-rw-r--r--  tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc | 56
-rw-r--r--  tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h | 15
-rw-r--r--  tensorflow/contrib/tensorrt/test/test_tftrt.py | 57
-rw-r--r--  tensorflow/contrib/tensorrt/trt_conversion.i | 63
-rw-r--r--  tensorflow/contrib/timeseries/examples/BUILD | 5
-rw-r--r--  tensorflow/contrib/timeseries/python/timeseries/BUILD | 5
-rw-r--r--  tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD | 1
-rw-r--r--  tensorflow/contrib/tpu/BUILD | 1
-rw-r--r--  tensorflow/contrib/util/loader.py | 7
98 files changed, 478 insertions, 2571 deletions
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 986b61b3ea..bab37e8906 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -8,7 +8,6 @@ package(default_visibility = ["//tensorflow:__subpackages__"])
load("//third_party/mpi:mpi.bzl", "if_mpi")
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
load("@local_config_tensorrt//:build_defs.bzl", "if_tensorrt")
-load("//tensorflow:tensorflow.bzl", "if_not_windows")
py_library(
name = "contrib_py",
@@ -52,6 +51,7 @@ py_library(
"//tensorflow/contrib/image:single_image_random_dot_stereograms_py",
"//tensorflow/contrib/input_pipeline:input_pipeline_py",
"//tensorflow/contrib/integrate:integrate_py",
+ "//tensorflow/contrib/kafka",
"//tensorflow/contrib/keras",
"//tensorflow/contrib/kernel_methods",
"//tensorflow/contrib/kfac",
@@ -63,6 +63,7 @@ py_library(
"//tensorflow/contrib/linalg:linalg_py",
"//tensorflow/contrib/linear_optimizer:sdca_estimator_py",
"//tensorflow/contrib/linear_optimizer:sdca_ops_py",
+ "//tensorflow/contrib/lite/python:lite",
"//tensorflow/contrib/lookup:lookup_py",
"//tensorflow/contrib/losses:losses_py",
"//tensorflow/contrib/losses:metric_learning_py",
@@ -109,10 +110,6 @@ py_library(
"//tensorflow/python:util",
] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_tensorrt([
"//tensorflow/contrib/tensorrt:init_py",
- ]) + if_not_windows([
- "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", # unix dependency, need to fix code
- "//tensorflow/contrib/lite/python:lite", # unix dependency, need to fix code
- "//tensorflow/contrib/kafka", # has some linking issue on opensssl.
]),
)
@@ -124,7 +121,6 @@ cc_library(
"//tensorflow/contrib/coder:all_kernels",
"//tensorflow/contrib/cudnn_rnn:cudnn_rnn_kernels",
"//tensorflow/contrib/data/kernels:dataset_kernels",
- "//tensorflow/contrib/kafka:dataset_kernels",
"//tensorflow/contrib/factorization/kernels:all_kernels",
"//tensorflow/contrib/input_pipeline:input_pipeline_ops_kernels",
"//tensorflow/contrib/layers:sparse_feature_cross_op_kernel",
@@ -151,7 +147,7 @@ cc_library(
"//tensorflow/contrib/factorization:all_ops",
"//tensorflow/contrib/framework:all_ops",
"//tensorflow/contrib/input_pipeline:input_pipeline_ops_op_lib",
- "//tensorflow/contrib/kafka:dataset_ops_op_lib",
+ "//tensorflow/contrib/kafka:kafka_ops_op_lib",
"//tensorflow/contrib/layers:sparse_feature_cross_op_op_lib",
"//tensorflow/contrib/nccl:nccl_ops_op_lib",
"//tensorflow/contrib/nearest_neighbor:nearest_neighbor_ops_op_lib",
diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py
index 669d611b01..4f6f539027 100644
--- a/tensorflow/contrib/__init__.py
+++ b/tensorflow/contrib/__init__.py
@@ -18,8 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-import os
-
# Add projects here, they will show up under tf.contrib.
from tensorflow.contrib import batching
from tensorflow.contrib import bayesflow
@@ -85,8 +83,7 @@ from tensorflow.contrib import tpu
from tensorflow.contrib import training
from tensorflow.contrib import util
from tensorflow.contrib.eager.python import tfe as eager
-if os.name != 'nt':
- from tensorflow.contrib.lite.python import lite
+from tensorflow.contrib.lite.python import lite
from tensorflow.contrib.receptive_field import receptive_field_api as receptive_field
from tensorflow.contrib.remote_fused_graph import pylib as remote_fused_graph
from tensorflow.contrib.specs import python as specs
@@ -95,7 +92,6 @@ from tensorflow.contrib.summary import summary
from tensorflow.python.util.lazy_loader import LazyLoader
ffmpeg = LazyLoader("ffmpeg", globals(),
"tensorflow.contrib.ffmpeg")
-del os
del LazyLoader
del absolute_import
diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h
index 7815fa049a..da5e744851 100644
--- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h
+++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h
@@ -48,9 +48,9 @@ class BatchFeatures {
Status GetFeatureColumnSizes(int64* const num_dense_float_features,
int64* const num_sparse_float_features,
int64* const num_sparse_int_features) const {
- QCHECK_NE(num_dense_float_features, (int64*) nullptr);
- QCHECK_NE(num_sparse_float_features, (int64*) nullptr);
- QCHECK_NE(num_sparse_int_features, (int64*) nullptr);
+ QCHECK_NE(num_dense_float_features, nullptr);
+ QCHECK_NE(num_sparse_float_features, nullptr);
+ QCHECK_NE(num_sparse_int_features, nullptr);
*num_dense_float_features = dense_float_feature_columns_.size();
*num_sparse_float_features = sparse_float_feature_columns_.size();
*num_sparse_int_features = sparse_int_feature_columns_.size();
diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index fe83bb3204..8f85a75ee4 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -26,7 +26,7 @@ The CMake files in this directory can build the core TensorFlow runtime, an
example C++ binary, and a PIP package containing the runtime and Python
bindings.
-### Prerequisites
+### Pre-requisites
* CMake version 3.5 or later.
@@ -34,16 +34,14 @@ bindings.
* [SWIG](http://www.swig.org/download.html)
-* Additional prerequisites for Microsoft Windows:
+* Additional pre-requisites for Microsoft Windows:
- Visual Studio 2015
- Python 3.5
+ - NumPy 1.11.0 or later
-* Additional prerequisites for Linux:
+* Additional pre-requisites for Linux:
- Python 2.7 or later
- [Docker](https://www.docker.com/) (for automated testing)
-
-* Python dependencies:
- - wheel
- NumPy 1.11.0 or later
### Known-good configurations
@@ -104,7 +102,7 @@ ops or APIs.
Step-by-step Windows build
==========================
-1. Install the prerequisites detailed above, and set up your environment.
+1. Install the pre-requisites detailed above, and set up your environment.
* The following commands assume that you are using the Windows Command
Prompt (`cmd.exe`). You will need to set up your environment to use the
diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake
index 17f65999fa..a9f43a3ecb 100644
--- a/tensorflow/contrib/cmake/external/grpc.cmake
+++ b/tensorflow/contrib/cmake/external/grpc.cmake
@@ -35,7 +35,6 @@ else()
set(grpc_STATIC_LIBRARIES
${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc++_unsecure.a
${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a
- ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/third_party/cares/cares/lib/libcares.a
${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a)
endif()
diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake
index ab464bc99a..aba8a5244e 100644
--- a/tensorflow/contrib/cmake/external/protobuf.cmake
+++ b/tensorflow/contrib/cmake/external/protobuf.cmake
@@ -16,7 +16,7 @@ include (ExternalProject)
set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src)
set(PROTOBUF_URL https://github.com/google/protobuf.git)
-set(PROTOBUF_TAG b04e5cba356212e4e8c66c61bbe0c3a20537c5b9)
+set(PROTOBUF_TAG 396336eb961b75f03b25824fe86cf6490fb75e3a)
if(WIN32)
if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*")
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index b3e5b30826..9f96a4b797 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -476,10 +476,6 @@ if (tensorflow_BUILD_CC_TESTS)
"${tensorflow_source_dir}/tensorflow/core/profiler/internal/advisor/*_test.cc"
)
- list(REMOVE_ITEM tf_test_src_simple
- ${tf_core_profiler_test_srcs}
- )
-
set(tf_test_lib tf_test_lib)
add_library(${tf_test_lib} STATIC ${tf_src_testlib})
diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py
index 9212b69700..f09d156832 100644
--- a/tensorflow/contrib/data/__init__.py
+++ b/tensorflow/contrib/data/__init__.py
@@ -40,7 +40,6 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview.
@@rejection_resample
@@scan
@@shuffle_and_repeat
-@@sliding_window_batch
@@sloppy_interleave
@@unbatch
@@ -73,9 +72,6 @@ from tensorflow.contrib.data.python.ops.readers import SqlDataset
from tensorflow.contrib.data.python.ops.resampling import rejection_resample
from tensorflow.contrib.data.python.ops.scan_ops import scan
from tensorflow.contrib.data.python.ops.shuffle_ops import shuffle_and_repeat
-from tensorflow.contrib.data.python.ops.sliding import sliding_window_batch
-from tensorflow.python.data.ops.iterator_ops import Iterator
-from tensorflow.python.ops.parsing_ops import parse_single_example_v2 as parse_single_example
# pylint: enable=unused-import
from tensorflow.python.util.all_util import remove_undocumented
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 2c4d4adfda..22418b38e3 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -498,23 +498,6 @@ py_test(
],
)
-tf_py_test(
- name = "slide_dataset_op_test",
- size = "small",
- srcs = ["slide_dataset_op_test.py"],
- additional_deps = [
- "//tensorflow/contrib/data/python/ops:dataset_ops",
- "//tensorflow/contrib/data/python/ops:transformation_ops",
- "//tensorflow/python:array_ops",
- "//tensorflow/python:client_testlib",
- "//tensorflow/python:dtypes",
- "//tensorflow/python:errors",
- "//tensorflow/python:math_ops",
- "//tensorflow/python:sparse_tensor",
- "//third_party/py/numpy",
- ],
-)
-
filegroup(
name = "all_files",
srcs = glob(
diff --git a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py
deleted file mode 100644
index 33c48e20be..0000000000
--- a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py
+++ /dev/null
@@ -1,242 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for the experimental input pipeline ops."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.data.python.ops import sliding
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.platform import test
-
-
-class SlideDatasetTest(test.TestCase):
-
- def testSlideDataset(self):
- """Test an dataset that maps a TF function across its input elements."""
- components = (np.arange(7),
- np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
- np.array(37.0) * np.arange(7))
-
- count = array_ops.placeholder(dtypes.int64, shape=[])
- window_size = array_ops.placeholder(dtypes.int64, shape=[])
- stride = array_ops.placeholder(dtypes.int64, shape=[])
-
- def _map_fn(x, y, z):
- return math_ops.square(x), math_ops.square(y), math_ops.square(z)
-
- # The pipeline is TensorSliceDataset -> MapDataset(square_3) ->
- # RepeatDataset(count) -> _SlideDataset(window_size, stride).
- iterator = (dataset_ops.Dataset.from_tensor_slices(components)
- .map(_map_fn)
- .repeat(count)
- .apply(sliding.sliding_window_batch(window_size, stride))
- .make_initializable_iterator())
- init_op = iterator.initializer
- get_next = iterator.get_next()
-
- self.assertEqual([[None] + list(c.shape[1:]) for c in components],
- [t.shape.as_list() for t in get_next])
-
- with self.test_session() as sess:
- # Slide over a finite input, where the window_size divides the
- # total number of elements.
- sess.run(init_op, feed_dict={count: 20, window_size: 14, stride: 7})
- # Same formula with convolution layer.
- num_batches = (20 * 7 - 14) // 7 + 1
- for i in range(num_batches):
- result = sess.run(get_next)
- for component, result_component in zip(components, result):
- for j in range(14):
- self.assertAllEqual(component[(i*7 + j) % 7]**2,
- result_component[j])
- with self.assertRaises(errors.OutOfRangeError):
- sess.run(get_next)
-
- # Slide over a finite input, where the window_size does not
- # divide the total number of elements.
- sess.run(init_op, feed_dict={count: 20, window_size: 17, stride: 9})
-
- num_batches = (20 * 7 - 17) // 9 + 1
- for i in range(num_batches):
- result = sess.run(get_next)
- for component, result_component in zip(components, result):
- for j in range(17):
- self.assertAllEqual(component[(i*9 + j) % 7]**2,
- result_component[j])
- with self.assertRaises(errors.OutOfRangeError):
- sess.run(get_next)
-
- # Slide over a finite input, which is less than window_size,
- # should fail straight away.
- sess.run(init_op, feed_dict={count: 1, window_size: 10, stride: 4})
- with self.assertRaises(errors.OutOfRangeError):
- sess.run(get_next)
-
- sess.run(init_op, feed_dict={count: 1, window_size: 10, stride: 8})
- with self.assertRaises(errors.OutOfRangeError):
- sess.run(get_next)
-
- # Slide over an empty input should fail straight away.
- sess.run(init_op, feed_dict={count: 0, window_size: 8, stride: 4})
- with self.assertRaises(errors.OutOfRangeError):
- sess.run(get_next)
-
- # Empty window_size should be an initialization time error.
- with self.assertRaises(errors.InvalidArgumentError):
- sess.run(init_op, feed_dict={count: 14, window_size: 0, stride: 0})
-
- # Invalid stride should be an initialization time error.
- with self.assertRaises(errors.InvalidArgumentError):
- sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 0})
- with self.assertRaises(errors.InvalidArgumentError):
- sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 3})
- with self.assertRaises(errors.InvalidArgumentError):
- sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 5})
-
- def assertSparseValuesEqual(self, a, b):
- self.assertAllEqual(a.indices, b.indices)
- self.assertAllEqual(a.values, b.values)
- self.assertAllEqual(a.dense_shape, b.dense_shape)
-
- def testSlideSparse(self):
-
- def _sparse(i):
- return sparse_tensor.SparseTensorValue(
- indices=[[0]], values=(i * [1]), dense_shape=[1])
-
- iterator = dataset_ops.Dataset.range(10).map(_sparse).apply(
- sliding.sliding_window_batch(5, 3)).make_initializable_iterator()
- init_op = iterator.initializer
- get_next = iterator.get_next()
-
- with self.test_session() as sess:
- sess.run(init_op)
- num_batches = (10 - 5) // 3 + 1
- for i in range(num_batches):
- actual = sess.run(get_next)
- expected = sparse_tensor.SparseTensorValue(
- indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
- values=[i * 3, i * 3 + 1, i * 3 + 2, i * 3 + 3, i * 3 + 4],
- dense_shape=[5, 1])
- self.assertTrue(sparse_tensor.is_sparse(actual))
- self.assertSparseValuesEqual(actual, expected)
- with self.assertRaises(errors.OutOfRangeError):
- sess.run(get_next)
-
- def testSlideSparseWithDifferentDenseShapes(self):
-
- def _sparse(i):
- return sparse_tensor.SparseTensorValue(
- indices=array_ops.expand_dims(
- math_ops.range(i, dtype=dtypes.int64), 1),
- values=array_ops.fill([math_ops.to_int32(i)], i),
- dense_shape=[i])
-
- iterator = dataset_ops.Dataset.range(10).map(_sparse).apply(
- sliding.sliding_window_batch(5, 3)).make_initializable_iterator()
- init_op = iterator.initializer
- get_next = iterator.get_next()
-
- with self.test_session() as sess:
- sess.run(init_op)
- num_batches = (10 - 5) // 3 + 1
- for i in range(num_batches):
- actual = sess.run(get_next)
- expected_indices = []
- expected_values = []
- for j in range(5):
- for k in range(i * 3 + j):
- expected_indices.append([j, k])
- expected_values.append(i * 3 + j)
- expected = sparse_tensor.SparseTensorValue(
- indices=expected_indices,
- values=expected_values,
- dense_shape=[5, i * 3 + 5 - 1])
- self.assertTrue(sparse_tensor.is_sparse(actual))
- self.assertSparseValuesEqual(actual, expected)
- with self.assertRaises(errors.OutOfRangeError):
- sess.run(get_next)
-
- def testNestedSlideSparse(self):
-
- def _sparse(i):
- return sparse_tensor.SparseTensorValue(
- indices=[[0]], values=(i * [1]), dense_shape=[1])
-
- iterator = (dataset_ops.Dataset.range(10)
- .map(_sparse)
- .apply(sliding.sliding_window_batch(4, 2))
- .apply(sliding.sliding_window_batch(3, 1))
- .make_initializable_iterator())
- init_op = iterator.initializer
- get_next = iterator.get_next()
-
- with self.test_session() as sess:
- sess.run(init_op)
- # Slide: 1st batch.
- actual = sess.run(get_next)
- expected = sparse_tensor.SparseTensorValue(
- indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0],
- [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0],
- [2, 0, 0], [2, 1, 0], [2, 2, 0], [2, 3, 0]],
- values=[0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7],
- dense_shape=[3, 4, 1])
- self.assertTrue(sparse_tensor.is_sparse(actual))
- self.assertSparseValuesEqual(actual, expected)
- # Slide: 2nd batch.
- actual = sess.run(get_next)
- expected = sparse_tensor.SparseTensorValue(
- indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0],
- [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0],
- [2, 0, 0], [2, 1, 0], [2, 2, 0], [2, 3, 0]],
- values=[2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9],
- dense_shape=[3, 4, 1])
- self.assertTrue(sparse_tensor.is_sparse(actual))
- self.assertSparseValuesEqual(actual, expected)
- with self.assertRaises(errors.OutOfRangeError):
- sess.run(get_next)
-
- def testSlideShapeError(self):
-
- def generator():
- yield [1.0, 2.0, 3.0]
- yield [4.0, 5.0, 6.0]
- yield [7.0, 8.0, 9.0, 10.0]
-
- iterator = (dataset_ops.Dataset.from_generator(generator, dtypes.float32,
- output_shapes=[None])
- .apply(sliding.sliding_window_batch(3, 1))
- .make_initializable_iterator())
- next_element = iterator.get_next()
-
- with self.test_session() as sess:
- sess.run(iterator.initializer)
- with self.assertRaisesRegexp(
- errors.InvalidArgumentError,
- r"Cannot batch tensors with different shapes in component 0. "
- r"First element had shape \[3\] and element 2 had shape \[4\]."):
- sess.run(next_element)
-
-
-if __name__ == "__main__":
- test.main()
diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD
index c3331e9636..f03430c5c5 100644
--- a/tensorflow/contrib/data/python/ops/BUILD
+++ b/tensorflow/contrib/data/python/ops/BUILD
@@ -106,7 +106,6 @@ py_library(
"interleave_ops.py",
"resampling.py",
"scan_ops.py",
- "sliding.py",
"stats_ops.py",
"threadpool.py",
"unique.py",
diff --git a/tensorflow/contrib/data/python/ops/sliding.py b/tensorflow/contrib/data/python/ops/sliding.py
deleted file mode 100644
index 19cc3cb89f..0000000000
--- a/tensorflow/contrib/data/python/ops/sliding.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Sliding dataset transformations."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import nest
-from tensorflow.python.data.util import sparse
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import gen_dataset_ops
-
-
-class _SlideDataset(dataset_ops.Dataset):
- """A `Dataset` that passes a sliding window over its input."""
-
- def __init__(self, input_dataset, window_size, stride=1):
- """See `sliding_window_batch` for details."""
- super(_SlideDataset, self).__init__()
- self._input_dataset = input_dataset
- self._window_size = ops.convert_to_tensor(
- window_size, dtype=dtypes.int64, name="window_size")
- self._stride = ops.convert_to_tensor(
- stride, dtype=dtypes.int64, name="stride")
-
- def _as_variant_tensor(self):
- return gen_dataset_ops.slide_dataset(
- self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access
- window_size=self._window_size,
- stride=self._stride,
- output_shapes=nest.flatten(
- sparse.as_dense_shapes(self.output_shapes, self.output_classes)),
- output_types=nest.flatten(
- sparse.as_dense_types(self.output_types, self.output_classes)))
-
- @property
- def output_classes(self):
- return self._input_dataset.output_classes
-
- @property
- def output_shapes(self):
- input_shapes = self._input_dataset.output_shapes
- return nest.pack_sequence_as(input_shapes, [
- tensor_shape.vector(None).concatenate(s)
- for s in nest.flatten(self._input_dataset.output_shapes)
- ])
-
- @property
- def output_types(self):
- return self._input_dataset.output_types
-
-
-def sliding_window_batch(window_size, stride=1):
- """A sliding window with size of `window_size` and step of `stride`.
-
- This transformation passes a sliding window over this dataset. The
- window size is `window_size` and step size is `stride`. If the left
- elements cannot fill up the sliding window, this transformation will
- drop the final smaller element. For example:
-
- ```python
- # NOTE: The following examples use `{ ... }` to represent the
- # contents of a dataset.
- a = { [1], [2], [3], [4], [5], [6] }
-
- a.apply(tf.contrib.data.sliding_window_batch(window_size=3, stride=2)) ==
- {
- [[1], [2], [3]],
- [[3], [4], [5]],
- }
- ```
-
- Args:
- window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of
- elements in the sliding window.
- stride: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
- steps moving the sliding window forward for one iteration. The default
- is `1`. It must be in `[1, window_size)`.
-
- Returns:
- A `Dataset` transformation function, which can be passed to
- @{tf.data.Dataset.apply}.
- """
- def _apply_fn(dataset):
- return _SlideDataset(dataset, window_size, stride)
-
- return _apply_fn
diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index 1bd73ee704..6bd3f5f09b 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -454,7 +454,6 @@ cuda_py_test(
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:platform_test",
],
- tags = ["no_windows"], # TODO: needs investigation on Windows
)
cuda_py_test(
@@ -1144,7 +1143,6 @@ cuda_py_test(
"//tensorflow/python:math_ops",
"//tensorflow/python:platform_test",
],
- tags = ["no_windows"], # TODO: needs investigation on Windows
)
cuda_py_test(
diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD
index eb810e06dd..32aa2c0a4a 100644
--- a/tensorflow/contrib/eager/python/BUILD
+++ b/tensorflow/contrib/eager/python/BUILD
@@ -267,10 +267,7 @@ cuda_py_test(
"//tensorflow/python/eager:test",
"//tensorflow/python/keras",
],
- tags = [
- "no_windows", # TODO: needs investigation on Windows
- "notsan",
- ],
+ tags = ["notsan"],
)
filegroup(
diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD
index 2f6cfdf31e..f86331af6f 100644
--- a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD
+++ b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD
@@ -22,7 +22,6 @@ cuda_py_test(
":linear_regression",
"//tensorflow:tensorflow_py",
],
- tags = ["no_windows"], # TODO: needs investigation on Windows
)
cuda_py_test(
diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD
index ad8568ad44..90f10f1fa8 100644
--- a/tensorflow/contrib/factorization/BUILD
+++ b/tensorflow/contrib/factorization/BUILD
@@ -224,10 +224,7 @@ py_test(
srcs = ["python/ops/kmeans_test.py"],
shard_count = 4,
srcs_version = "PY2AND3",
- tags = [
- "nomac", # b/73741358
- "notsan", # b/67512932
- ],
+ tags = ["notsan"], # b/67512932
deps = [
":factorization_py",
":factorization_py_CYCLIC_DEPENDENCIES_THAT_NEED_TO_GO",
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
index 35341406a0..e61221a6b0 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
@@ -256,9 +256,6 @@ Status ReadInfoFile(const string& filename, uint32* width, uint32* height,
if (p != std::string::npos) {
string rgb24 = line.substr(p + 9, line.find(" ", p + 9));
rgb24 = rgb24.substr(0, rgb24.find(","));
- // Strip anything after " ", in case the format is
- // `640x360 [SAR 1:1 DAR 16:9]`
- rgb24 = rgb24.substr(0, rgb24.find(" "));
string rgb24_width = rgb24.substr(0, rgb24.find("x"));
string rgb24_height = rgb24.substr(rgb24_width.length() + 1);
if (strings::safe_strtou32(rgb24_width, &width_value) &&
@@ -273,10 +270,8 @@ Status ReadInfoFile(const string& filename, uint32* width, uint32* height,
// We only look for the first stream mapping to have the number of the
// frames.
// Once processed we will not further process stream mapping section.
- if (line.find("frame=") == 0) {
- // The format might be `frame= 166 ` or `frame=12488 `
- string number = line.substr(6);
- number = number.substr(number.find_first_not_of(" "));
+ if (line.find("frame= ") == 0) {
+ string number = line.substr(8, line.find(" ", 8));
number = number.substr(0, number.find(" "));
if (strings::safe_strtou32(number, &frames_value)) {
in_mapping = false;
diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD
index ff6f3b7441..0eb0e3cbe2 100644
--- a/tensorflow/contrib/gan/BUILD
+++ b/tensorflow/contrib/gan/BUILD
@@ -354,7 +354,6 @@ py_test(
name = "classifier_metrics_test",
srcs = ["python/eval/python/classifier_metrics_test.py"],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
deps = [
":classifier_metrics",
"//tensorflow/core:protos_all_py",
diff --git a/tensorflow/contrib/kafka/BUILD b/tensorflow/contrib/kafka/BUILD
index 1c3974871c..efb403462a 100644
--- a/tensorflow/contrib/kafka/BUILD
+++ b/tensorflow/contrib/kafka/BUILD
@@ -1,93 +1,66 @@
-package(default_visibility = ["//tensorflow:internal"])
+package(
+ default_visibility = ["//visibility:private"],
+)
licenses(["notice"]) # Apache 2.0
exports_files(["LICENSE"])
-load(
- "//tensorflow:tensorflow.bzl",
- "tf_gen_op_wrapper_py",
- "tf_kernel_library",
- "tf_custom_op_library",
- "tf_custom_op_py_library",
- "tf_gen_op_libs",
- "tf_py_test",
-)
-
-py_library(
- name = "kafka",
- srcs = ["__init__.py"],
- srcs_version = "PY2AND3",
- deps = [
- ":dataset_ops",
- ],
-)
-
-tf_custom_op_library(
- name = "_dataset_ops.so",
- srcs = ["ops/dataset_ops.cc"],
- deps = [":dataset_kernels"],
-)
-
-tf_gen_op_libs(
- op_lib_names = ["dataset_ops"],
-)
+load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs")
+load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")
+load("//tensorflow:tensorflow.bzl", "tf_kernel_library")
+load("//tensorflow:tensorflow.bzl", "tf_py_test")
-cc_library(
- name = "dataset_kernels",
+tf_kernel_library(
+ name = "kafka_kernels",
srcs = ["kernels/kafka_dataset_ops.cc"],
+ visibility = ["//visibility:public"],
deps = [
- "//tensorflow/core:framework_headers_lib",
+ "//tensorflow/core:framework",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:lib_internal",
+ "//tensorflow/core/kernels:bounds_check_lib",
+ "//tensorflow/core/kernels:dataset",
"//third_party/eigen3",
"@kafka",
- "@protobuf_archive//:protobuf_headers",
],
- alwayslink = 1,
)
-py_library(
- name = "dataset_ops",
- srcs = [
- "python/ops/kafka_dataset_ops.py",
- ],
- srcs_version = "PY2AND3",
+tf_gen_op_libs(
+ op_lib_names = ["kafka_ops"],
deps = [
- ":kafka_op_loader",
- "//tensorflow/python:dataset_ops_gen",
- "//tensorflow/python:util",
- "//tensorflow/python/data/ops:dataset_ops",
- "//tensorflow/python/data/util:nest",
+ "//tensorflow/core:lib",
],
)
tf_gen_op_wrapper_py(
- name = "gen_dataset_ops",
- out = "python/ops/gen_dataset_ops.py",
- deps = ["//tensorflow/contrib/kafka:dataset_ops_op_lib"],
-)
-
-tf_kernel_library(
- name = "dataset_ops_kernels",
- deps = [
- ":dataset_kernels",
- "//tensorflow/core:framework",
- ],
- alwayslink = 1,
+ name = "gen_kafka_ops",
+ out = "python/ops/gen_kafka_ops.py",
+ require_shape_functions = True,
+ deps = [":kafka_ops_op_lib"],
)
-tf_custom_op_py_library(
- name = "kafka_op_loader",
- srcs = ["python/ops/kafka_op_loader.py"],
- dso = ["//tensorflow/contrib/kafka:_dataset_ops.so"],
- kernels = [
- ":dataset_ops_kernels",
- "//tensorflow/contrib/kafka:dataset_ops_op_lib",
+py_library(
+ name = "kafka",
+ srcs = [
+ "__init__.py",
+ "python/ops/kafka_dataset_ops.py",
],
srcs_version = "PY2AND3",
+ visibility = ["//visibility:public"],
deps = [
- ":gen_dataset_ops",
+ ":gen_kafka_ops",
"//tensorflow/contrib/util:util_py",
+ "//tensorflow/python:array_ops",
+ "//tensorflow/python:control_flow_ops",
+ "//tensorflow/python:framework",
+ "//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:platform",
+ "//tensorflow/python:state_ops",
+ "//tensorflow/python:training",
+ "//tensorflow/python/data/ops:dataset_ops",
+ "//tensorflow/python/data/ops:iterator_ops",
+ "//tensorflow/python/data/ops:readers",
],
)
@@ -115,7 +88,6 @@ tf_py_test(
],
tags = [
"manual",
- "no_windows",
"notap",
],
)
@@ -123,9 +95,7 @@ tf_py_test(
filegroup(
name = "all_files",
srcs = glob(
- include = [
- "**/*",
- ],
+ ["**/*"],
exclude = [
"**/METADATA",
"**/OWNERS",
diff --git a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc
index a4cd4a2cc4..88ef5f3571 100644
--- a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc
+++ b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#include "tensorflow/core/framework/dataset.h"
+#include "tensorflow/core/kernels/dataset.h"
+
+#include "tensorflow/core/framework/tensor.h"
#include "src-cpp/rdkafkacpp.h"
diff --git a/tensorflow/contrib/kafka/ops/dataset_ops.cc b/tensorflow/contrib/kafka/ops/dataset_ops.cc
deleted file mode 100644
index 8cdf16103b..0000000000
--- a/tensorflow/contrib/kafka/ops/dataset_ops.cc
+++ /dev/null
@@ -1,44 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/framework/common_shape_fns.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/shape_inference.h"
-
-namespace tensorflow {
-
-REGISTER_OP("KafkaDataset")
- .Input("topics: string")
- .Input("servers: string")
- .Input("group: string")
- .Input("eof: bool")
- .Input("timeout: int64")
- .Output("handle: variant")
- .SetIsStateful()
- .SetShapeFn(shape_inference::ScalarShape)
- .Doc(R"doc(
-Creates a dataset that emits the messages of one or more Kafka topics.
-
-topics: A `tf.string` tensor containing one or more subscriptions,
- in the format of [topic:partition:offset:length],
- by default length is -1 for unlimited.
-servers: A list of bootstrap servers.
-group: The consumer group id.
-eof: If True, the kafka reader will stop on EOF.
-timeout: The timeout value for the Kafka Consumer to wait
- (in millisecond).
-)doc");
-
-} // namespace tensorflow
diff --git a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py
index a1624614d1..8e51d27a34 100644
--- a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py
+++ b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py
@@ -17,9 +17,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-from tensorflow.contrib.kafka.python.ops import kafka_op_loader # pylint: disable=unused-import
-from tensorflow.contrib.kafka.python.ops import gen_dataset_ops
-from tensorflow.python.data.ops.dataset_ops import Dataset
+from tensorflow.contrib.kafka.python.ops import gen_kafka_ops
+from tensorflow.python.data.ops.readers import Dataset
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
@@ -59,8 +58,8 @@ class KafkaDataset(Dataset):
timeout, dtype=dtypes.int64, name="timeout")
def _as_variant_tensor(self):
- return gen_dataset_ops.kafka_dataset(self._topics, self._servers,
- self._group, self._eof, self._timeout)
+ return gen_kafka_ops.kafka_dataset(self._topics, self._servers, self._group,
+ self._eof, self._timeout)
@property
def output_classes(self):
diff --git a/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py b/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py
deleted file mode 100644
index ec2fdea962..0000000000
--- a/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Python helper for loading kafka ops and kernels."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.util import loader
-from tensorflow.python.platform import resource_loader
-
-_dataset_ops = loader.load_op_library(
- resource_loader.get_path_to_datafile("../../_dataset_ops.so"))
diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD
index d1c449402a..146ae8b7e2 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/kfac/python/kernel_tests/BUILD
@@ -114,7 +114,6 @@ py_test(
name = "utils_test",
srcs = ["utils_test.py"],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
deps = [
"//tensorflow/contrib/kfac/python/ops:utils",
"//tensorflow/contrib/tpu",
diff --git a/tensorflow/contrib/labeled_tensor/BUILD b/tensorflow/contrib/labeled_tensor/BUILD
index 544065dac6..894e6f6946 100644
--- a/tensorflow/contrib/labeled_tensor/BUILD
+++ b/tensorflow/contrib/labeled_tensor/BUILD
@@ -70,7 +70,6 @@ py_test(
"python/ops/core_test.py",
],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
deps = [
":_typecheck",
":core",
diff --git a/tensorflow/contrib/layers/BUILD b/tensorflow/contrib/layers/BUILD
index cc7bbabf21..852d06e1e3 100644
--- a/tensorflow/contrib/layers/BUILD
+++ b/tensorflow/contrib/layers/BUILD
@@ -188,7 +188,6 @@ py_test(
size = "small",
srcs = ["python/layers/normalization_test.py"],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
deps = [
":layers_py",
"//tensorflow/contrib/framework:framework_py",
@@ -354,7 +353,6 @@ py_test(
size = "small",
srcs = ["python/ops/sparse_ops_test.py"],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
deps = [
":layers_py",
"//tensorflow/python:array_ops",
diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py
index ffa208540d..b62e3050cd 100644
--- a/tensorflow/contrib/layers/python/layers/embedding_ops.py
+++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py
@@ -470,7 +470,7 @@ def embedding_lookup_unique(params, ids, name=None):
ids = ops.convert_to_tensor(ids)
shape = array_ops.shape(ids)
ids_flat = array_ops.reshape(
- ids, math_ops.reduce_prod(shape, keepdims=True))
+ ids, math_ops.reduce_prod(shape, keep_dims=True))
unique_ids, idx = array_ops.unique(ids_flat)
unique_embeddings = embedding_ops.embedding_lookup(params, unique_ids)
embeds_flat = array_ops.gather(unique_embeddings, idx)
diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD
index b05f5eeaee..f837ca3265 100644
--- a/tensorflow/contrib/learn/BUILD
+++ b/tensorflow/contrib/learn/BUILD
@@ -5,8 +5,6 @@ licenses(["notice"]) # Apache 2.0
exports_files(["LICENSE"])
-load("//tensorflow:tensorflow.bzl", "py_test")
-
package(default_visibility = [
"//engedu/ml/tf_from_scratch:__pkg__",
"//tensorflow:internal",
@@ -117,7 +115,6 @@ py_test(
size = "small",
srcs = ["python/learn/learn_io/data_feeder_test.py"],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
deps = [
":learn",
"//tensorflow/python:client_testlib",
@@ -173,7 +170,6 @@ tf_py_test(
"//tensorflow/python:variables",
"//tensorflow/python/estimator",
],
- tags = ["no_windows"], # TODO: needs investigation on Windows
)
py_test(
@@ -192,7 +188,6 @@ py_test(
size = "small",
srcs = ["python/learn/graph_actions_test.py"],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
deps = [
":learn",
"//tensorflow/contrib/framework:framework_py",
@@ -431,10 +426,7 @@ py_test(
size = "medium",
srcs = ["python/learn/estimators/kmeans_test.py"],
srcs_version = "PY2AND3",
- tags = [
- "noasan", # b/73741358
- "nomac",
- ],
+ tags = ["noasan"],
deps = [
":learn",
"//tensorflow/python:array_ops",
@@ -593,7 +585,6 @@ py_test(
size = "small",
srcs = ["python/learn/learn_io/io_test.py"],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
deps = [
":learn",
"//tensorflow/contrib/learn/python/learn/datasets",
@@ -823,7 +814,6 @@ py_test(
size = "small",
srcs = ["python/learn/utils/saved_model_export_utils_test.py"],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
deps = [
":learn",
"//tensorflow/contrib/layers:layers_py",
diff --git a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py
index 8f9811cf25..b3b067b8e1 100644
--- a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py
+++ b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py
@@ -61,7 +61,7 @@ def embedding_lookup(params, ids, name='embedding_lookup'):
ids = ops.convert_to_tensor(ids)
shape = array_ops_.shape(ids)
ids_flat = array_ops_.reshape(
- ids, math_ops.reduce_prod(shape, keepdims=True))
+ ids, math_ops.reduce_prod(shape, keep_dims=True))
embeds_flat = nn.embedding_lookup(params, ids_flat, name)
embed_shape = array_ops_.concat([shape, [-1]], 0)
embeds = array_ops_.reshape(embeds_flat, embed_shape)
diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile
index b4504f246a..7f31629272 100644
--- a/tensorflow/contrib/lite/Makefile
+++ b/tensorflow/contrib/lite/Makefile
@@ -27,10 +27,10 @@ LIBDIR := $(MAKEFILE_DIR)/gen/lib/
GENDIR := $(MAKEFILE_DIR)/gen/obj/
# Settings for the host compiler.
-CXX := $(CC_PREFIX)gcc
+CXX := $(CC_PREFIX) gcc
CXXFLAGS := --std=c++11 -O3 -DNDEBUG
-CC := $(CC_PREFIX)gcc
-CFLAGS := -O3 -DNDEBUG
+CC := $(CC_PREFIX) gcc
+CFLAGS :=
LDOPTS :=
LDOPTS += -L/usr/local/lib
ARFLAGS := -r
@@ -57,11 +57,10 @@ LIBS := \
# If we're on Linux, also link in the dl library.
ifeq ($(HOST_OS),LINUX)
- LIBS += -ldl
+ LIBS += -ldl -lpthread
endif
include $(MAKEFILE_DIR)/ios_makefile.inc
-include $(MAKEFILE_DIR)/rpi_makefile.inc
# This library is the main target for this makefile. It will contain a minimal
# runtime that can be linked in to other programs.
diff --git a/tensorflow/contrib/lite/arena_planner.h b/tensorflow/contrib/lite/arena_planner.h
index f84b3dad95..58bc164619 100644
--- a/tensorflow/contrib/lite/arena_planner.h
+++ b/tensorflow/contrib/lite/arena_planner.h
@@ -33,7 +33,7 @@ class AllocationInfo;
// each tensor needs to be allocated and deallocated, and preallocates all the
// necessary memory (the PlanAllocations phase). It then assigns portions of
// this memory buffer to each tensor (the ExecuteAllocations phase). Tensors may
-// share some of the buffer if a tensor B is to be allocated after another tensor
+// share some of the bufer if a tensor B is to be allocated after another tensor
// A has been deallocated.
//
// If dynamic tensors are used the planning steps can be repeated during model
diff --git a/tensorflow/contrib/lite/build_rpi_lib.sh b/tensorflow/contrib/lite/build_rpi_lib.sh
deleted file mode 100755
index 3824b16412..0000000000
--- a/tensorflow/contrib/lite/build_rpi_lib.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash -x
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-cd "$SCRIPT_DIR/../../.."
-
-CC_PREFIX=arm-linux-gnueabihf- make -j 3 -f tensorflow/contrib/lite/Makefile TARGET=RPI TARGET_ARCH=armv7
diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h
index ea3ae3489e..2218ea8eac 100644
--- a/tensorflow/contrib/lite/builtin_ops.h
+++ b/tensorflow/contrib/lite/builtin_ops.h
@@ -24,7 +24,7 @@ extern "C" {
#endif // __cplusplus
// The enum for builtin operators.
-// Note: CUSTOM and DELEGATE are 2 special ops which are not real builtin
+// Note: CUSTOM and DELEGATE are 2 special ops which are not real biultin
// ops.
typedef enum {
kTfLiteBuiltinAdd = 0,
diff --git a/tensorflow/contrib/lite/error_reporter.h b/tensorflow/contrib/lite/error_reporter.h
index 3c5f805f12..da193d2586 100644
--- a/tensorflow/contrib/lite/error_reporter.h
+++ b/tensorflow/contrib/lite/error_reporter.h
@@ -30,7 +30,7 @@ namespace tflite {
// va_list args;
// foo.Report("test %d", args); // where args is va_list
//
-// Subclass ErrorReporter to provide another reporting destination.
+// Sublclass ErrorReporter to provide another reporting destination.
// For example, if you have a GUI program, you might redirect to a buffer
// that drives a GUI error log box.
class ErrorReporter {
diff --git a/tensorflow/contrib/lite/g3doc/rpi.md b/tensorflow/contrib/lite/g3doc/rpi.md
deleted file mode 100644
index 7a3a231626..0000000000
--- a/tensorflow/contrib/lite/g3doc/rpi.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# TensorFlow Lite for Raspberry Pi
-
-## Cross compiling
-### Installing toolchian
-This has been tested on Ubuntu 16.04.3 64bit and Tensorflow devel docker image [tensorflow/tensorflow:nightly-devel](https://hub.docker.com/r/tensorflow/tensorflow/tags/).
-
-To cross compiling TensorFlow Lite. First you should install the toolchain and libs.
-```bash
-sudo apt-get update
-sudo apt-get install crossbuild-essential-armhf
-```
-> If you are using docker, you may not use `sudo`
-
-### Building
-Clone this Tensorflow repository, Run this script at the root of the repository to download all the dependencies:
-> The Tensorflow repository is in `/tensorflow` if you are using `tensorflow/tensorflow:nightly-devel` docker image, just try it.
-```bash
-./tensorflow/contrib/lite/download_dependencies.sh
-```
-Note than you only need to to this once.
-
-You should then be able to compile:
-```bash
-./tensorflow/contrib/lite/build_rpi_lib.sh
-```
-
-This should compile a static library in:
-`tensorflow/contrib/lite/gen/lib/rpi_armv7/libtensorflow-lite.a`.
-
-## Native compiling
-This has been tested on Raspberry Pi 3b, Raspbian GNU/Linux 9.1 (stretch), gcc version 6.3.0 20170516 (Raspbian 6.3.0-18+rpi1).
-
-Log in to you RPI, install the toolchain.
-```bash
-sudo apt-get instal build-essential
-```
-
-First, clone this TensorFlow repository. Run this at the root of the repository:
-```bash
-./tensorflow/contrib/lite/download_dependencies.sh
-```
-Note than you only need to to this once.
-
-You should then be able to compile:
-```bash
-./tensorflow/contrib/lite/build_rpi_lib.sh
-```
-
-This should compile a static library in:
-`tensorflow/contrib/lite/gen/lib/rpi_armv7/libtensorflow-lite.a`.
diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h
index 3749869f58..af143370ee 100644
--- a/tensorflow/contrib/lite/interpreter.h
+++ b/tensorflow/contrib/lite/interpreter.h
@@ -481,7 +481,7 @@ class Interpreter {
// During Invoke(), Interpreter will allocate input tensors first, which are
// known to be fixed size. Then it will allocate outputs from nodes as many
// as possible. When there is a node that produces dynamic sized tensor.
- // Interpreter will stop allocating tensors, set the value of next allocate
+ // Intepreter will stop allocating tensors, set the value of next allocate
// node id, and execute the node to generate the output tensor before continue
// to allocate successors. This process repeats until all nodes are executed.
// NOTE: this relies on the order of nodes that is in topological order.
diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc
index 72d4acedbe..7a029c7df8 100644
--- a/tensorflow/contrib/lite/interpreter_test.cc
+++ b/tensorflow/contrib/lite/interpreter_test.cc
@@ -40,7 +40,7 @@ TEST(BasicInterpreter, InvokeInvalidModel) {
ASSERT_EQ(interpreter.Invoke(), kTfLiteOk);
}
-// Test size accessor functions.
+// Test size accesser functions.
TEST(BasicInterpreter, TestSizeFunctions) {
Interpreter interpreter;
int base_index;
diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc
index e0cd12f1b4..b91ba1a03d 100644
--- a/tensorflow/contrib/lite/kernels/conv.cc
+++ b/tensorflow/contrib/lite/kernels/conv.cc
@@ -64,7 +64,7 @@ struct OpData {
TfLitePaddingValues padding;
// The scaling factor from input to output (aka the 'real multiplier') can
- // be represented as a fixed point multiplier plus a left shift.
+ // be represented as a fixed point multipler plus a left shift.
int32_t output_multiplier;
int output_shift;
// The range of the fused activation layer. For example for kNone and
diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc
index cad9ce114c..15dbfe08c8 100644
--- a/tensorflow/contrib/lite/kernels/depthwise_conv.cc
+++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc
@@ -52,7 +52,7 @@ enum KernelType {
struct OpData {
TfLitePaddingValues padding;
// The scaling factor from input to output (aka the 'real multiplier') can
- // be represented as a fixed point multiplier plus a left shift.
+ // be represented as a fixed point multipler plus a left shift.
int32_t output_multiplier;
int output_shift;
// The range of the fused activation layer. For example for kNone and
diff --git a/tensorflow/contrib/lite/kernels/fully_connected.cc b/tensorflow/contrib/lite/kernels/fully_connected.cc
index 888e67966c..a77fe94e49 100644
--- a/tensorflow/contrib/lite/kernels/fully_connected.cc
+++ b/tensorflow/contrib/lite/kernels/fully_connected.cc
@@ -48,7 +48,7 @@ enum KernelType {
struct OpData {
// The scaling factor from input to output (aka the 'real multiplier') can
- // be represented as a fixed point multiplier plus a left shift.
+ // be represented as a fixed point multipler plus a left shift.
int32_t output_multiplier;
int output_shift;
// The range of the fused activation layer. For example for kNone and
diff --git a/tensorflow/contrib/lite/kernels/kernel_util.h b/tensorflow/contrib/lite/kernels/kernel_util.h
index 21da1daff7..28f53b9fbb 100644
--- a/tensorflow/contrib/lite/kernels/kernel_util.h
+++ b/tensorflow/contrib/lite/kernels/kernel_util.h
@@ -58,7 +58,7 @@ inline bool IsConstantTensor(TfLiteTensor* tensor) {
}
// Determines whether tensor is dynamic. Note that a tensor can be non-const and
-// not dynamic. This function specifically checks for a dynamic tensor.
+// not dynamic. This function specificially checks for a dynamic tensor.
inline bool IsDynamicTensor(TfLiteTensor* tensor) {
return tensor->allocation_type == kTfLiteDynamic;
}
diff --git a/tensorflow/contrib/lite/kernels/lsh_projection.cc b/tensorflow/contrib/lite/kernels/lsh_projection.cc
index 0ee35775d5..5f73b56ed9 100644
--- a/tensorflow/contrib/lite/kernels/lsh_projection.cc
+++ b/tensorflow/contrib/lite/kernels/lsh_projection.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-// LSH Projection projects an input to a bit vector via locality sensitive
+// LSH Projection projects an input to a bit vector via locality senstive
// hashing.
//
// Options:
diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc
index 8cf1165135..b9255b23a5 100644
--- a/tensorflow/contrib/lite/kernels/lstm.cc
+++ b/tensorflow/contrib/lite/kernels/lstm.cc
@@ -213,9 +213,9 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
// present.
// 2) If projection weight is present, then projection bias is optional.
// TODO(ghodrat): make sure this is correct.
- const bool projection_tensors_consistent =
+ const bool projecton_tensors_consistent =
((projection_weights != nullptr) || (projection_bias == nullptr));
- TF_LITE_ENSURE(context, projection_tensors_consistent == true);
+ TF_LITE_ENSURE(context, projecton_tensors_consistent == true);
return kTfLiteOk;
}
@@ -357,7 +357,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const int n_output = recurrent_to_output_weights->dims->data[1];
// Since we have already checked that weights are all there or none, we can
- // check the existence of only one to get the condition.
+ // check the existense of only one to the get the condition.
const bool use_cifg = (input_to_input_weights == nullptr);
const bool use_peephole = (cell_to_output_weights != nullptr);
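For context on the two lstm.cc hunks above: the kernel derives the LSTM variant from which optional tensors are supplied, so projection bias is only allowed when projection weights exist, CIFG mode is inferred from missing input-to-input weights, and peephole mode from present cell-to-output weights. A minimal standalone sketch of that logic, using hypothetical Python stand-ins where `None` marks an absent tensor:

```python
def lstm_variant_flags(input_to_input_weights, cell_to_output_weights,
                       projection_weights, projection_bias):
    """Derive LSTM variant flags from which optional tensors are present.

    `None` stands in for an absent optional tensor; this mirrors the checks in
    the hunks above as a sketch, not the kernel code itself.
    """
    # Projection bias is only meaningful when projection weights exist.
    projection_consistent = (projection_weights is not None) or (projection_bias is None)
    if not projection_consistent:
        raise ValueError("projection bias given without projection weights")
    # CIFG (coupled input/forget gate) is used when input-to-input weights are absent.
    use_cifg = input_to_input_weights is None
    # Peephole connections are used when cell-to-output weights are present.
    use_peephole = cell_to_output_weights is not None
    return use_cifg, use_peephole


print(lstm_variant_flags(None, [0.1], [0.2], None))   # (True, True)
print(lstm_variant_flags([0.3], None, None, None))    # (False, False)
```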
diff --git a/tensorflow/contrib/lite/kernels/reshape.cc b/tensorflow/contrib/lite/kernels/reshape.cc
index 438f70d311..f3e6ddc9f4 100644
--- a/tensorflow/contrib/lite/kernels/reshape.cc
+++ b/tensorflow/contrib/lite/kernels/reshape.cc
@@ -49,20 +49,20 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TfLiteIntArray* output_size = TfLiteIntArrayCreate(params->num_dimensions);
int num_output_elements = 1;
- int stretch_dim = -1;
+ int strech_dim = -1;
for (int i = 0; i < params->num_dimensions; ++i) {
int value = params->shape[i];
if (value == -1) {
- TF_LITE_ENSURE_EQ(context, stretch_dim, -1);
- stretch_dim = i;
+ TF_LITE_ENSURE_EQ(context, strech_dim, -1);
+ strech_dim = i;
} else {
num_output_elements *= value;
output_size->data[i] = value;
}
}
- if (stretch_dim != -1) {
- output_size->data[stretch_dim] = num_input_elements / num_output_elements;
- num_output_elements *= output_size->data[stretch_dim];
+ if (strech_dim != -1) {
+ output_size->data[strech_dim] = num_input_elements / num_output_elements;
+ num_output_elements *= output_size->data[strech_dim];
}
TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
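The reshape hunk above implements the usual "-1 means infer this dimension" rule: at most one requested dimension may be -1, and it is filled in so the output element count matches the input. A standalone sketch of the same rule, assuming plain Python lists in place of TfLiteIntArray:

```python
def resolve_reshape(input_num_elements, requested_shape):
    """Fill in a single -1 ("stretch") dimension so element counts match."""
    stretch_dim = -1
    num_output_elements = 1
    output_shape = list(requested_shape)
    for i, value in enumerate(requested_shape):
        if value == -1:
            if stretch_dim != -1:
                raise ValueError("only one dimension may be -1")
            stretch_dim = i
        else:
            num_output_elements *= value
    if stretch_dim != -1:
        output_shape[stretch_dim] = input_num_elements // num_output_elements
        num_output_elements *= output_shape[stretch_dim]
    if num_output_elements != input_num_elements:
        raise ValueError("requested shape does not match input element count")
    return output_shape


print(resolve_reshape(8, [-1, 2, 2]))  # [2, 2, 2]
```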
diff --git a/tensorflow/contrib/lite/kernels/reshape_test.cc b/tensorflow/contrib/lite/kernels/reshape_test.cc
index aecbd0399f..0fbcf6e6aa 100644
--- a/tensorflow/contrib/lite/kernels/reshape_test.cc
+++ b/tensorflow/contrib/lite/kernels/reshape_test.cc
@@ -60,7 +60,7 @@ TEST(ReshapeOpTest, TooManyDimensions) {
TEST(ReshapeOpTest, TooManySpecialDimensions) {
EXPECT_DEATH(ReshapeOpModel({1, 2, 4, 1}, {-1, -1, 2, 4}),
- "stretch_dim != -1");
+ "strech_dim != -1");
}
TEST(ReshapeOpTest, SimpleTest) {
diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc
index 0bb28b50b2..373310bd87 100644
--- a/tensorflow/contrib/lite/kernels/test_util.cc
+++ b/tensorflow/contrib/lite/kernels/test_util.cc
@@ -141,8 +141,8 @@ void SingleOpModel::SetBuiltinOp(BuiltinOperator type,
void SingleOpModel::SetCustomOp(
const string& name, const std::vector<uint8_t>& custom_option,
- const std::function<TfLiteRegistration*()>& registration) {
- custom_registrations_[name] = registration;
+ const std::function<TfLiteRegistration*()>& registeration) {
+ custom_registrations_[name] = registeration;
opcodes_.push_back(
CreateOperatorCodeDirect(builder_, BuiltinOperator_CUSTOM, name.data()));
operators_.push_back(CreateOperator(
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
index 42941a97db..508a570e2e 100644
--- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
@@ -360,7 +360,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const int n_output = recurrent_to_output_weights->dims->data[1];
// Since we have already checked that weights are all there or none, we can
- // check the existence of only one to get the condition.
+ // check the existense of only one to the get the condition.
const bool use_cifg = (input_to_input_weights == nullptr);
const bool use_peephole = (cell_to_output_weights != nullptr);
diff --git a/tensorflow/contrib/lite/memory_planner.h b/tensorflow/contrib/lite/memory_planner.h
index 0294ec815c..5cd6c20850 100644
--- a/tensorflow/contrib/lite/memory_planner.h
+++ b/tensorflow/contrib/lite/memory_planner.h
@@ -34,8 +34,8 @@ class MemoryPlanner {
// [first_node, last_node].
virtual TfLiteStatus ExecuteAllocations(int first_node, int last_node) = 0;
- // Invalidates allocations made earlier. This is called when tensors sizes
- // have changed. All planned allocations remain, but can't be used until
+ // Invalidates allocations made earliers. This is called when tensors sizes
+ // have change. All planned allocations remain, but can't be used until
// ExecuteAllocations() is called.
virtual TfLiteStatus ResetAllocations() = 0;
};
diff --git a/tensorflow/contrib/lite/model.h b/tensorflow/contrib/lite/model.h
index 38eea0e26b..51a622a28d 100644
--- a/tensorflow/contrib/lite/model.h
+++ b/tensorflow/contrib/lite/model.h
@@ -81,7 +81,7 @@ class FlatBufferModel {
const tflite::Model* model_spec,
ErrorReporter* error_reporter = DefaultErrorReporter());
- // Releases memory or unmaps mmaped memory.
+ // Releases memory or unmaps mmaped meory.
~FlatBufferModel();
// Copying or assignment is disallowed to simplify ownership semantics.
diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
index bd49d327c9..76032771af 100644
--- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
+++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
@@ -569,7 +569,7 @@ enum {
ANEURALNETWORKS_LOGISTIC = 14,
/**
- * Projects an input to a bit vector via locality sensitive hashing.
+ * Projects an input to a bit vector via locality senstive hashing.
*
* Inputs:
* * 0: Hash functions. Dim.size == 2, DataType: Float.
diff --git a/tensorflow/contrib/lite/rpi_makefile.inc b/tensorflow/contrib/lite/rpi_makefile.inc
deleted file mode 100644
index 832ef5824b..0000000000
--- a/tensorflow/contrib/lite/rpi_makefile.inc
+++ /dev/null
@@ -1,33 +0,0 @@
-# Settings for Raspberry Pi.
-ifeq ($(TARGET), RPI)
- ifeq ($(TARGET_ARCH), armv7)
- CXXFLAGS += \
- -march=armv7-a \
- -mfpu=neon-vfpv4 \
- -funsafe-math-optimizations \
- -ftree-vectorize
-
- CCFLAGS += \
- -march=armv7-a \
- -mfpu=neon-vfpv4 \
- -funsafe-math-optimizations \
- -ftree-vectorize
-
- LDFLAGS := \
- -Wl,--no-export-dynamic \
- -Wl,--exclude-libs,ALL \
- -Wl,--gc-sections \
- -Wl,--as-needed
- endif
-
- LIBS := \
- -lstdc++ \
- -lpthread \
- -lm \
- -ldl
-
- OBJDIR := $(OBJDIR)rpi_$(TARGET_ARCH)/
- LIBDIR := $(LIBDIR)rpi_$(TARGET_ARCH)/
- BINDIR := $(BINDIR)rpi_$(TARGET_ARCH)/
- DEPDIR := $(DEPDIR)rpi_$(TARGET_ARCH)/
-endif
diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc
index 640972de77..08bcfe4516 100644
--- a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc
+++ b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc
@@ -46,7 +46,7 @@ extern "C" {
#endif // __cplusplus
// The enum for builtin operators.
-// Note: CUSTOM and DELEGATE are 2 special ops which are not real builtin
+// Note: CUSTOM and DELEGATE are 2 special ops which are not real biultin
// ops.
typedef enum {
)";
diff --git a/tensorflow/contrib/lite/simple_memory_arena.cc b/tensorflow/contrib/lite/simple_memory_arena.cc
index 2f2004f56b..4aab244989 100644
--- a/tensorflow/contrib/lite/simple_memory_arena.cc
+++ b/tensorflow/contrib/lite/simple_memory_arena.cc
@@ -113,21 +113,21 @@ TfLiteStatus SimpleMemoryArena::Commit(TfLiteContext* context) {
underlying_buffer_size_ = required_size;
underlying_buffer_aligned_ptr_ = new_underlying_buffer_aligned_ptr;
}
- committed_ = true;
+ commited_ = true;
return underlying_buffer_ != nullptr ? kTfLiteOk : kTfLiteError;
}
TfLiteStatus SimpleMemoryArena::ResolveAlloc(TfLiteContext* context,
const ArenaAlloc& alloc,
char** output_ptr) {
- TF_LITE_ENSURE(context, committed_);
+ TF_LITE_ENSURE(context, commited_);
TF_LITE_ENSURE(context, output_ptr != nullptr);
*output_ptr = underlying_buffer_aligned_ptr_ + alloc.offset;
return kTfLiteOk;
}
TfLiteStatus SimpleMemoryArena::Clear() {
- committed_ = false;
+ commited_ = false;
high_water_mark_ = 0;
allocs_.clear();
return kTfLiteOk;
diff --git a/tensorflow/contrib/lite/simple_memory_arena.h b/tensorflow/contrib/lite/simple_memory_arena.h
index 5faf78b59e..0535522374 100644
--- a/tensorflow/contrib/lite/simple_memory_arena.h
+++ b/tensorflow/contrib/lite/simple_memory_arena.h
@@ -22,7 +22,7 @@ limitations under the License.
namespace tflite {
// This little structure holds the offset and the size for a dynamic memory
-// allocation in the memory arena. When the arena is committed and the
+// allocation in the memory arena. When the arena is commited and the
// underlying buffer is set, the alloc can be resolved into an actual memory
// pointer.
struct ArenaAlloc {
@@ -43,7 +43,7 @@ struct ArenaAlloc {
class SimpleMemoryArena {
public:
explicit SimpleMemoryArena(size_t arena_alignment)
- : committed_(false),
+ : commited_(false),
arena_alignment_(arena_alignment),
high_water_mark_(0),
underlying_buffer_size_(0),
@@ -73,7 +73,7 @@ class SimpleMemoryArena {
}
private:
- bool committed_;
+ bool commited_;
size_t arena_alignment_;
size_t high_water_mark_;
std::unique_ptr<char[]> underlying_buffer_;
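The arena hunks above only rename a member, but the underlying pattern is worth spelling out: allocations are planned as (offset, size) records, the backing buffer is sized and allocated on Commit(), and offsets can only be resolved to real pointers while the arena is committed. A rough Python sketch of that lifecycle, with a bytearray standing in for the aligned buffer and the simplification that any new plan invalidates the previous commit:

```python
class SimpleArena:
    """Toy offset-based arena: plan allocations, commit a buffer, resolve offsets."""

    def __init__(self):
        self.committed = False
        self.high_water_mark = 0
        self.buffer = None

    def allocate(self, size):
        # Planning only records an offset; no memory is touched yet.
        # In this toy, planning also invalidates any earlier commit.
        offset = self.high_water_mark
        self.high_water_mark += size
        self.committed = False
        return offset

    def commit(self):
        # (Re)allocate backing storage large enough for all planned allocations.
        self.buffer = bytearray(self.high_water_mark)
        self.committed = True

    def resolve(self, offset, size):
        # Offsets are only valid while the arena is committed.
        assert self.committed, "resolve() requires a committed arena"
        return memoryview(self.buffer)[offset:offset + size]


arena = SimpleArena()
a = arena.allocate(16)
b = arena.allocate(8)
arena.commit()
arena.resolve(b, 8)[:] = b"deadbeef"
print(bytes(arena.resolve(b, 8)))  # b'deadbeef'
```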
diff --git a/tensorflow/contrib/lookup/BUILD b/tensorflow/contrib/lookup/BUILD
index 0a6edc33c5..8ca03f4193 100644
--- a/tensorflow/contrib/lookup/BUILD
+++ b/tensorflow/contrib/lookup/BUILD
@@ -46,7 +46,6 @@ tf_py_test(
"//tensorflow/python:variables",
],
grpc_enabled = True,
- tags = ["no_windows"], # TODO: needs investigation on Windows
)
filegroup(
diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md
index 6c3b02e12b..995230dfa8 100644
--- a/tensorflow/contrib/makefile/README.md
+++ b/tensorflow/contrib/makefile/README.md
@@ -194,8 +194,6 @@ with:
srcs = glob(["libs/arm64-v8a/*.so"]),
```
-If you are building for Android TV (Shield TV devices), replace "portrait" with "landscape" for android:screenOrientation in all four activities in tensorflow/examples/android/AndroidManifest.xml
-
Then run:
```bash
# Create dir for native libs
diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh
index 9b148688c4..2d99791839 100755
--- a/tensorflow/contrib/makefile/build_all_ios.sh
+++ b/tensorflow/contrib/makefile/build_all_ios.sh
@@ -80,9 +80,10 @@ if [[ ! -z "${OPTIMIZE_FOR_GRAPH}" ]]; then
fi
else
echo "${PRNT_SLCTV_BIN} found. Using it"
+ ${PRNT_SLCTV_BIN} --graphs=${OPTIMIZE_FOR_GRAPH} > ${TOP_SRCDIR}/tensorflow/core/framework/ops_to_register.h
+
fi
- ${PRNT_SLCTV_BIN} --graphs=${OPTIMIZE_FOR_GRAPH} > ${TOP_SRCDIR}/tensorflow/core/framework/ops_to_register.h
fi
if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then
diff --git a/tensorflow/contrib/mpi/mpi_utils.h b/tensorflow/contrib/mpi/mpi_utils.h
index df055ff567..fa297c28cb 100644
--- a/tensorflow/contrib/mpi/mpi_utils.h
+++ b/tensorflow/contrib/mpi/mpi_utils.h
@@ -24,8 +24,6 @@ limitations under the License.
#include "tensorflow/core/lib/strings/str_util.h"
-// Skip MPI C++ bindings support, this matches the usage in other places
-#define OMPI_SKIP_MPICXX
#include "third_party/mpi/mpi.h"
#define MPI_CHECK(cmd) \
do { \
diff --git a/tensorflow/contrib/predictor/predictor_factories.py b/tensorflow/contrib/predictor/predictor_factories.py
index 6e77e934fe..04b5d5bdf1 100644
--- a/tensorflow/contrib/predictor/predictor_factories.py
+++ b/tensorflow/contrib/predictor/predictor_factories.py
@@ -53,7 +53,7 @@ def from_contrib_estimator(estimator,
`Estimator`.
"""
if isinstance(estimator, core_estimator.Estimator):
- raise TypeError('Expected estimator to be of type '
+ raise TypeError('Espected estimator to be of type '
'tf.contrib.learn.Estimator, but got type '
'tf.python.estimator.Estimator. You likely want to call '
'from_estimator.')
@@ -88,7 +88,7 @@ def from_estimator(estimator,
`Estimator`.
"""
if isinstance(estimator, contrib_estimator.Estimator):
- raise TypeError('Expected estimator to be of type '
+ raise TypeError('Espected estimator to be of type '
'tf.python.estimator.Estimator, but got type '
'tf.contrib.learn.Estimator. You likely want to call '
'from_contrib_estimator.')
diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD
index 4bb6f76019..f624c42686 100644
--- a/tensorflow/contrib/py2tf/converters/BUILD
+++ b/tensorflow/contrib/py2tf/converters/BUILD
@@ -81,7 +81,6 @@ py_test(
name = "builtin_functions_test",
srcs = ["builtin_functions_test.py"],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
deps = [
":test_lib",
"//tensorflow/python:client_testlib",
@@ -92,7 +91,6 @@ py_test(
name = "call_trees_test",
srcs = ["call_trees_test.py"],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
deps = [
":test_lib",
"//tensorflow/contrib/py2tf/impl",
diff --git a/tensorflow/contrib/py2tf/converters/single_return.py b/tensorflow/contrib/py2tf/converters/single_return.py
index 1194b98f5e..90bc22008f 100644
--- a/tensorflow/contrib/py2tf/converters/single_return.py
+++ b/tensorflow/contrib/py2tf/converters/single_return.py
@@ -212,7 +212,7 @@ class DetectReturnInUnsupportedControlFlow(gast.NodeVisitor):
def __init__(self):
self.cant_return = False
- super(DetectReturnInUnsupportedControlFlow, self).__init__()
+ super(gast.NodeVisitor, self).__init__()
def visit_While(self, node):
self.cant_return = True
diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD
index 8bc338e801..d029289f5a 100644
--- a/tensorflow/contrib/py2tf/utils/BUILD
+++ b/tensorflow/contrib/py2tf/utils/BUILD
@@ -83,7 +83,6 @@ py_test(
name = "py_func_test",
srcs = ["py_func_test.py"],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
deps = [
":utils",
"//tensorflow/python:client_testlib",
diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py
index 1afcbb8504..b278265639 100644
--- a/tensorflow/contrib/quantize/python/fold_batch_norms.py
+++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py
@@ -237,7 +237,7 @@ def _FindFusedBatchNorms(graph):
# The batch variance used during forward and backward prop is biased,
# i.e it is calculated as: V=sum(x(k)-mu)^2/N. For the moving average
# calculation, the variance is corrected by the term N/N-1 (Bessel's
- # correction). The variance tensor read from FuseBatchNorm has Bessel's
+ # correction). The variance tensor read from FuseBatchNorm has bessel's
# correction applied, so we undo it here.
scope, sep, _ = bn_op.name.rpartition('/')
g = ops.get_default_graph()
@@ -306,7 +306,7 @@ def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay,
Args:
context: The scope under which we look for batch norm params
- match: Object containing required batch norm tensors for correction
+ match: Object containg required batch norm tensors for correction
computation.
freeze_batch_norm_delay: Delay in steps at which computation switches
from regular batch norm to frozen mean and variance.
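The comment touched above concerns the moving-average variance read from FusedBatchNorm: it carries Bessel's correction (a factor of N/(N-1)), while batch-norm folding wants the biased batch variance, so the correction is undone. A small numeric sketch of that relationship, assuming NumPy is available:

```python
import numpy as np

x = np.random.randn(1000).astype(np.float32)
n = x.size

biased_var = x.var()             # sum((x - mean)^2) / N
corrected_var = x.var(ddof=1)    # sum((x - mean)^2) / (N - 1), Bessel's correction

# Undo Bessel's correction to recover the biased batch variance,
# mirroring the adjustment described in the comment above.
recovered = corrected_var * (n - 1) / n
print(np.allclose(recovered, biased_var))  # True
```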
diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py
index a4f7b1b221..0a8e35080c 100644
--- a/tensorflow/contrib/quantize/python/quant_ops.py
+++ b/tensorflow/contrib/quantize/python/quant_ops.py
@@ -282,8 +282,8 @@ def _FakeQuantWithMinMaxVars(inputs, min_var, max_var, per_channel, num_bits,
Args:
inputs: a tensor containing values to be quantized.
min_var: a variable containing quantization range lower end(s).
- max_var: a variable containing quantization range upper end(s).
- per_channel: a boolean specifying whether to use per-channel quantization.
+ max_var: a variable containing quantization range lupper end(s).
+ per_channel: a boolean specifying whether to use per-channel quantizatioh.
num_bits: Number of bits to use for quantization, must be between 2 and 8.
narrow_range: Whether to use the narrow quantization range
[1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
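For context on the docstring edited above: fake quantization with min/max variables maps values onto a uniform grid spanning the observed range and back to floats, so training sees the rounding error that real quantization will introduce. A simplified per-tensor sketch, assuming NumPy and ignoring the narrow-range option and zero-point nudging performed by the real op:

```python
import numpy as np

def fake_quant(x, min_val, max_val, num_bits=8):
    """Quantize-dequantize x onto a uniform grid spanning [min_val, max_val]."""
    levels = 2 ** num_bits - 1
    scale = (max_val - min_val) / levels
    q = np.round((np.clip(x, min_val, max_val) - min_val) / scale)
    return q * scale + min_val

x = np.array([-1.2, -0.3, 0.0, 0.7, 1.5], dtype=np.float32)
print(fake_quant(x, min_val=-1.0, max_val=1.0, num_bits=8))
```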
diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py
index ec721afbc8..0608ab9302 100644
--- a/tensorflow/contrib/quantize/python/quantize.py
+++ b/tensorflow/contrib/quantize/python/quantize.py
@@ -267,7 +267,7 @@ def _InsertQuantOp(context,
"""Inserts a quant op between a producer op and (multiple) consumer ops.
Args:
- context: Context where producer and consumer operations are nested.
+ context: Context w,here producer and consumer operations are nested.
name: Name for the new quantization op within the context.
producer: Producer operation of the pairs where quantization will be
inserted.
diff --git a/tensorflow/contrib/quantize/python/quantize_graph.py b/tensorflow/contrib/quantize/python/quantize_graph.py
index 5abdcd2475..5a3a74cec4 100644
--- a/tensorflow/contrib/quantize/python/quantize_graph.py
+++ b/tensorflow/contrib/quantize/python/quantize_graph.py
@@ -158,7 +158,7 @@ def experimental_create_training_graph(input_graph=None,
often fail.
Args:
- input_graph: The tf.Graph to be transformed, if None then defaults to the
+ input_graph: The tf.Graph to be transformed,if None then defaults to the
default graph.
weight_bits: Number of bits to use for quantizing weights.
activation_bits: Number of bits to use for quantizing activations.
diff --git a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py
index db745aa562..0624cc878b 100644
--- a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py
+++ b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py
@@ -419,7 +419,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
normalizer_params=self._BatchNormParams(fused_batch_norm),
scope=scope)
- # Manually add a bypass (optional) and an activation.
+ # Manually add a bypass (optionaly) and an activation.
if with_bypass:
node = math_ops.add(inputs, node, name='test/Add')
@@ -470,7 +470,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
normalizer_params=self._BatchNormParams(fused_batch_norm),
scope=scope)
- # Manually add a bypass (optional) and an activation.
+ # Manually add a bypass (optionaly) and an activation.
if with_bypass:
node = math_ops.add(inputs, node, name='test/Add')
@@ -526,7 +526,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
normalizer_params=self._BatchNormParams(fused_batch_norm),
scope=scope)
- # Manually add a bypass (optional) and an activation.
+ # Manually add a bypass (optionaly) and an activation.
if with_bypass:
node = math_ops.add(inputs, node, name='test/Add')
@@ -565,7 +565,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
stddev: Standard deviation of normal variable.
Returns:
- An initialized that initializes with a truncated normal variable.
+ An initialized that initialzes with a truncated normal variable.
"""
return init_ops.truncated_normal_initializer(stddev=stddev)
diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py
index b2e5707a6d..ef59475167 100644
--- a/tensorflow/contrib/quantize/python/quantize_test.py
+++ b/tensorflow/contrib/quantize/python/quantize_test.py
@@ -144,7 +144,7 @@ class QuantizeTest(test_util.TensorFlowTestCase):
stddev: Standard deviation of normal variable.
Returns:
- An initialized that initializes with a truncated normal variable.
+ An initialized that initialzes with a truncated normal variable.
"""
return init_ops.truncated_normal_initializer(stddev=stddev)
diff --git a/tensorflow/contrib/remote_fused_graph/pylib/BUILD b/tensorflow/contrib/remote_fused_graph/pylib/BUILD
index 54c66271cd..27f0a7f58f 100644
--- a/tensorflow/contrib/remote_fused_graph/pylib/BUILD
+++ b/tensorflow/contrib/remote_fused_graph/pylib/BUILD
@@ -38,6 +38,7 @@ py_test(
size = "small",
srcs = ["python/ops/remote_fused_graph_ops_test.py"],
srcs_version = "PY2AND3",
+ tags = ["no_windows"],
deps = [
":remote_fused_graph_ops_py",
"//tensorflow/core:protos_all_py",
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 358b2eb02b..73f2607d84 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -2133,7 +2133,7 @@ class Conv1DLSTMCell(ConvLSTMCell):
def __init__(self, name="conv_1d_lstm_cell", **kwargs):
"""Construct Conv1DLSTM. See `ConvLSTMCell` for more details."""
- super(Conv1DLSTMCell, self).__init__(conv_ndims=1, name=name, **kwargs)
+ super(Conv1DLSTMCell, self).__init__(conv_ndims=1, **kwargs)
class Conv2DLSTMCell(ConvLSTMCell):
@@ -2144,7 +2144,7 @@ class Conv2DLSTMCell(ConvLSTMCell):
def __init__(self, name="conv_2d_lstm_cell", **kwargs):
"""Construct Conv2DLSTM. See `ConvLSTMCell` for more details."""
- super(Conv2DLSTMCell, self).__init__(conv_ndims=2, name=name, **kwargs)
+ super(Conv2DLSTMCell, self).__init__(conv_ndims=2, **kwargs)
class Conv3DLSTMCell(ConvLSTMCell):
@@ -2155,7 +2155,7 @@ class Conv3DLSTMCell(ConvLSTMCell):
def __init__(self, name="conv_3d_lstm_cell", **kwargs):
"""Construct Conv3DLSTM. See `ConvLSTMCell` for more details."""
- super(Conv3DLSTMCell, self).__init__(conv_ndims=3, name=name, **kwargs)
+ super(Conv3DLSTMCell, self).__init__(conv_ndims=3, **kwargs)
def _conv(args, filter_size, num_features, bias, bias_start=0.0):
diff --git a/tensorflow/contrib/saved_model/BUILD b/tensorflow/contrib/saved_model/BUILD
index b10757df47..245fe07f2b 100644
--- a/tensorflow/contrib/saved_model/BUILD
+++ b/tensorflow/contrib/saved_model/BUILD
@@ -53,7 +53,6 @@ py_test(
size = "small",
srcs = ["python/saved_model/reader_test.py"],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
visibility = ["//visibility:private"],
deps = [
":saved_model_py",
diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py
index 6adbb8be40..03fe31abf7 100644
--- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py
+++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py
@@ -299,13 +299,12 @@ class BeamSearchDecoder(decoder.Decoder):
"""
finished, start_inputs = self._finished, self._start_inputs
- dtype = nest.flatten(self._initial_cell_state)[0].dtype
log_probs = array_ops.one_hot( # shape(batch_sz, beam_sz)
array_ops.zeros([self._batch_size], dtype=dtypes.int32),
depth=self._beam_width,
- on_value=ops.convert_to_tensor(0.0, dtype=dtype),
- off_value=ops.convert_to_tensor(-np.Inf, dtype=dtype),
- dtype=dtype)
+ on_value=0.0,
+ off_value=-np.Inf,
+ dtype=nest.flatten(self._initial_cell_state)[0].dtype)
initial_state = BeamSearchDecoderState(
cell_state=self._initial_cell_state,
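The hunk above changes how the initial beam log-probabilities are constructed, but the intent is the same either way: at step zero only the first beam is "alive" (log-probability 0) and the remaining beams get -inf so they cannot win until the beams diverge. A small sketch of that one-hot initialization, with NumPy standing in for array_ops.one_hot:

```python
import numpy as np

def initial_log_probs(batch_size, beam_width, dtype=np.float32):
    """First beam gets log-prob 0, all other beams get -inf."""
    log_probs = np.full((batch_size, beam_width), -np.inf, dtype=dtype)
    log_probs[:, 0] = 0.0
    return log_probs

print(initial_log_probs(batch_size=2, beam_width=3))
```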
diff --git a/tensorflow/contrib/session_bundle/BUILD b/tensorflow/contrib/session_bundle/BUILD
index 3ad88a8a22..67011c8fef 100644
--- a/tensorflow/contrib/session_bundle/BUILD
+++ b/tensorflow/contrib/session_bundle/BUILD
@@ -165,7 +165,6 @@ py_test(
name = "gc_test",
srcs = ["gc_test.py"],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
visibility = ["//visibility:private"],
deps = [
":gc",
diff --git a/tensorflow/contrib/slim/python/slim/data/BUILD b/tensorflow/contrib/slim/python/slim/data/BUILD
index 7aa1684839..5daabbd62e 100644
--- a/tensorflow/contrib/slim/python/slim/data/BUILD
+++ b/tensorflow/contrib/slim/python/slim/data/BUILD
@@ -61,7 +61,6 @@ py_test(
name = "dataset_data_provider_test",
srcs = ["dataset_data_provider_test.py"],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
deps = [
":dataset",
":dataset_data_provider",
diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD
index 07b6b1f142..1e4cc3f095 100644
--- a/tensorflow/contrib/tensor_forest/BUILD
+++ b/tensorflow/contrib/tensor_forest/BUILD
@@ -553,6 +553,7 @@ py_test(
srcs = ["client/random_forest_test.py"],
srcs_version = "PY2AND3",
tags = [
+ "no_windows",
"nomac", # b/63258195
"notsan",
],
diff --git a/tensorflow/contrib/tensorboard/BUILD b/tensorflow/contrib/tensorboard/BUILD
index db2e000ef8..d833744d0c 100644
--- a/tensorflow/contrib/tensorboard/BUILD
+++ b/tensorflow/contrib/tensorboard/BUILD
@@ -9,7 +9,6 @@ exports_files(["LICENSE"])
# For platform specific build config
load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library")
-load("//tensorflow:tensorflow.bzl", "py_test")
tf_proto_library(
name = "protos_all",
diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD
index 906cc3f034..c832c6f2e0 100644
--- a/tensorflow/contrib/tensorrt/BUILD
+++ b/tensorflow/contrib/tensorrt/BUILD
@@ -83,7 +83,6 @@ cc_library(
"kernels/trt_engine_op.h",
],
copts = tf_copts(),
- visibility = ["//visibility:public"],
deps = [
":trt_logging",
":trt_resources",
@@ -155,7 +154,6 @@ py_library(
deps = [
":trt_convert_py",
":trt_ops_py",
- "//tensorflow/python:errors",
],
)
diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md
index 461e627e99..dfcce0fd00 100644
--- a/tensorflow/contrib/tensorrt/README.md
+++ b/tensorflow/contrib/tensorrt/README.md
@@ -2,8 +2,7 @@ Using TensorRT in TensorFlow
============================
This module provides necessary bindings and introduces TRT_engine_op
-operator that wraps a subgraph in TensorRT. This is still a work in progress
-but should be useable with most common graphs.
+operator that wraps a subgraph in TensorRT.
Compilation
-----------
@@ -16,10 +15,26 @@ configure script should find the necessary components from the system
automatically. If installed from tar packages, user has to set path to
location where the library is installed during configuration.
-```shell
+
+```
bazel build --config=cuda --config=opt //tensorflow/tools/pip_package:build_pip_package
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/
```
After the installation of tensorflow package, TensorRT transformation
-will be available. An example use can be found in test/test_tftrt.py directory
+will be available. An example use is shown below.
+
+```python
+import tensorflow as tf
+import tensorflow.contrib.tensorrt as trt
+#... create and train or load model
+gdef = sess.graph.as_graph_def()
+trt_gdef = trt.create_inference_graph(
+ gdef, #original graph_def
+ ["output"], #name of output node(s)
+ max_batch_size, #maximum batch size to run the inference
+ max_workspace_size_bytes) # max memory for TensorRT to use
+tf.reset_default_graph()
+tf.import_graph_def(graph_def=trt_gdef)
+#...... run inference
+```
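As a follow-up to the README snippet added above, a hedged sketch of the import-and-run step that comes after conversion. To keep it runnable without TensorRT installed, a trivial GraphDef stands in for `trt_gdef`, and the node names "input" and "output" are placeholders that depend on the actual model:

```python
import numpy as np
import tensorflow as tf

# Stand-in for the converted graph: a trivial GraphDef with one op, so the
# import-and-run pattern below executes without a TensorRT-converted model.
g = tf.Graph()
with g.as_default():
    inp = tf.placeholder(tf.float32, shape=[None, 4], name="input")
    tf.identity(inp * 2.0, name="output")
gdef = g.as_graph_def()

tf.reset_default_graph()
tf.import_graph_def(graph_def=gdef, name="import")
with tf.Session() as sess:
    out = sess.graph.get_tensor_by_name("import/output:0")
    print(sess.run(out, feed_dict={"import/input:0": np.ones((2, 4), np.float32)}))
```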
diff --git a/tensorflow/contrib/tensorrt/__init__.py b/tensorflow/contrib/tensorrt/__init__.py
index 140ad48282..fd551d70b4 100644
--- a/tensorflow/contrib/tensorrt/__init__.py
+++ b/tensorflow/contrib/tensorrt/__init__.py
@@ -18,18 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-from tensorflow.python.framework import errors
-
-# pylint: disable=unused-import,wildcard-import,g-import-not-at-top
-try:
- from tensorflow.contrib.tensorrt.python import *
-except errors.NotFoundError as e:
- no_trt_message = (
- '**** Failed to initialize TensorRT. This is either because the TensorRT'
- ' installation path is not in LD_LIBRARY_PATH, or because you do not have'
- ' it installed. If not installed, please go to'
- ' https://developer.nvidia.com/tensorrt to download and install'
- ' TensorRT ****')
- print(no_trt_message)
- raise e
-# pylint: enable=unused-import,wildcard-import,g-import-not-at-top
+# pylint: disable=unused-import,wildcard-import
+from tensorflow.contrib.tensorrt.python import *
+# pylint: enable=unused-import,wildcard-import
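The __init__.py hunk above swaps a guarded import (which printed a hint when the TensorRT libraries could not be loaded) for an unconditional one. For illustration only, a generic sketch of the guarded optional-import pattern; `some_optional_backend` is a hypothetical module name, and plain ImportError is used here as a simplification of the NotFoundError raised when a native op library is missing:

```python
# Sketch of an optional-dependency import guard; not TensorFlow API.
try:
    import some_optional_backend as backend
    HAS_BACKEND = True
except ImportError as e:
    HAS_BACKEND = False
    print("Optional backend unavailable; falling back to the default path:", e)

def run(x):
    return backend.run(x) if HAS_BACKEND else x

print(run(41))
```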
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index eea8c8efa2..970f810473 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -15,7 +15,6 @@ limitations under the License.
#include "tensorflow/contrib/tensorrt/convert/convert_graph.h"
-#include <list>
#include <map>
#include <set>
#include <unordered_map>
@@ -49,29 +48,13 @@ namespace tensorrt {
namespace convert {
namespace {
-bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) {
+static bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) {
// LINT.IfChange
// TODO(jie): Segmentation shouldn't associated with op name.
// Split it into a registration for each kernel.
static const std::set<string> candidate_ops = {
- "Identity",
- "Const",
- "Conv2D",
- "MaxPool",
- "BiasAdd",
- "Relu",
- "Add",
- "Mul",
- "Sub",
- "Rsqrt",
- "Pad",
- "Mean",
- "AvgPool",
- "ConcatV2",
- "DepthwiseConv2dNative",
- "FusedBatchNorm",
- "FusedBatchNormV2",
- // TODO(ben,jie): ...
+ "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu",
+ "Add", "Mul", "Sub", "Rsqrt", "Pad" // "Placeholder" ,"Mean"
};
// LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h)
return candidate_ops.count(node_def.op());
@@ -86,8 +69,6 @@ void GetSubGraphIncomingEdges(const tensorflow::Graph& graph,
if (!subgraph_node_ids.count(edge->src()->id()) &&
!edge->src()->IsSource()) {
incoming_edges->insert(edge);
- } else {
- VLOG(2) << edge->src()->name() << " N, ";
}
}
}
@@ -101,10 +82,7 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph,
for (const tensorflow::Edge* edge : node->out_edges()) {
if (!subgraph_node_ids.count(edge->dst()->id()) &&
!edge->dst()->IsSink()) {
- VLOG(2) << edge->dst()->name() << " Y, ";
outgoing_edges->insert(edge);
- } else {
- VLOG(2) << edge->dst()->name() << " N, ";
}
}
}
@@ -131,150 +109,74 @@ std::unordered_map<string, std::vector<int>> BuildTensorNameMap(
}
return result;
}
-// TODO(sami): convert references to pointers
-struct ConvertGraphParams {
- ConvertGraphParams(
- tensorflow::Graph& inp_graph,
- const std::vector<string>& output_node_names,
- const std::set<int>& subgraph_node_id_numbers,
- size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes,
- const tensorflow::grappler::GraphProperties& current_graph_properties,
- std::unordered_map<string, std::pair<int, string>>* output_edges,
- int engine_precision_mode)
- : graph(inp_graph),
- output_names(output_node_names),
- subgraph_node_ids(subgraph_node_id_numbers),
- max_batch_size(max_supported_batch_size),
- max_workspace_size_bytes(max_consumed_workspace_size_bytes),
- graph_properties(current_graph_properties),
- output_edge_map(output_edges),
- precision_mode(engine_precision_mode) {}
- tensorflow::Graph& graph;
- const std::vector<string>& output_names;
- const std::set<int>& subgraph_node_ids;
- size_t max_batch_size;
- size_t max_workspace_size_bytes;
- const tensorflow::grappler::GraphProperties& graph_properties;
- std::unordered_map<string, std::pair<int, string>>* output_edge_map;
- int precision_mode;
- std::vector<std::pair<int, int>> subgraph_inputs;
- std::vector<std::pair<int, int>> subgraph_outputs;
+
+tensorflow::Status ConvertSubGraphToTensorRT(
+ const std::vector<string>& output_names,
+ const std::set<int>& subgraph_node_ids,
+ size_t max_batch_size, // Max batch size that engine will be created for
+ // Max amount of memory that engine will be allowed to consume, in bytes
+ size_t max_workspace_size_bytes,
+ const tensorflow::grappler::GraphProperties& graph_properties,
+ tensorflow::Graph* graph) {
tensorflow::EdgeSet subgraph_incoming_edges;
- tensorflow::EdgeSet subgraph_outgoing_edges;
-};
+ GetSubGraphIncomingEdges(*graph, subgraph_node_ids, &subgraph_incoming_edges);
+
+ std::vector<std::pair<int, int>> subgraph_inputs;
-static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) {
- GetSubGraphIncomingEdges(p->graph, p->subgraph_node_ids,
- &p->subgraph_incoming_edges);
- for (const tensorflow::Edge* edge : p->subgraph_incoming_edges) {
- p->subgraph_inputs.push_back({edge->src()->id(), edge->src_output()});
+ // Collect inputs by looking for incoming edges
+ for (const tensorflow::Edge* edge : subgraph_incoming_edges) {
+ subgraph_inputs.push_back({edge->src()->id(), edge->src_output()});
}
- auto output_name_to_index_map = BuildTensorNameMap(p->output_names);
std::set<std::pair<int, int>> subgraph_outputs_set;
// Collect outputs referenced from output_names
- for (int node_id : p->subgraph_node_ids) {
- tensorflow::Node* node = p->graph.FindNodeId(node_id);
+ auto output_name_to_index_map = BuildTensorNameMap(output_names);
+ for (int node_id : subgraph_node_ids) {
+ tensorflow::Node* node = graph->FindNodeId(node_id);
if (output_name_to_index_map.count(node->name())) {
for (int index : output_name_to_index_map.at(node->name())) {
subgraph_outputs_set.insert({node_id, index});
}
}
}
- GetSubGraphOutgoingEdges(p->graph, p->subgraph_node_ids,
- &p->subgraph_outgoing_edges);
- for (const tensorflow::Edge* edge : p->subgraph_outgoing_edges) {
+ // Collect outputs referenced from outgoing edges
+ tensorflow::EdgeSet subgraph_outgoing_edges;
+ GetSubGraphOutgoingEdges(*graph, subgraph_node_ids, &subgraph_outgoing_edges);
+ for (const tensorflow::Edge* edge : subgraph_outgoing_edges) {
subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()});
}
- p->subgraph_outputs.reserve(subgraph_outputs_set.size());
- p->subgraph_outputs.insert(p->subgraph_outputs.begin(),
- subgraph_outputs_set.begin(),
- subgraph_outputs_set.end());
- return tensorflow::Status::OK();
-};
-
-tensorflow::Status GetCalibNode(ConvertGraphParams* params) {
- TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params));
+ // Impose an ordering on the outputs
+ std::vector<std::pair<int, int>> subgraph_outputs(
+ subgraph_outputs_set.begin(), subgraph_outputs_set.end());
+ // Build TensorRT node and add it to the graph
tensorflow::NodeDef trt_node_def;
- SubGraphParams s(params->graph, params->subgraph_node_ids,
- params->subgraph_inputs, params->subgraph_outputs,
- params->max_batch_size, params->max_workspace_size_bytes,
- params->graph_properties, params->output_edge_map,
- &trt_node_def, params->precision_mode);
- TF_RETURN_IF_ERROR(InjectCalibrationNode(s));
+ TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(
+ *graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs,
+ max_batch_size, max_workspace_size_bytes, graph_properties,
+ &trt_node_def));
tensorflow::Status status;
- tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status);
-
- TF_RETURN_IF_ERROR(status);
-
- for (auto in_edge :
- params->subgraph_incoming_edges) { // loop over incoming edges and
- // attach them to calib node
- // tensorflow::Node* src_node = in_edge->src();
- auto src_output = in_edge->src_output();
- auto dst_node = in_edge->dst();
- auto dst_input = in_edge->dst_input();
- VLOG(1) << " update edge " << trt_node->name() << ":" << src_output
- << " -> " << dst_node->name() << ":" << dst_input;
- TF_RETURN_IF_ERROR(
- params->graph.UpdateEdge(trt_node, src_output, dst_node, dst_input));
- }
- return tensorflow::Status::OK();
-}
-
-tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) {
- TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params));
- tensorflow::NodeDef trt_node_def;
-
- SubGraphParams s(params->graph, params->subgraph_node_ids,
- params->subgraph_inputs, params->subgraph_outputs,
- params->max_batch_size, params->max_workspace_size_bytes,
- params->graph_properties, params->output_edge_map,
- &trt_node_def, params->precision_mode);
- TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(s));
- tensorflow::Status status;
- tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status);
-
- // AddNode does not wire edges.
- // Re-map incoming edges to use the new TRT node instead of the orig subgraph
- std::map<std::pair<int, int>, int> subgraph_edge_to_input_map;
- for (size_t i = 0; i < params->subgraph_inputs.size(); ++i) {
- subgraph_edge_to_input_map.insert({params->subgraph_inputs.at(i), i});
- }
- for (const tensorflow::Edge* edge : params->subgraph_incoming_edges) {
- std::pair<int, int> old_src = {edge->src()->id(), edge->src_output()};
- int new_src_output = subgraph_edge_to_input_map.at(old_src);
- params->graph.AddEdge(edge->src(), edge->src_output(), trt_node,
- new_src_output);
- params->graph.RemoveEdge(edge);
- }
-
- VLOG(2) << "new wiring edges: " << trt_node->in_edges().size();
- for (const tensorflow::Edge* edge : trt_node->in_edges()) {
- VLOG(2) << edge->src()->name() << " port: " << edge->src_output();
- }
-
+ tensorflow::Node* trt_node = graph->AddNode(trt_node_def, &status);
TF_RETURN_IF_ERROR(status);
// Re-map outgoing edges to use the new TRT node instead of the orig subgraph
std::map<std::pair<int, int>, int> subgraph_edge_to_output_map;
- for (size_t i = 0; i < params->subgraph_outputs.size(); ++i) {
- subgraph_edge_to_output_map.insert({params->subgraph_outputs.at(i), i});
+ for (size_t i = 0; i < subgraph_outputs.size(); ++i) {
+ subgraph_edge_to_output_map.insert({subgraph_outputs.at(i), i});
}
TF_RETURN_IF_ERROR(status);
- for (const tensorflow::Edge* edge : params->subgraph_outgoing_edges) {
+ for (const tensorflow::Edge* edge : subgraph_outgoing_edges) {
std::pair<int, int> old_src = {edge->src()->id(), edge->src_output()};
int new_src_output = subgraph_edge_to_output_map.at(old_src);
- TF_RETURN_IF_ERROR(params->graph.UpdateEdge(
- trt_node, new_src_output, edge->dst(), edge->dst_input()));
+ TF_RETURN_IF_ERROR(graph->UpdateEdge(trt_node, new_src_output, edge->dst(),
+ edge->dst_input()));
}
// Remove the original subgraph
- for (int node_id : params->subgraph_node_ids) {
- tensorflow::Node* node = params->graph.FindNodeId(node_id);
+ for (int node_id : subgraph_node_ids) {
+ tensorflow::Node* node = graph->FindNodeId(node_id);
// Don't remove the input placeholders
if (node->type_string() == "Placeholder") {
continue;
}
- params->graph.RemoveNode(node);
+ graph->RemoveNode(node);
}
return tensorflow::Status::OK();
}
@@ -292,39 +194,12 @@ tensorflow::Status BuildNodeMap(
}
} // namespace
-tensorflow::Status ConvertCalibGraphToInferGraph(
- const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* infer_graph) {
- VLOG(0) << "Starting Calib Conversion";
- tensorflow::Graph graph(tensorflow::OpRegistry::Global());
- TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph(
- tensorflow::GraphConstructorOptions(), graph_def, &graph));
- // get calib nodes
- std::vector<tensorflow::Node*> calib_nodes;
- for (auto node : graph.op_nodes()) {
- if (node->type_string() == "TRTCalibOp") {
- VLOG(1) << "Found Calib Node";
- calib_nodes.push_back(node);
- }
- }
- VLOG(0) << "Num Calib nodes in graph= " << calib_nodes.size();
- if (calib_nodes.size() == 0)
- return tensorflow::errors::FailedPrecondition(
- "Graph doesn't contain any calibration nodes!."
- " Please generate calibration graph and run calibration first");
- for (auto n : calib_nodes) {
- TF_RETURN_IF_ERROR(
- tensorrt::convert::ConvertCalibrationNodeToEngineNode(graph, n));
- }
- graph.ToGraphDef(infer_graph);
- return tensorflow::Status::OK();
-}
tensorflow::Status ConvertGraphDefToTensorRT(
const tensorflow::GraphDef& graph_def,
const std::vector<string>& output_names, size_t max_batch_size,
- size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def,
- int precision_mode = FP32MODE, int minimum_segment_size = 3) {
- // optimization pass
+ size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def) {
+ // Optimization pass
tensorflow::grappler::GrapplerItem item;
item.fetch = output_names;
tensorflow::GraphDef gdef;
@@ -334,23 +209,16 @@ tensorflow::Status ConvertGraphDefToTensorRT(
tensorflow::grappler::LayoutOptimizer optimizer;
tensorflow::grappler::Cluster* cluster;
- // virtual cluster
+ // Virtual cluster
tensorflow::DeviceProperties device_properties;
-
device_properties.set_type("GPU");
device_properties.mutable_environment()->insert({"architecture", "6"});
cluster =
new tensorflow::grappler::VirtualCluster({{"/GPU:0", device_properties}});
- // single machine
- int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores();
- int num_gpus = tensorflow::grappler::GetNumAvailableGPUs();
- VLOG(2) << "cpu_cores: " << num_cpu_cores;
- VLOG(2) << "gpus: " << num_gpus;
-
TF_RETURN_IF_ERROR(optimizer.Optimize(cluster, item, &gdef));
- // constant folding
+ // Constant folding
item.graph = gdef;
tensorflow::grappler::ConstantFolding fold(nullptr);
TF_RETURN_IF_ERROR(fold.Optimize(nullptr, item, &gdef));
@@ -358,6 +226,7 @@ tensorflow::Status ConvertGraphDefToTensorRT(
// AJ refactoring shape inference through grappler/GraphProperties.
tensorflow::grappler::GraphProperties static_graph_properties(item);
TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(false));
+
// Build full graph
tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(),
gdef.library());
@@ -374,7 +243,7 @@ tensorflow::Status ConvertGraphDefToTensorRT(
}
// TODO(sami): this should be passed as a knob!!!!
- segment_options.minimum_segment_size = minimum_segment_size;
+ segment_options.minimum_segment_size = 2;
tensorflow::tensorrt::segment::SegmentNodesVector segments;
TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph(
gdef, IsTensorRTCandidate, segment_options, &segments));
@@ -383,37 +252,14 @@ tensorflow::Status ConvertGraphDefToTensorRT(
}
std::unordered_map<string, tensorflow::Node*> node_map;
TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map));
- std::unordered_map<string, std::pair<int, string>> output_edge_map;
- int count = 0;
- float total_num_nodes_in_segments = 0.;
- for (auto s : segments) {
- total_num_nodes_in_segments += s.size();
- }
for (const std::set<string>& subgraph_node_names : segments) {
std::set<int> subgraph_node_ids;
- size_t max_mem_per_engine =
- max_workspace_size_bytes *
- ((float)subgraph_node_names.size() / total_num_nodes_in_segments);
- std::stringstream oss;
for (const string& node_name : subgraph_node_names) {
- oss << " " << node_name;
subgraph_node_ids.insert(node_map.at(node_name)->id());
}
- VLOG(2) << "Subgraph nodes" << oss.str();
- ConvertGraphParams p(graph, output_names, subgraph_node_ids, max_batch_size,
- max_mem_per_engine, static_graph_properties,
- &output_edge_map, precision_mode);
- if (precision_mode == INT8MODE) {
- TF_RETURN_IF_ERROR(GetCalibNode(&p));
- } else {
- tensorflow::Status status = ConvertSubGraphToTensorRT(&p);
- if (status != tensorflow::Status::OK()) {
- LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count
- << " due to: \n"
- << status.ToString() << " SKIPPING......";
- }
- count++;
- }
+ TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT(
+ output_names, subgraph_node_ids, max_batch_size,
+ max_workspace_size_bytes, static_graph_properties, &graph));
}
graph.ToGraphDef(new_graph_def);
return tensorflow::Status::OK();
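The refactor above mostly changes how parameters are threaded through, but the core transformation is unchanged: find the edges crossing into and out of a chosen subgraph, insert a single fused node in its place, rewire those edges onto the new node, and delete the original subgraph nodes. A toy, library-free sketch of that rewiring on a dict-based DAG (node name mapped to the list of node names it consumes), not the TensorRT converter itself:

```python
def collapse_subgraph(graph, subgraph, fused_name):
    """Replace `subgraph` (a set of node names) with one fused node.

    Inputs of the fused node are the external producers feeding the subgraph;
    consumers outside the subgraph are rewired to read from the fused node.
    """
    # Incoming edges: external producers referenced by subgraph nodes.
    external_inputs = []
    for node in subgraph:
        for src in graph[node]:
            if src not in subgraph and src not in external_inputs:
                external_inputs.append(src)
    # Rebuild the graph without the subgraph, rewiring outgoing edges.
    new_graph = {}
    for node, inputs in graph.items():
        if node in subgraph:
            continue  # drop the original subgraph nodes
        new_graph[node] = [fused_name if src in subgraph else src for src in inputs]
    new_graph[fused_name] = external_inputs
    return new_graph


g = {"a": [], "b": ["a"], "c": ["b"], "d": ["c", "a"]}
print(collapse_subgraph(g, {"b", "c"}, "fused_trt"))
# {'a': [], 'd': ['fused_trt', 'a'], 'fused_trt': ['a']}
```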
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h
index e1596e89e2..154ad3f2e8 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h
@@ -28,11 +28,6 @@ namespace tensorflow {
namespace tensorrt {
namespace convert {
-// This method converts an already generated calibration graph which was used in
-// calibration runs to an inference graph
-tensorflow::Status ConvertCalibGraphToInferGraph(
- const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* new_graph_def);
-
// max_batch_size: maximum batch size which can be used for inference for
// optimization targets inference run with max batch size.
// max_workspace_size_bytes: The upper bound of memory allowence for
@@ -40,8 +35,7 @@ tensorflow::Status ConvertCalibGraphToInferGraph(
tensorflow::Status ConvertGraphDefToTensorRT(
const tensorflow::GraphDef& graph_def,
const std::vector<string>& output_names, size_t max_batch_size,
- size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def,
- int precision_mode, int minimum_segment_size);
+ size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def);
} // namespace convert
} // namespace tensorrt
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 75a3c3d034..9ee717dd7f 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -24,10 +24,6 @@ limitations under the License.
#include <utility>
#include <vector>
-#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
-#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h"
-#include "tensorflow/contrib/tensorrt/resources/trt_resources.h"
-#include "tensorflow/core/framework/node_def.pb.h" // NOLINT
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/tensor_shape.pb.h" // NOLINT
#include "tensorflow/core/framework/types.h"
@@ -36,7 +32,6 @@ limitations under the License.
#include "tensorflow/core/graph/graph_constructor.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/tensor_coding.h"
@@ -44,6 +39,7 @@ limitations under the License.
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
+#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
#include "tensorrt/include/NvInfer.h"
// Check if the types are equal. Cast to int first so that failure log message
@@ -53,7 +49,6 @@ limitations under the License.
namespace tensorflow {
namespace tensorrt {
namespace convert {
-using ::tensorflow::strings::StrCat;
namespace {
@@ -70,8 +65,7 @@ inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype,
*trt_dtype = nvinfer1::DataType::kHALF;
break;
default:
- return tensorflow::errors::InvalidArgument(
- "Unsupported data type " + tensorflow::DataTypeString(tf_dtype));
+ return tensorflow::errors::InvalidArgument("Unsupported data type");
}
return tensorflow::Status::OK();
}
@@ -118,18 +112,6 @@ static std::vector<std::pair<int, int>> CreateSamePadding(
return padding;
}
-string GetCommonNameScope(const string& op_name_a, const string& op_name_b) {
- size_t last_scope_separator = 0;
- for (size_t i = 0; i < std::min(op_name_a.size(), op_name_b.size()); ++i) {
- if (op_name_a[i] != op_name_b[i]) {
- break;
- } else if (op_name_a[i] == '/') {
- last_scope_separator = i + 1;
- }
- }
- return op_name_a.substr(0, last_scope_separator);
-}
-
class TRT_ShapedWeights {
public:
TRT_ShapedWeights(tensorflow::DataType type, const void* values,
@@ -263,11 +245,6 @@ std::vector<int> TFAttrs::get<std::vector<int>>(string key) const {
}
template <>
-std::vector<string> TFAttrs::get<std::vector<string>>(string key) const {
- auto attr = this->at(key)->list().s();
- return std::vector<string>(attr.begin(), attr.end());
-}
-template <>
nvinfer1::Dims TFAttrs::get<nvinfer1::Dims>(string key) const {
auto values = this->get<std::vector<int>>(key);
nvinfer1::Dims dims;
@@ -289,17 +266,6 @@ tensorflow::DataType TFAttrs::get<tensorflow::DataType>(string key) const {
return this->at(key)->type();
}
-template <>
-float TFAttrs::get<float>(string key) const {
- return this->at(key)->f();
-}
-
-template <>
-bool TFAttrs::get<bool>(string key) const {
- return this->at(key)->b();
-}
-
-// TODO(jie): reorder4 & reorder2 should be merged?
template <typename T>
void Reorder4(nvinfer1::DimsNCHW shape, const T* idata,
nvinfer1::DimsNCHW istrides, T* odata,
@@ -317,87 +283,29 @@ void Reorder4(nvinfer1::DimsNCHW shape, const T* idata,
}
}
-template <typename T>
-void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides,
- T* odata, nvinfer1::DimsHW ostrides) {
- for (int h = 0; h < shape.h(); ++h) {
- for (int w = 0; w < shape.w(); ++w) {
- odata[h * ostrides.h() + w * ostrides.w()] =
- idata[h * ostrides.h() + w * ostrides.w()];
- }
- }
-}
-
-// TODO(jie): fallback to tensorflow!!
-void ReorderCKtoKC(const TRT_ShapedWeights& iweights,
- TRT_ShapedWeights* oweights) {
- int c = iweights.shape_.d[0];
- int k = iweights.shape_.d[1];
- oweights->shape_.d[0] = k;
- oweights->shape_.d[1] = c;
- nvinfer1::DimsHW istrides = {1, k};
- nvinfer1::DimsHW ostrides = {c, 1};
- switch (iweights.type_) {
- case tensorflow::DataType::DT_FLOAT: {
- Reorder2({k, c}, static_cast<float const*>(iweights.GetValues()),
- istrides,
- static_cast<float*>(const_cast<void*>(oweights->GetValues())),
- ostrides);
- break;
- }
- case tensorflow::DataType::DT_HALF: {
- Reorder2(
- {k, c}, static_cast<Eigen::half const*>(iweights.GetValues()),
- istrides,
- static_cast<Eigen::half*>(const_cast<void*>(oweights->GetValues())),
- ostrides);
- break;
- }
- default:
- LOG(FATAL) << "Unsupported type in reorder expected fp32 or fp16 but got "
- << DataTypeString(iweights.type_);
- }
-}
-
void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights,
- TRT_ShapedWeights* oweights, int num_groups) {
+ TRT_ShapedWeights* oweights) {
CHECK_EQ(iweights.type_, oweights->type_);
CHECK_EQ(iweights.size_bytes(), oweights->size_bytes());
int r = iweights.shape_.d[0];
int s = iweights.shape_.d[1];
- // TRT requires GKcRS, while TF depthwise has RSCK
- // where c=1, C=G
- VLOG(2) << "num_groups: " << num_groups;
- int c = iweights.shape_.d[2] / num_groups;
- VLOG(2) << "c" << iweights.shape_.d[2] << " then " << c;
- int k = iweights.shape_.d[3] * num_groups;
- VLOG(2) << "k" << iweights.shape_.d[3] << " then " << k;
- oweights->shape_.d[0] = k / num_groups;
- oweights->shape_.d[1] = c * num_groups;
+ int c = iweights.shape_.d[2];
+ int k = iweights.shape_.d[3];
+ oweights->shape_.d[0] = k;
+ oweights->shape_.d[1] = c;
oweights->shape_.d[2] = r;
oweights->shape_.d[3] = s;
nvinfer1::DimsNCHW istrides = {1, k, s * k * c, c * k};
nvinfer1::DimsNCHW ostrides = {c * r * s, r * s, s, 1};
switch (iweights.type_) {
- case tensorflow::DataType::DT_FLOAT: {
+ case tensorflow::DataType::DT_FLOAT:
Reorder4({k, c, r, s}, static_cast<float const*>(iweights.GetValues()),
istrides,
static_cast<float*>(const_cast<void*>(oweights->GetValues())),
ostrides);
break;
- }
- case tensorflow::DataType::DT_HALF: {
- Reorder4(
- {k, c, r, s}, static_cast<Eigen::half const*>(iweights.GetValues()),
- istrides,
- static_cast<Eigen::half*>(const_cast<void*>(oweights->GetValues())),
- ostrides);
- break;
- }
-
default:
- LOG(FATAL) << "Unsupported type, expected fp32 or fp16 but got "
- << DataTypeString(iweights.type_);
+ LOG(FATAL) << "!!!!!!!!!!!!!!!!!!!!!!!!broke!!!!!!!!!!!!";
}
}
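The hunk above reorders convolution weights from TensorFlow's RSCK layout (filter height, filter width, input channels, output channels) into the KCRS layout TensorRT consumes; the grouped/depthwise and fp16 handling is what the change adds or removes. Expressed as a plain axis permutation, assuming NumPy and ignoring grouping:

```python
import numpy as np

# RSCK: (filter_h, filter_w, in_channels, out_channels), as stored by TensorFlow.
rsck = np.arange(2 * 3 * 4 * 5, dtype=np.float32).reshape(2, 3, 4, 5)

# KCRS: (out_channels, in_channels, filter_h, filter_w), as consumed by TensorRT.
kcrs = np.transpose(rsck, (3, 2, 0, 1))
print(rsck.shape, "->", kcrs.shape)  # (2, 3, 4, 5) -> (5, 4, 2, 3)
```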
@@ -415,11 +323,12 @@ inline std::shared_ptr<T> infer_object(T* obj) {
return std::shared_ptr<T>(obj, InferDeleter());
}
+// Logger for GIE info/warning/errors
class Converter;
using OpConverter =
std::function<tensorflow::Status(Converter&, const tensorflow::NodeDef&,
- const std::vector<TRT_TensorOrWeights>&,
+ std::vector<TRT_TensorOrWeights> const&,
std::vector<TRT_TensorOrWeights>*)>;
class Converter {
@@ -427,57 +336,34 @@ class Converter {
std::unordered_map<string, OpConverter> op_registry_;
nvinfer1::INetworkDefinition* trt_network_;
std::list<std::vector<uint8_t>> temp_bufs_;
- tensorflow::tensorrt::TRTWeightStore* weight_store_;
- bool fp16_;
+
void register_op_converters();
+
std::vector<TRT_TensorOrWeights> get_inputs(
const tensorflow::NodeDef& node_def) {
std::vector<TRT_TensorOrWeights> inputs;
- for (auto const& input_name : node_def.input()) {
- /*************************************************************************
- * TODO(jie) handle case 1) here
- * Normalizes the inputs and extracts associated metadata:
- * 1) Inputs can contain a colon followed by a suffix of characters.
- * That suffix may be a single number (e.g. inputName:1) or several
- * word characters separated from a number by a colon
- * (e.g. inputName:foo:1). The
- * latter case is used to denote inputs and outputs of functions.
- * 2) Control dependency inputs contain caret at the beginning and we
- * remove this and annotate the edge as a control dependency.
- ************************************************************************/
- string name = input_name[0] == '^' ? input_name.substr(1) : input_name;
- auto first = name.find_first_of(':');
- if (first != string::npos && first + 2 == name.size() &&
- name[first + 1] == '0')
- name.erase(first);
-
- VLOG(2) << "retrieve input: " << name;
- if (trt_tensors_.count(name)) {
- inputs.push_back(trt_tensors_.at(name));
- } else {
- LOG(FATAL) << "input: " << name << " not availabled for node at, "
- << node_def.name();
- }
+ for (const auto& input_name : node_def.input()) {
+ VLOG(2) << "Retrieve input: " << input_name;
+ inputs.push_back(trt_tensors_.at(input_name));
}
return inputs;
}
public:
- explicit Converter(nvinfer1::INetworkDefinition* trt_network,
- tensorflow::tensorrt::TRTWeightStore* ws, bool fp16)
- : trt_network_(trt_network), weight_store_(ws), fp16_(fp16) {
+ explicit Converter(nvinfer1::INetworkDefinition* trt_network)
+ : trt_network_(trt_network) {
this->register_op_converters();
}
- tensorflow::tensorrt::TRTWeightStore* weight_store() { return weight_store_; }
+
TRT_ShapedWeights get_temp_weights(tensorflow::DataType type,
nvinfer1::Dims shape) {
TRT_ShapedWeights weights(type, nullptr, shape);
// TODO(jie): check weights size_bytes. 0 means type error
- weight_store_->store_.push_back(std::vector<uint8_t>(weights.size_bytes()));
- weights.SetValues(weight_store_->store_.back().data());
+ temp_bufs_.push_back(std::vector<uint8_t>(weights.size_bytes()));
+ weights.SetValues(temp_bufs_.back().data());
return weights;
}
- bool isFP16() { return fp16_; };
+
TRT_ShapedWeights get_temp_weights_like(const TRT_ShapedWeights& weights) {
return this->get_temp_weights(weights.type_, weights.shape_);
}
@@ -496,7 +382,7 @@ class Converter {
TRT_TensorOrWeights output = outputs.at(i);
// TODO(jie): tf protobuf seems to be omitting the :0 suffix
string output_name = node_def.name();
- if (i != 0) output_name = StrCat(output_name, ":", i);
+ if (i != 0) output_name = output_name + ":" + std::to_string(i);
if (output.is_tensor()) {
output.tensor()->setName(output_name.c_str());
}
@@ -562,7 +448,7 @@ struct LambdaFactory {
switch (op) {
case OP_CATEGORY::RSQRT: {
VLOG(2) << "RSQRT GETS DONE";
- return [](T t) -> T { return 1.0 / sqrt(t); };
+ return [](T t) -> T { return 1.0 / std::sqrt(t); };
}
case OP_CATEGORY::NEG:
return [](T t) -> T { return -t; };
@@ -648,22 +534,6 @@ struct LambdaFactory {
}
};
-template <>
-std::function<Eigen::half(Eigen::half)> LambdaFactory::unary<Eigen::half>() {
- switch (op) {
- case OP_CATEGORY::RSQRT: {
- VLOG(2) << "RSQRT GETS DONE";
- return [](Eigen::half t) -> Eigen::half {
- return Eigen::half(1.0 / sqrt(float(t)));
- };
- }
- case OP_CATEGORY::NEG:
- return [](Eigen::half t) -> Eigen::half { return -t; };
- default:
- VLOG(2) << "Not supported op for unary: " << static_cast<int>(op);
- return nullptr;
- }
-}
tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights,
TRT_ShapedWeights* oweights,
LambdaFactory unary_op) {
@@ -675,14 +545,6 @@ tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights,
std::transform(inp, inp + iweights.count(), oup, unary_op.unary<float>());
break;
}
- case tensorflow::DataType::DT_HALF: {
- auto inp = static_cast<Eigen::half const*>(iweights.GetValues());
- auto oup =
- static_cast<Eigen::half*>(const_cast<void*>(oweights->GetValues()));
- std::transform(inp, inp + iweights.count(), oup,
- unary_op.unary<Eigen::half>());
- break;
- }
default:
return tensorflow::errors::Unimplemented(
"Data type not supported: " +
@@ -726,32 +588,6 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l,
}
break;
}
- case tensorflow::DataType::DT_HALF: {
- auto inp_l = static_cast<const Eigen::half*>(iweights_l.GetValues());
- auto inp_r = static_cast<const Eigen::half*>(iweights_r.GetValues());
- auto oup =
- static_cast<Eigen::half*>(const_cast<void*>(oweights->GetValues()));
-
- if (iweights_l.count() != iweights_r.count()) {
- // We only supports broadcast of RankZero
- if (iweights_l.count() == 1) {
- VLOG(2) << "I bet it is not working!" << (*inp_l);
- std::transform(inp_r, inp_r + iweights_r.count(), oup,
- binary_op.broadcast_l<Eigen::half>(*inp_l));
- } else if (iweights_r.count() == 1) {
- VLOG(2) << "I bet it is not working!" << (*inp_r);
- std::transform(inp_l, inp_l + iweights_l.count(), oup,
- binary_op.broadcast_r<Eigen::half>(*inp_r));
- } else {
- return tensorflow::errors::Unimplemented(
- "Binary op with non-rankZero broadcast not supported");
- }
- } else {
- std::transform(inp_l, inp_l + iweights_l.count(), inp_r, oup,
- binary_op.binary<Eigen::half>());
- }
- break;
- }
default:
return tensorflow::errors::Unimplemented(
"Data type not supported: " +
@@ -763,7 +599,7 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l,
tensorflow::Status ConstantFoldUnary(
Converter& ctx, const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
+ std::vector<TRT_TensorOrWeights> const& inputs,
std::vector<TRT_TensorOrWeights>* outputs) {
TRT_ShapedWeights weights_input = inputs.at(0).weights();
@@ -777,12 +613,13 @@ tensorflow::Status ConstantFoldUnary(
CHECK_EQ(weights_input.type_,
TFAttrs(node_def).get<tensorflow::DataType>("T"));
+ // Maybe I should do a switch
LambdaFactory unary_op;
if (node_def.op() == "Rsqrt") {
// Compute rsqrt
unary_op.op = LambdaFactory::OP_CATEGORY::RSQRT;
auto ret = UnaryCompute(weights_input, &weights_output, unary_op);
- // Pass the output
+ // Pass the output
if (ret == tensorflow::Status::OK()) {
outputs->push_back(TRT_TensorOrWeights(weights_output));
}
@@ -794,11 +631,11 @@ tensorflow::Status ConstantFoldUnary(
}
// TODO(jie,ben) broadcast is needed yet not implemented
-// Let's get the simple stuff working first. Maybe we should fall back to TF
+// Let's get the simple stuff working first. Maybe we should fall back to TF
// approach for constant folding
tensorflow::Status ConstantFoldBinary(
Converter& ctx, const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
+ std::vector<TRT_TensorOrWeights> const& inputs,
std::vector<TRT_TensorOrWeights>* outputs) {
TRT_ShapedWeights weights_input_l = inputs.at(0).weights();
TRT_ShapedWeights weights_input_r = inputs.at(1).weights();
@@ -811,12 +648,12 @@ tensorflow::Status ConstantFoldBinary(
"Binary op implicit broadcast not supported: " + node_def.op());
// TODO(jie): constant fold should really fall back to TF.
- int num_dims = weights_input_l.shape_.nbDims;
+ int nb_dims = weights_input_l.shape_.nbDims;
nvinfer1::Dims output_shape;
- output_shape.nbDims = num_dims;
- VLOG(2) << "nb_dims: " << num_dims
+ output_shape.nbDims = nb_dims;
+ VLOG(2) << "nb_dims: " << nb_dims
<< ", the other: " << weights_input_r.shape_.nbDims;
- for (int i = 0; i < num_dims; i++) {
+ for (int i = 0; i < nb_dims; i++) {
if (weights_input_l.shape_.d[i] == weights_input_r.shape_.d[i]) {
output_shape.d[i] = weights_input_l.shape_.d[i];
} else if (weights_input_l.shape_.d[i] == 1 ||
@@ -841,6 +678,7 @@ tensorflow::Status ConstantFoldBinary(
// Allocate output weights
TRT_ShapedWeights weights_output = ctx.get_temp_weights(dtype, output_shape);
+ // Maybe I should do a switch
LambdaFactory binary_op;
if (node_def.op() == "Sub") {
binary_op.op = LambdaFactory::OP_CATEGORY::SUB;
@@ -874,90 +712,48 @@ tensorflow::Status BinaryTensorOpWeight(
// Maybe this part has to be moved into the block of rsqrt later
// Check type consistency
+ auto dtype = TFAttrs(node_def).get<nvinfer1::DataType>("T");
+ CHECK_EQ_TYPE(tensor->getType(), dtype); // Cast to int for error messages
nvinfer1::DataType ttype;
TF_CHECK_OK(ConvertDType(weights.type_, &ttype));
+ CHECK_EQ_TYPE(ttype, dtype); // Cast to int for error message
// Check scale mode
auto dims_w = weights.shape_;
auto dims_t = tensor->getDimensions();
- // default to element-wise
+ // Default to element-wise
auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
- // TODO(jie): maybe use a permuatation instead to support more cases;
- bool permutation_flag = false;
-
if (weights.count() == 1) {
VLOG(2) << "UNIFORM";
scale_mode = nvinfer1::ScaleMode::kUNIFORM;
} else {
- // no broadcasting on Batch dimension;
- VLOG(2) << "WEIGHTS DIM: " << dims_w.nbDims
- << " tensor DIM: " << dims_t.nbDims;
- if (dims_w.nbDims == dims_t.nbDims + 1) {
- if (dims_w.d[0] == 1) {
- for (int i = 1; i < dims_w.nbDims; i++) {
- dims_w.d[i - 1] = dims_w.d[i];
- }
- dims_w.nbDims--;
- } else {
- return tensorflow::errors::InvalidArgument(
- "Binary op cannot operate on batch, " + node_def.name());
- }
- }
+ // No broadcasting on Batch dimension;
+ assert(dims_w.d[0] == 1);
- if (dims_w.nbDims == dims_t.nbDims && dims_w.d[0] == dims_t.d[0]) {
- scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
- // default is element;
- for (int i = 1; i < dims_w.nbDims; i++) {
- if (dims_w.d[i] != dims_t.d[i]) {
- // if dimension does not match, switch back to channel;
- VLOG(2) << "channel";
- scale_mode = nvinfer1::ScaleMode::kCHANNEL;
- break;
- }
- }
- // if channel as candidate, validate it
- if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) {
- for (int i = 1; i < dims_w.nbDims; i++) {
- if (dims_w.d[i] != 1)
- return tensorflow::errors::InvalidArgument(
- "Weight shape not compatible at, " + node_def.name());
- }
- } else {
- VLOG(2) << "elementwise";
+ // Broadcasting on Channel dimension only allowed in kUNIFORM
+ assert(dims_w.d[1] == dims_t.d[0]);
+ assert(dims_w.nbDims == dims_t.nbDims);
+
+ // Default is element-wise;
+ for (int i = 2; i < dims_w.nbDims; i++) {
+ if (dims_w.d[i] != dims_t.d[i - 1]) {
+ scale_mode = nvinfer1::ScaleMode::kCHANNEL;
+ break;
}
- } else if (dims_w.nbDims == 1 &&
- dims_w.d[0] == dims_t.d[dims_t.nbDims - 1]) {
- // channel wise and broadcast required;
- permutation_flag = true;
- scale_mode = nvinfer1::ScaleMode::kCHANNEL;
- } else {
- return tensorflow::errors::InvalidArgument(
- "Weight shape not compatible at, " + node_def.name());
}
- }
-
- // transpose last dimension
- std::vector<int> permutation(dims_t.nbDims + 1);
- if (permutation_flag) {
- if (scale_mode == nvinfer1::ScaleMode::kCHANNEL && dims_t.nbDims > 1) {
- // we swap the last dimension into channel for trt.
- // because of tensorflow default broadcasting rules.
- for (int i = 0; i < static_cast<int>(permutation.size()); i++) {
- permutation[i] = i;
+ if (scale_mode == nvinfer1::ScaleMode::kELEMENTWISE) {
+ scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
+ for (int i = 2; i < dims_w.nbDims; i++) {
+ if (dims_w.d[i] != 1)
+ return tensorflow::errors::InvalidArgument(
+ "Weight shape not compatible at, " + node_def.name());
}
- permutation[1] = dims_t.nbDims;
- permutation[dims_t.nbDims] = 1;
- tensor = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor),
- permutation);
- } else {
- return tensorflow::errors::InvalidArgument(
- "Transpose cannot be applied, " + node_def.name());
}
}
- // prepare weights
+ // Prepare weights
TRT_ShapedWeights shift_weights(weights.type_);
TRT_ShapedWeights scale_weights(weights.type_);
TRT_ShapedWeights power_weights(weights.type_);
@@ -983,26 +779,88 @@ tensorflow::Status BinaryTensorOpWeight(
scale_weights, power_weights);
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
- // transpose back dimension
- if (permutation_flag) {
- output_tensor = ctx.TransposeTensor(output_tensor, permutation);
- }
// Pass the output
outputs->push_back(TRT_TensorOrWeights(output_tensor));
return tensorflow::Status::OK();
}
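The scale-mode checks above reduce to a small decision: a single weight broadcasts uniformly, a per-channel vector uses kCHANNEL, and a full per-element match uses kELEMENTWISE. A hedged sketch of that decision, detached from the nvinfer1 enums (names are hypothetical):

#include <cstdint>

enum class ScaleMode { kUniform, kChannel, kElementwise, kUnsupported };

// weight_count: number of scalar weights; channels: size of the channel dim;
// elements_per_sample: total elements in one input sample (excluding batch).
ScaleMode PickScaleMode(int64_t weight_count, int64_t channels,
                        int64_t elements_per_sample) {
  if (weight_count == 1) return ScaleMode::kUniform;
  if (weight_count == channels) return ScaleMode::kChannel;
  if (weight_count == elements_per_sample) return ScaleMode::kElementwise;
  return ScaleMode::kUnsupported;  // would require a general broadcast
}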
-enum class ConvolutionType { DEFAULT, DEPTHWISE_CONV };
+tensorflow::Status BinaryTensorOpTensor(
+ Converter& ctx, const tensorflow::NodeDef& node_def,
+ const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r,
+ std::vector<TRT_TensorOrWeights>* outputs) {
+ static const std::unordered_map<string, nvinfer1::ElementWiseOperation> ops{
+ {"Add", nvinfer1::ElementWiseOperation::kSUM},
+ {"Mul", nvinfer1::ElementWiseOperation::kPROD},
+ // {"max", nvinfer1::ElementWiseOperation::kMAX},
+ // {"min", nvinfer1::ElementWiseOperation::kMIN},
+ {"Sub", nvinfer1::ElementWiseOperation::kSUB},
+ {"Div", nvinfer1::ElementWiseOperation::kDIV},
+ };
+
+ // FIXME assume type matches input weights
+ // Get trt type & shape
+ TFAttrs attrs(node_def);
+ // Maybe this part has to be moved into the block of rsqrt later
+ nvinfer1::DataType dtype = attrs.get<nvinfer1::DataType>("T");
+
+ // Check type consistency
+ CHECK_EQ_TYPE(tensor_l->getType(), dtype);
+ CHECK_EQ_TYPE(tensor_r->getType(), dtype);
+ auto op_pair = ops.find(node_def.op());
+ if (op_pair == ops.end())
+ return tensorflow::errors::Unimplemented("binary op: " + node_def.op() +
+ " not supported at: " +
+ node_def.name());
-tensorflow::Status ConvertConv2DHelper(
+ nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise(
+ *const_cast<nvinfer1::ITensor*>(tensor_l),
+ *const_cast<nvinfer1::ITensor*>(tensor_r), op_pair->second);
+
+ nvinfer1::ITensor* output_tensor = layer->getOutput(0);
+
+ // Pass the output
+ outputs->push_back(TRT_TensorOrWeights(output_tensor));
+ return tensorflow::Status::OK();
+}
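BinaryTensorOpTensor dispatches on the TF op name through a static lookup table and rejects anything it does not know. A minimal stand-alone sketch of that table-driven dispatch, using a placeholder enum instead of nvinfer1::ElementWiseOperation:

#include <string>
#include <unordered_map>

enum class EltwiseOp { kSum, kProd, kSub, kDiv };

// Returns false when the op has no TensorRT equivalent, mirroring the
// Unimplemented error path above.
bool LookupEltwiseOp(const std::string& tf_op, EltwiseOp* out) {
  static const std::unordered_map<std::string, EltwiseOp> kOps = {
      {"Add", EltwiseOp::kSum}, {"Mul", EltwiseOp::kProd},
      {"Sub", EltwiseOp::kSub}, {"Div", EltwiseOp::kDiv},
  };
  auto it = kOps.find(tf_op);
  if (it == kOps.end()) return false;
  *out = it->second;
  return true;
}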
+
+tensorflow::Status ConvertPlaceholder(
Converter& ctx, const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
- std::vector<TRT_TensorOrWeights>* outputs,
- int group // group ==0 specifies depthwise conv
-) {
- const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
+ std::vector<TRT_TensorOrWeights> const& inputs,
+ std::vector<TRT_TensorOrWeights>* outputs) {
+ VLOG(2) << "Placeholder should have been replace already";
+ return tensorflow::errors::Unimplemented(", cannot convert Placeholder op");
+ // OK this make sense since we are supposed to replace it with input
+ TFAttrs attrs(node_def);
+ nvinfer1::DataType dtype = attrs.get<nvinfer1::DataType>("dtype");
+ nvinfer1::Dims dims = attrs.get<nvinfer1::Dims>("shape");
+
+ dims.nbDims--;
+ for (int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1];
+
+ nvinfer1::ITensor* output =
+ ctx.network()->addInput(node_def.name().c_str(), dtype, dims);
+ if (!output) {
+ return tensorflow::errors::InvalidArgument("Failed to create Input layer");
+ }
+ outputs->push_back(TRT_TensorOrWeights(output));
+ return tensorflow::Status::OK();
+}
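The dead code after the early return shows the intended shape handling: TensorRT dimensions exclude the batch, so the leading TensorFlow dimension would be dropped and the remaining ones shifted left. A small sketch of that shift on a plain vector (illustrative only):

#include <vector>

// Drops the implicit batch dimension: {N, C, H, W} -> {C, H, W}.
std::vector<int> StripBatchDim(const std::vector<int>& tf_shape) {
  if (tf_shape.empty()) return tf_shape;
  return std::vector<int>(tf_shape.begin() + 1, tf_shape.end());
}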
+tensorflow::Status ConvertConv2D(Converter& ctx,
+ const tensorflow::NodeDef& node_def,
+ const std::vector<TRT_TensorOrWeights>& inputs,
+ std::vector<TRT_TensorOrWeights>* outputs) {
+ nvinfer1::ITensor const* tensor = inputs.at(0).tensor();
+ // TODO(jie): handle NHWC/NCHW transpose;
+ TRT_ShapedWeights weights_rsck = inputs.at(1).weights();
+ TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck);
+ ReorderRSCKToKCRS(weights_rsck, &weights);
+ TRT_ShapedWeights biases(weights.type_);
+ int noutput = weights.shape_.d[0];
+ nvinfer1::DimsHW kernel_size;
+ kernel_size.h() = weights.shape_.d[2];
+ kernel_size.w() = weights.shape_.d[3];
TFAttrs attrs(node_def);
int h_index = 2;
@@ -1016,31 +874,11 @@ tensorflow::Status ConvertConv2DHelper(
// TODO(jie): transpose it
}
- // tensor after transpose (NCHW)
- auto tensor_dim = tensor->getDimensions();
-
- int num_groups = group;
- if (num_groups == 0) // depthwise convolution
- num_groups = tensor_dim.d[0];
- VLOG(2) << "groups count: " << num_groups;
-
- TRT_ShapedWeights weights_rsck = inputs.at(1).weights();
- TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck);
- ReorderRSCKToKCRS(weights_rsck, &weights, num_groups);
- TRT_ShapedWeights biases(weights.type_);
- int noutput = weights.shape_.d[0] * num_groups;
- nvinfer1::DimsHW kernel_size;
- kernel_size.h() = weights.shape_.d[2];
- kernel_size.w() = weights.shape_.d[3];
- VLOG(2) << "kernel size: " << kernel_size.h() << ", " << kernel_size.w();
-
// TODO(jie): stride. (NHWC/NCHW)
auto tf_stride = attrs.get<std::vector<int>>("strides");
- VLOG(2) << "h_INDEX" << h_index << ", w_index " << w_index;
- VLOG(2) << "stride!!!: " << tf_stride[0] << tf_stride[1] << tf_stride[2]
- << tf_stride[3];
nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]);
+ auto tensor_dim = tensor->getDimensions();
std::vector<std::pair<int, int>> padding;
// TODO(jie): padding.
if (attrs.get<string>("padding") == "SAME") {
@@ -1081,11 +919,10 @@ tensorflow::Status ConvertConv2DHelper(
layer->setStride(stride);
layer->setPadding({padding[0].first, padding[1].first});
layer->setName(node_def.name().c_str());
- layer->setNbGroups(num_groups);
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
auto dim_after = output_tensor->getDimensions();
- VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1] << ", "
+ VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1]
<< dim_after.d[2] << ", " << dim_after.d[3];
if (data_format == "NHWC") {
@@ -1098,101 +935,11 @@ tensorflow::Status ConvertConv2DHelper(
return tensorflow::Status::OK();
}
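ReorderRSCKToKCRS bridges the two filter layouts: TensorFlow stores Conv2D kernels as [R, S, C, K] (filter height, width, input channels, output channels) while TensorRT expects [K, C, R, S]. A hedged sketch of that transpose on a flat float buffer, ignoring groups and dtype handling (names and signature are illustrative, not the real helper):

#include <vector>

void ReorderFilterRSCKToKCRS(const std::vector<float>& src, int R, int S, int C,
                             int K, std::vector<float>* dst) {
  dst->resize(src.size());
  for (int r = 0; r < R; ++r)
    for (int s = 0; s < S; ++s)
      for (int c = 0; c < C; ++c)
        for (int k = 0; k < K; ++k)
          // src index in RSCK order, dst index in KCRS order.
          (*dst)[((k * C + c) * R + r) * S + s] =
              src[((r * S + s) * C + c) * K + k];
}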
-tensorflow::Status ConvertConv2DHelper(
- Converter& ctx, const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
- std::vector<TRT_TensorOrWeights>* outputs, ConvolutionType type) {
- switch (type) {
- case ConvolutionType::DEFAULT:
- return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 1);
- case ConvolutionType::DEPTHWISE_CONV:
- return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 0);
- }
- return tensorflow::errors::Unimplemented("unsupported convolution type at, " +
- node_def.name());
-}
-
-tensorflow::Status BinaryTensorOpTensor(
- Converter& ctx, const tensorflow::NodeDef& node_def,
- const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r,
- std::vector<TRT_TensorOrWeights>* outputs) {
- static const std::unordered_map<string, nvinfer1::ElementWiseOperation> ops{
- {"Add", nvinfer1::ElementWiseOperation::kSUM},
- {"Mul", nvinfer1::ElementWiseOperation::kPROD},
- {"Sub", nvinfer1::ElementWiseOperation::kSUB},
- {"Div", nvinfer1::ElementWiseOperation::kDIV},
- };
-
- // FIXME assume type matches input weights
- // get trt type & shape
- TFAttrs attrs(node_def);
- // maybe this part has to be moved into the block of rsqrt later
- nvinfer1::DataType dtype = attrs.get<nvinfer1::DataType>("T");
-
- // check type consistency
- CHECK_EQ_TYPE(tensor_l->getType(), dtype);
- CHECK_EQ_TYPE(tensor_r->getType(), dtype);
- auto op_pair = ops.find(node_def.op());
- if (op_pair == ops.end())
- return tensorflow::errors::Unimplemented("binary op: " + node_def.op() +
- " not supported at: " +
- node_def.name());
-
- nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise(
- *const_cast<nvinfer1::ITensor*>(tensor_l),
- *const_cast<nvinfer1::ITensor*>(tensor_r), op_pair->second);
-
- nvinfer1::ITensor* output_tensor = layer->getOutput(0);
-
- // pass the output
- outputs->push_back(TRT_TensorOrWeights(output_tensor));
- return tensorflow::Status::OK();
-}
-
-tensorflow::Status ConvertPlaceholder(
- Converter& ctx, const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
- std::vector<TRT_TensorOrWeights>* outputs) {
- VLOG(2) << "Placeholder should have been replace already";
- return tensorflow::errors::Unimplemented("cannot convert Placeholder op");
- // OK this make sense since we are supposed to replace it with input
- TFAttrs attrs(node_def);
- nvinfer1::DataType dtype = attrs.get<nvinfer1::DataType>("dtype");
- nvinfer1::Dims dims = attrs.get<nvinfer1::Dims>("shape");
-
- dims.nbDims--;
- for (int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1];
-
- nvinfer1::ITensor* output =
- ctx.network()->addInput(node_def.name().c_str(), dtype, dims);
- if (!output) {
- return tensorflow::errors::InvalidArgument("Failed to create Input layer");
- }
- outputs->push_back(TRT_TensorOrWeights(output));
- return tensorflow::Status::OK();
-}
-
-tensorflow::Status ConvertConv2D(Converter& ctx,
- const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
- std::vector<TRT_TensorOrWeights>* outputs) {
- return ConvertConv2DHelper(ctx, node_def, inputs, outputs,
- ConvolutionType::DEFAULT);
-}
-
-tensorflow::Status ConvertConv2DDepthwise(
- Converter& ctx, const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
- std::vector<TRT_TensorOrWeights>* outputs) {
- return ConvertConv2DHelper(ctx, node_def, inputs, outputs,
- ConvolutionType::DEPTHWISE_CONV);
-}
-
tensorflow::Status ConvertPool(Converter& ctx,
const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
+ std::vector<TRT_TensorOrWeights> const& inputs,
std::vector<TRT_TensorOrWeights>* outputs) {
- const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
+ nvinfer1::ITensor const* tensor = inputs.at(0).tensor();
TFAttrs attrs(node_def);
int h_index = 2;
@@ -1210,8 +957,6 @@ tensorflow::Status ConvertPool(Converter& ctx,
// TODO(jie): support other pooling type
if (node_def.op() == "MaxPool")
type = nvinfer1::PoolingType::kMAX;
- else if (node_def.op() == "AvgPool")
- type = nvinfer1::PoolingType::kAVERAGE;
else
return tensorflow::errors::Unimplemented("Only supports Max pool");
@@ -1274,9 +1019,9 @@ tensorflow::Status ConvertPool(Converter& ctx,
tensorflow::Status ConvertActivation(
Converter& ctx, const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
+ std::vector<TRT_TensorOrWeights> const& inputs,
std::vector<TRT_TensorOrWeights>* outputs) {
- const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
+ nvinfer1::ITensor const* tensor = inputs.at(0).tensor();
nvinfer1::IActivationLayer* layer = ctx.network()->addActivation(
*const_cast<nvinfer1::ITensor*>(tensor), nvinfer1::ActivationType::kRELU);
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
@@ -1286,14 +1031,14 @@ tensorflow::Status ConvertActivation(
tensorflow::Status ConvertScale(Converter& ctx,
const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
+ std::vector<TRT_TensorOrWeights> const& inputs,
std::vector<TRT_TensorOrWeights>* outputs) {
if (inputs.size() != 2 || !inputs.at(0).is_tensor() ||
!inputs.at(1).is_weights())
return tensorflow::errors::Unimplemented(
"Only supports tensor op weight for now, at " + node_def.name());
// Implement tensor binaryOp weight [channel wise] for now;
- const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
+ nvinfer1::ITensor const* tensor = inputs.at(0).tensor();
// TODO(jie): handle NHWC/NCHW transpose;
TRT_ShapedWeights weights = inputs.at(1).weights();
@@ -1310,26 +1055,9 @@ tensorflow::Status ConvertScale(Converter& ctx,
} else {
VLOG(2) << "NCHW !!!!";
}
-
- auto dims = tensor->getDimensions();
- VLOG(2) << "tensor dimensions: " << dims.nbDims;
- for (int i = 0; i < dims.nbDims; i++) {
- VLOG(2) << "i: " << dims.d[i];
- }
- dims = weights.shape_;
- VLOG(2) << "tensor dimensions: " << dims.nbDims;
- for (int i = 0; i < dims.nbDims; i++) {
- VLOG(2) << "i: " << dims.d[i];
- }
-
- nvinfer1::ScaleMode mode = nvinfer1::ScaleMode::kCHANNEL;
- if (weights.shape_.d[0] == 1) {
- mode = nvinfer1::ScaleMode::kUNIFORM;
- }
-
- nvinfer1::IScaleLayer* layer =
- ctx.network()->addScale(*const_cast<nvinfer1::ITensor*>(tensor), mode,
- weights, empty_weights, empty_weights);
+ nvinfer1::IScaleLayer* layer = ctx.network()->addScale(
+ *const_cast<nvinfer1::ITensor*>(tensor), nvinfer1::ScaleMode::kCHANNEL,
+ weights, empty_weights, empty_weights);
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
if (data_format == "NHWC") {
@@ -1344,7 +1072,7 @@ tensorflow::Status ConvertScale(Converter& ctx,
tensorflow::Status ConvertConst(Converter& ctx,
const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
+ std::vector<TRT_TensorOrWeights> const& inputs,
std::vector<TRT_TensorOrWeights>* outputs) {
const auto& weights_tensor = node_def.attr().at("value").tensor();
@@ -1363,144 +1091,20 @@ tensorflow::Status ConvertConst(Converter& ctx,
VLOG(2) << "SCALAR!!!" << node_def.name();
nvinfer1::Dims scalar_shape;
if (tensor.dims() > 0) {
- VLOG(2) << "dimensions: " << tensor.dims();
- VLOG(2) << "size: " << weights_tensor.float_val_size();
- scalar_shape = GetTensorShape(tensor);
- for (int i = 0; i < scalar_shape.nbDims; i++)
- VLOG(2) << scalar_shape.d[i];
- if (GetShapeSize(scalar_shape) != weights_tensor.float_val_size()) {
- if (weights_tensor.float_val_size() == 1 ||
- scalar_shape.d[0] == weights_tensor.float_val_size()) {
- scalar_shape.nbDims = 1;
- // no dimension provided. flatten it
- scalar_shape.d[0] = weights_tensor.float_val_size();
- scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL;
- } else {
- LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and"
- << " kUNIFORM, at: " << node_def.name();
- }
- }
- } else {
VLOG(2) << "Dimensions: " << tensor.dims();
- scalar_shape.nbDims = 1;
- // no dimension provided. flatten it
- scalar_shape.d[0] = weights_tensor.float_val_size();
- scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL;
- for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) {
- scalar_shape.d[i] = 0;
- scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL;
- }
- }
- if (ctx.isFP16()) {
- auto dtype_new = tensorflow::DataType::DT_HALF;
- size_t len_data = tensorflow::DataTypeSize(dtype_new);
- for (int i = 0; i < scalar_shape.nbDims; i++)
- len_data *= scalar_shape.d[i];
- ctx.weight_store()->store_.push_back(std::vector<uint8_t>(len_data));
- void* dst = static_cast<void*>(&(ctx.weight_store()->store_.back()[0]));
- tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape());
- auto half_tensor = temp_tensor.flat<Eigen::half>();
- Eigen::DefaultDevice defd;
- half_tensor.device(defd) =
- tensor.flat<float>().template cast<Eigen::half>();
- memcpy(dst, half_tensor.data(), len_data); // store into weight store
- weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape);
+ weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(),
+ GetTensorShape(tensor));
} else {
- size_t len_data = tensorflow::DataTypeSize(dtype);
- for (int i = 0; i < scalar_shape.nbDims; i++)
- len_data *= scalar_shape.d[i];
- ctx.weight_store()->store_.push_back(std::vector<uint8_t>(len_data));
- void* dst = static_cast<void*>(&(ctx.weight_store()->store_.back()[0]));
- std::vector<float> tensor_data(
- weights_tensor.float_val().begin(),
- weights_tensor.float_val()
- .end()); // make a local copy first to flatten
- memcpy(dst, tensor_data.data(), len_data); // store into weight store
- weights = TRT_ShapedWeights(dtype, dst, scalar_shape);
- }
- } else if (!weights_tensor.int_val().empty()) {
- VLOG(2) << "int!!!" << node_def.name();
- nvinfer1::Dims scalar_shape;
- if (tensor.dims() > 0) {
- VLOG(2) << "dimensions: " << tensor.dims();
- scalar_shape = GetTensorShape(tensor);
- if (GetShapeSize(scalar_shape) != weights_tensor.int_val_size()) {
- if (weights_tensor.int_val_size() == 1 ||
- scalar_shape.d[0] == weights_tensor.int_val_size()) {
- scalar_shape.nbDims = 1;
- // no dimension provided. flatten it
- scalar_shape.d[0] = weights_tensor.int_val_size();
- scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL;
- } else {
- LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and"
- << " kUNIFORM, at: " << node_def.name();
- }
- }
- } else {
- VLOG(2) << "dimensions: " << tensor.dims();
+ VLOG(2) << "Dimensions: " << tensor.dims();
scalar_shape.nbDims = 1;
- // no dimension provided. flatten it
- scalar_shape.d[0] = weights_tensor.int_val_size();
+ scalar_shape.d[0] = 1;
scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL;
for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) {
scalar_shape.d[i] = 0;
scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL;
}
- }
- if (ctx.isFP16()) {
- auto dtype_new = tensorflow::DataType::DT_HALF;
- size_t len_data = tensorflow::DataTypeSize(dtype_new);
- for (int i = 0; i < scalar_shape.nbDims; i++)
- len_data *= scalar_shape.d[i];
- ctx.weight_store()->store_.push_back(std::vector<uint8_t>(len_data));
- void* dst = static_cast<void*>(&(ctx.weight_store()->store_.back()[0]));
- tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape());
- TTypes<Eigen::half>::Flat half_tensor = temp_tensor.flat<Eigen::half>();
- Eigen::DefaultDevice defd;
- switch (dtype) {
- case (tensorflow::DT_INT32): {
- half_tensor.device(defd) =
- tensor.flat<int32>().template cast<Eigen::half>();
- break;
- }
- case (tensorflow::DT_INT16): {
- half_tensor.device(defd) =
- tensor.flat<int16>().template cast<Eigen::half>();
- break;
- }
- case (tensorflow::DT_INT8): {
- half_tensor.device(defd) =
- tensor.flat<int8>().template cast<Eigen::half>();
- break;
- }
- case (tensorflow::DT_UINT8): {
- half_tensor.device(defd) =
- tensor.flat<uint8>().template cast<Eigen::half>();
- break;
- }
- default:
- return tensorflow::errors::InvalidArgument(
- "Datatype " + tensorflow::DataTypeString(dtype) +
- " for FP16 conversion");
- break;
- };
- memcpy(dst, half_tensor.data(), len_data); // store into weight store
- weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape);
- } else {
- size_t len_data = tensorflow::DataTypeSize(dtype);
- for (int i = 0; i < scalar_shape.nbDims; i++)
- len_data *= scalar_shape.d[i];
- size_t len_tensor = weights_tensor.int_val_size() * sizeof(int32);
- len_data = std::max(len_data, len_tensor);
- ctx.weight_store()->store_.push_back(std::vector<uint8_t>(len_data));
- void* dst = static_cast<void*>(&(ctx.weight_store()->store_.back()[0]));
- std::vector<int32> tensor_data(
- weights_tensor.int_val().begin(),
- weights_tensor.int_val()
- .end()); // make a local copy first to flatten
- // doesn't have to be contigous
- memcpy(dst, tensor_data.data(), len_tensor); // store into weight store
- weights = TRT_ShapedWeights(dtype, dst, scalar_shape);
+ weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(),
+ scalar_shape);
}
} else if (!weights_tensor.tensor_content().empty()) {
VLOG(2) << "TENSOR!!!" << node_def.name();
@@ -1526,7 +1130,7 @@ tensorflow::Status ConvertConst(Converter& ctx,
tensorflow::Status ConvertIdentity(
Converter& ctx, const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
+ std::vector<TRT_TensorOrWeights> const& inputs,
std::vector<TRT_TensorOrWeights>* outputs) {
outputs->push_back(inputs.at(0));
return tensorflow::Status::OK();
@@ -1534,7 +1138,7 @@ tensorflow::Status ConvertIdentity(
tensorflow::Status ConvertBinary(Converter& ctx,
const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
+ std::vector<TRT_TensorOrWeights> const& inputs,
std::vector<TRT_TensorOrWeights>* outputs) {
if (inputs.size() != 2)
return tensorflow::errors::FailedPrecondition(
@@ -1561,7 +1165,7 @@ tensorflow::Status ConvertBinary(Converter& ctx,
tensorflow::Status ConvertUnary(Converter& ctx,
const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
+ std::vector<TRT_TensorOrWeights> const& inputs,
std::vector<TRT_TensorOrWeights>* outputs) {
if (inputs.size() != 1)
return tensorflow::errors::FailedPrecondition(
@@ -1579,7 +1183,7 @@ tensorflow::Status ConvertUnary(Converter& ctx,
tensorflow::Status ConvertReduce(Converter& ctx,
const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
+ std::vector<TRT_TensorOrWeights> const& inputs,
std::vector<TRT_TensorOrWeights>* outputs) {
if (inputs.size() != 2 || !inputs.at(0).is_tensor() ||
!inputs.at(1).is_weights())
@@ -1587,7 +1191,7 @@ tensorflow::Status ConvertReduce(Converter& ctx,
"Input expects tensor and weights, at" + node_def.name());
// Implement tensor binaryOp weight [channel wise] for now;
- const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
+ nvinfer1::ITensor const* tensor = inputs.at(0).tensor();
auto dims = tensor->getDimensions();
// Restore implicit batch dimension
int nb_dims = dims.nbDims + 1;
@@ -1625,7 +1229,6 @@ tensorflow::Status ConvertReduce(Converter& ctx,
return tensorflow::errors::InvalidArgument("TRT cannot reduce at 0, at" +
node_def.name());
if (index_list_data[i] == 1) permuted_index = 1;
-
idx_set.emplace(index_list_data[i]);
}
@@ -1633,7 +1236,7 @@ tensorflow::Status ConvertReduce(Converter& ctx,
nvinfer1::DimsHW pool_kernel;
if (permuted_index == 1) {
for (int i = 2; i < nb_dims; i++) {
- if (idx_set.count(i) == 0) {
+ if (idx_set.count(i)) {
permuted_index = i;
break;
}
@@ -1668,13 +1271,12 @@ tensorflow::Status ConvertReduce(Converter& ctx,
output_tensor = ctx.TransposeTensor(
const_cast<nvinfer1::ITensor*>(output_tensor), permutation_order);
}
- outputs->push_back(TRT_TensorOrWeights(output_tensor));
return tensorflow::Status::OK();
}
tensorflow::Status ConvertPad(Converter& ctx,
const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
+ std::vector<TRT_TensorOrWeights> const& inputs,
std::vector<TRT_TensorOrWeights>* outputs) {
if (inputs.size() != 2 || !inputs.at(0).is_tensor() ||
!inputs.at(1).is_weights())
@@ -1682,7 +1284,7 @@ tensorflow::Status ConvertPad(Converter& ctx,
"Input expects tensor and weights, at" + node_def.name());
// Implement tensor binaryOp weight [channel wise] for now;
- const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
+ nvinfer1::ITensor const* tensor = inputs.at(0).tensor();
auto dims = tensor->getDimensions();
// Restore implicit batch dimension
int nb_dims = dims.nbDims + 1;
@@ -1769,287 +1371,19 @@ tensorflow::Status ConvertPad(Converter& ctx,
return tensorflow::Status::OK();
}
-tensorflow::Status ConvertConcat(Converter& ctx,
- const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
- std::vector<TRT_TensorOrWeights>* outputs) {
- // not including the last input (axis) here
- int input_size = static_cast<int>(inputs.size()) - 1;
-
- if (!inputs.at(0).is_tensor())
- return tensorflow::errors::InvalidArgument(
- "Concat in TRT support only Tensor input, at " + node_def.name());
-
- // We are retrieving the axis
- TRT_ShapedWeights axis = inputs.at(input_size).weights();
-
- TFAttrs attrs(node_def);
- // auto attr_size = attrs.at("N")->i();
- // auto data_type = attrs.get<nvinfer1::DataType>("T");
- auto index_type = attrs.get<tensorflow::DataType>("Tidx");
-
- // TODO(jie): handle data type
- // Only expect to handle INT32 as index attributes for now
- if (index_type != tensorflow::DataType::DT_INT32)
- return tensorflow::errors::Unimplemented(
- "Tidx supports only DT_INT32, at " + node_def.name());
-
- int index = *(static_cast<int*>(const_cast<void*>(axis.GetValues())));
-
- // TODO(jie): early termination with no-op (attr_size==1)
-
- auto dim = inputs.at(0).tensor()->getDimensions();
- // dimension check
- if (index > dim.nbDims + 1)
- return tensorflow::errors::InvalidArgument(
- "Concatenate on axis out of dimension range, at " + node_def.name());
-
- if (index == 0)
- return tensorflow::errors::InvalidArgument(
- "Concatenate on batch dimension not supported, at " + node_def.name());
-
- // incase we need permutation;
- std::vector<int> permutation_order(dim.nbDims + 1);
-
- for (int i = 0; i < dim.nbDims + 1; i++) permutation_order[i] = i;
-
- if (index != 1) {
- permutation_order[1] = index - 1;
- permutation_order[index - 1] = 1;
- }
-
- std::vector<nvinfer1::ITensor const*> inputs_vec;
- // Shap chack (all input tensor should have same shape)
- // starting from 0 since we are probably also doing transpose here;
- for (int i = 0; i < input_size; i++) {
- auto tensor_i = inputs.at(i).tensor();
- auto dim_i = tensor_i->getDimensions();
- if (dim_i.nbDims != dim.nbDims)
- return tensorflow::errors::InvalidArgument(
- "Concatenate receives inputs with inconsistent dimensions, at " +
- node_def.name());
-
- for (int j = 0; j < dim.nbDims; j++) {
- // check dimension consistency on non-concatenate axis
- if (j != index - 1 && dim_i.d[j] != dim.d[j])
- return tensorflow::errors::InvalidArgument(
- "Concatenate receives inputs with inconsistent shape, at" +
- node_def.name());
- }
-
- // TRT does concatenation only on channel!
- if (index != 1)
- tensor_i = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor_i),
- permutation_order);
-
- inputs_vec.push_back(tensor_i);
- }
-
- // nvinfer1::ITensor const* tensor = inputs.at(0).tensor();
- nvinfer1::IConcatenationLayer* layer = ctx.network()->addConcatenation(
- const_cast<nvinfer1::ITensor* const*>(inputs_vec.data()),
- inputs_vec.size());
- nvinfer1::ITensor* output_tensor = layer->getOutput(0);
-
- if (index != 1) {
- output_tensor = ctx.TransposeTensor(output_tensor, permutation_order);
- }
- outputs->push_back(TRT_TensorOrWeights(output_tensor));
- return tensorflow::Status::OK();
-}
-
-tensorflow::Status ConvertFusedBatchNorm(
- Converter& ctx, const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
- std::vector<TRT_TensorOrWeights>* outputs) {
- TFAttrs attrs(node_def);
- float epsilon = attrs.get<float>("epsilon");
- auto data_format = attrs.get<string>("data_format");
- if (data_format != "NCHW") {
- return tensorflow::errors::Unimplemented(
- "only data_format=NCHW is supported, at " + node_def.name());
- }
- bool is_training = attrs.get<bool>("is_training");
- if (is_training) {
- return tensorflow::errors::Unimplemented(
- "only is_training=false is supported, at " + node_def.name());
- }
- nvinfer1::ITensor const* tensor = inputs.at(0).tensor();
- TRT_ShapedWeights scale_weights = inputs.at(1).weights();
- TRT_ShapedWeights offset_weights = inputs.at(2).weights();
- TRT_ShapedWeights mean_weights = inputs.at(3).weights();
- TRT_ShapedWeights variance_weights = inputs.at(4).weights();
- TRT_ShapedWeights dummy_power_weights(scale_weights.type_);
- TRT_ShapedWeights combined_scale_weights =
- ctx.get_temp_weights_like(scale_weights);
- TRT_ShapedWeights combined_offset_weights =
- ctx.get_temp_weights_like(offset_weights);
- size_t nweight = scale_weights.count();
- if ((scale_weights.type_ == offset_weights.type_) &&
- (mean_weights.type_ == variance_weights.type_) &&
- (scale_weights.type_ == variance_weights.type_)) {
- if ((scale_weights.type_ != tensorflow::DataType::DT_FLOAT) &&
- (scale_weights.type_ != tensorflow::DataType::DT_HALF)) {
- return tensorflow::errors::Unimplemented(
- "only float32 or float16 weight data type is supported, for node " +
- node_def.name() + " got " +
- tensorflow::DataTypeString(scale_weights.type_));
- }
- if (scale_weights.type_ == tensorflow::DT_FLOAT) {
- for (size_t i = 0; i < nweight; ++i) {
- float scale = (static_cast<float const*>(scale_weights.GetValues()))[i];
- float offset =
- (static_cast<float const*>(offset_weights.GetValues()))[i];
- float mean = (static_cast<float const*>(mean_weights.GetValues()))[i];
- float variance =
- (static_cast<float const*>(variance_weights.GetValues()))[i];
- float& combined_scale_ref = const_cast<float*>(
- static_cast<float const*>(combined_scale_weights.GetValues()))[i];
- float& combined_offset_ref = const_cast<float*>(
- static_cast<float const*>(combined_offset_weights.GetValues()))[i];
- combined_scale_ref = scale / sqrtf(variance + epsilon);
- combined_offset_ref = offset - mean * combined_scale_ref;
- }
- } else {
- const Eigen::half* scale_vals =
- (static_cast<Eigen::half const*>(scale_weights.GetValues()));
- const Eigen::half* off_vals =
- (static_cast<Eigen::half const*>(offset_weights.GetValues()));
- const Eigen::half* mean_vals =
- (static_cast<Eigen::half const*>(mean_weights.GetValues()));
- const Eigen::half* variance_vals =
- (static_cast<Eigen::half const*>(variance_weights.GetValues()));
- Eigen::half* comb_scale_vals = const_cast<Eigen::half*>(
- static_cast<Eigen::half const*>(combined_scale_weights.GetValues()));
- Eigen::half* comb_off_vals = const_cast<Eigen::half*>(
- static_cast<Eigen::half const*>(combined_offset_weights.GetValues()));
- for (size_t i = 0; i < nweight; ++i) {
- float scale(scale_vals[i]);
- float offset(off_vals[i]);
- float mean(mean_vals[i]);
- float variance(variance_vals[i]);
- float combined_scale_ref = scale / sqrtf(variance + epsilon);
- comb_scale_vals[i] = Eigen::half(combined_scale_ref);
- float combined_offset_ref = offset - mean * combined_scale_ref;
- comb_off_vals[i] = Eigen::half(combined_offset_ref);
- }
- }
- }
- nvinfer1::IScaleLayer* layer = ctx.network()->addScale(
- *const_cast<nvinfer1::ITensor*>(tensor), nvinfer1::ScaleMode::kCHANNEL,
- combined_offset_weights.GetWeightsForTRT(),
- combined_scale_weights.GetWeightsForTRT(),
- dummy_power_weights.GetWeightsForTRT());
- nvinfer1::ITensor* output_tensor = layer->getOutput(0);
- outputs->push_back(TRT_TensorOrWeights(output_tensor));
- return tensorflow::Status::OK();
-}
-
-tensorflow::Status ConvertMatMul(Converter& ctx,
- const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
- std::vector<TRT_TensorOrWeights>* outputs) {
- const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
-
- // TODO(jie): transpose!
- TFAttrs attrs(node_def);
-
- TRT_ShapedWeights weights_ck = inputs.at(1).weights();
- TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_ck);
- ReorderCKtoKC(weights_ck, &weights);
- TRT_ShapedWeights biases(weights.type_);
-
- int noutput = weights.shape_.d[0];
-
- nvinfer1::IFullyConnectedLayer* layer = ctx.network()->addFullyConnected(
- *const_cast<nvinfer1::ITensor*>(tensor), noutput, weights, biases);
-
- nvinfer1::ITensor* output_tensor = layer->getOutput(0);
- outputs->push_back(TRT_TensorOrWeights(output_tensor));
- return tensorflow::Status::OK();
-}
-
-tensorflow::Status ConvertReshape(
- Converter& ctx, const tensorflow::NodeDef& node_def,
- const std::vector<TRT_TensorOrWeights>& inputs,
- std::vector<TRT_TensorOrWeights>* outputs) {
- if (inputs.size() != 2 || !inputs.at(0).is_tensor() ||
- !inputs.at(1).is_weights())
- return tensorflow::errors::InvalidArgument(
- "Input expects tensor and weights, at" + node_def.name());
-
- // implement tensor binaryOp weight [channel wise] for now;
- const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
- auto dims = tensor->getDimensions();
- // restore implicit batch dimension
-
- TRT_ShapedWeights shape = inputs.at(1).weights();
-
- TFAttrs attrs(node_def);
-
- auto padding_type = attrs.get<tensorflow::DataType>("Tshape");
-
- if (shape.shape_.nbDims != 1)
- return tensorflow::errors::InvalidArgument(
- "reshape new shape is not 1 dimensional, at " + node_def.name());
-
- // Only expect to handle INT32 as attributes for now
- if (padding_type != tensorflow::DataType::DT_INT32)
- return tensorflow::errors::Unimplemented(
- "reshape new shape supports only DT_INT32, at " + node_def.name());
-
- auto shape_data = static_cast<int*>(const_cast<void*>(shape.GetValues()));
-
- if (shape_data[0] != -1)
- return tensorflow::errors::InvalidArgument(
- "reshape new shape first dimension is not -1, at " + node_def.name());
-
- auto shape_num_dims = shape.shape_.d[0];
- VLOG(2) << "shape dimensions: " << shape_num_dims;
- int volume_w = 1;
- for (int i = 1; i < shape.shape_.d[0]; i++) volume_w *= shape_data[i];
-
- int volume_t = 1;
- for (int i = 0; i < dims.nbDims; i++) volume_t *= dims.d[i];
-
- VLOG(2) << "volume: " << volume_t << " volume weights: " << volume_w;
- if (volume_w != volume_t)
- return tensorflow::errors::InvalidArgument(
- "volume does not agree between tensor and new shape, at " +
- node_def.name());
-
- nvinfer1::IShuffleLayer* layer =
- ctx.network()->addShuffle(*const_cast<nvinfer1::ITensor*>(tensor));
-
- nvinfer1::Dims reshape_dims;
- VLOG(2) << "new dimension: " << shape_num_dims - 1;
- reshape_dims.nbDims = shape_num_dims - 1;
- for (int32_t i = 0; i < reshape_dims.nbDims; ++i) {
- reshape_dims.d[i] = shape_data[i + 1];
- }
- layer->setReshapeDimensions(reshape_dims);
- VLOG(2) << "new dimension: " << shape_num_dims - 1;
-
- nvinfer1::ITensor* output_tensor = layer->getOutput(0);
- auto dims_output = output_tensor->getDimensions();
- VLOG(2) << "output tensor dimension:" << dims_output.nbDims;
- outputs->push_back(TRT_TensorOrWeights(output_tensor));
- return tensorflow::Status::OK();
-}
-
void Converter::register_op_converters() {
// vgg_16 slim implementation
op_registry_["Placeholder"] = ConvertPlaceholder;
op_registry_["Conv2D"] = ConvertConv2D;
- op_registry_["DepthwiseConv2dNative"] = ConvertConv2DDepthwise;
op_registry_["Relu"] = ConvertActivation;
op_registry_["MaxPool"] = ConvertPool;
- op_registry_["AvgPool"] = ConvertPool;
// This could be really handled as ConvertBinary
op_registry_["BiasAdd"] = ConvertScale;
op_registry_["Const"] = ConvertConst;
+ // op_registry_["MatMul"] = ConvertFullyConnected; // Not used in vgg
// TODO(ben,jie): this is a temp hack.
op_registry_["Identity"] = ConvertIdentity; // Identity should be removed
+ // op_registry_["AvgPool"] = ConvertPool;
// resnet_50_v1 slim implementation
op_registry_["Add"] = ConvertBinary;
@@ -2059,364 +1393,26 @@ void Converter::register_op_converters() {
op_registry_["Mean"] = ConvertReduce;
op_registry_["Pad"] = ConvertPad;
// TODO(ben,jie): Add more ops
-
- op_registry_["ConcatV2"] = ConvertConcat;
- op_registry_["MatMul"] = ConvertMatMul;
- op_registry_["Reshape"] = ConvertReshape;
- op_registry_["FusedBatchNorm"] = ConvertFusedBatchNorm;
- op_registry_["FusedBatchNormV2"] = ConvertFusedBatchNorm;
}
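register_op_converters fills op_registry_ so that convert_node can dispatch purely on node_def.op(). A simplified sketch of that registry pattern with stand-in types (FakeNodeDef and the converter signature here are placeholders, not the TF classes):

#include <functional>
#include <string>
#include <unordered_map>
#include <utility>

struct FakeNodeDef { std::string op; };
using OpConverter = std::function<bool(const FakeNodeDef&)>;

class OpRegistry {
 public:
  void Register(const std::string& op, OpConverter fn) {
    registry_[op] = std::move(fn);
  }
  // Returns false for ops with no registered converter, like the
  // "unsupported op" error path in convert_node.
  bool Convert(const FakeNodeDef& node) const {
    auto it = registry_.find(node.op);
    if (it == registry_.end()) return false;
    return it->second(node);
  }

 private:
  std::unordered_map<std::string, OpConverter> registry_;
};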
} // namespace
-tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams& s) {
- return tensorflow::errors::Unimplemented("Not implemented yet");
-}
-tensorflow::Status ConvertCalibrationNodeToEngineNode(
- tensorflow::Graph& graph, tensorflow::Node* c_node) {
- const auto ndef = c_node->def();
-
- TFAttrs attrs(ndef);
- std::vector<string> segment_nodes(
- attrs.get<std::vector<string>>("segment_nodes"));
- std::vector<string> output_nodes(
- attrs.get<std::vector<string>>("segment_output_names"));
- std::vector<string> input_names(
- attrs.get<std::vector<string>>("input_names"));
- string res_name = attrs.get<string>("resource_name");
- VLOG(1) << "Node name " << c_node->name() << " res_name " << res_name;
- string engine_name = "my_trt_op";
- {
- const auto node_id = tensorflow::str_util::Split(res_name, "_");
- engine_name += node_id.back();
- }
- std::map<string, tensorflow::Node*> node_maps;
-
- for (auto n : graph.op_nodes()) {
- node_maps.insert({n->name(), n});
- }
- VLOG(1) << "Output Nodes:";
- std::vector<tensorflow::DataType> out_types;
- std::vector<const tensorflow::Edge*> out_edges;
- for (auto& i : output_nodes) {
- auto node_port = tensorflow::str_util::Split(i, ":");
- VLOG(1) << " " << i << " in graph " << node_maps.count(i);
- auto out_node_name = node_port.at(0);
- if (node_port.size() > 1) {
- VLOG(1) << "Multi port output" << node_port.at(0) << " "
- << node_port.at(1) << " size=" << node_port.size();
- }
- auto node_it = node_maps.find(out_node_name);
- if (node_it != node_maps.end()) {
- tensorflow::Node* out_node = node_it->second;
- int port = 0;
- if (node_port.size() == 2) {
- port = std::strtoul(node_port.at(1).c_str(), nullptr, 10);
- out_types.push_back(out_node->output_type(port));
- } else {
- out_types.push_back(out_node->output_type(0));
- }
- for (auto out_edge : out_node->out_edges()) {
- if (out_edge->src_output() == port) {
- out_edges.push_back(out_edge);
- break;
- }
- }
- } else {
- LOG(WARNING) << " couldn't find output node " << out_node_name;
- }
- }
- VLOG(1) << "Input Nodes:";
- for (auto& i : input_names) {
- VLOG(1) << " " << i << " in graph " << node_maps.count(i);
- }
- auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance();
- auto resmgr = trt_rm->getManager("TRTCalibOps");
- tensorflow::tensorrt::TRTCalibrationResource* calib_res = nullptr;
- auto status = resmgr->Lookup(res_name, res_name, &calib_res);
- if (!status.ok() || !calib_res->calibrator_) {
- return tensorflow::errors::FailedPrecondition(
- "You must run calibration"
- " and inference conversion in the same proces");
- }
-
- calib_res->calibrator_->setDone();
- calib_res->thr_->join();
- delete calib_res->thr_;
- if (!calib_res->engine_) {
- LOG(FATAL) << "Calibration failed!, engine is nullptr. Did you run "
- "calibration graph?";
- }
- auto weight_rmgr = trt_rm->getManager("WeightStore");
- TF_CHECK_OK(weight_rmgr->Delete<tensorflow::tensorrt::TRTWeightStore>(
- res_name, res_name));
- auto engine_plan = calib_res->engine_->serialize();
- calib_res->engine_->destroy();
- calib_res->network_->destroy();
- calib_res->builder_->destroy();
- calib_res->thr_ = nullptr;
- calib_res->engine_ = nullptr;
- calib_res->builder_ = nullptr;
- tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp");
- std::vector<tensorflow::NodeDefBuilder::NodeOut> income_edges;
- for (const auto in_edge : c_node->in_edges()) {
- auto src = in_edge->src();
- int dest_port = in_edge->dst_input();
- income_edges.emplace_back(src->name(), in_edge->src_output(),
- c_node->input_type(dest_port));
- }
- tensorflow::gtl::ArraySlice<tensorflow::NodeDefBuilder::NodeOut> input_list(
- income_edges);
- op_builder.Input(input_list);
- tensorflow::NodeDef engine_node;
- const char* engine_plan_data = static_cast<const char*>(engine_plan->data());
- string engine_plan_string(engine_plan_data,
- engine_plan_data + engine_plan->size());
- status = op_builder.Attr("serialized_engine", engine_plan_string)
- .Attr("input_nodes", input_names)
- .Attr("output_nodes", output_nodes)
- .Attr("OutT", out_types)
- .Finalize(&engine_node);
- if (!status.ok()) {
- LOG(ERROR) << "Engine Node creation failed";
- return status;
- }
- auto trt_engine_node = graph.AddNode(engine_node, &status);
- TF_CHECK_OK(status);
- for (size_t i = 0; i < out_edges.size(); i++) {
- VLOG(1) << "Connecting trt_engine_node output " << i << " with "
- << out_edges.at(i)->dst()->name() << " port "
- << out_edges.at(i)->dst_input();
- TF_RETURN_IF_ERROR(graph.UpdateEdge(trt_engine_node, i,
- out_edges.at(i)->dst(),
- out_edges.at(i)->dst_input()));
- }
- VLOG(1) << "Segment nodes:";
- for (auto& i : segment_nodes) {
- VLOG(1) << " " << i << " in graph " << node_maps.count(i);
- auto it = node_maps.find(i);
- if (it != node_maps.end()) {
- graph.RemoveNode(it->second);
- }
- }
- graph.RemoveNode(c_node);
- return tensorflow::Status::OK();
-}
-
-tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) {
- // Visit nodes in reverse topological order and construct the TRT network.
-
- // Toposort
- std::vector<tensorflow::Node*> order_vec;
- tensorflow::GetPostOrder(s.graph, &order_vec);
- // Select just the subgraph
- std::list<tensorflow::Node*> order;
- for (tensorflow::Node* node : order_vec) {
- if (s.subgraph_node_ids.count(node->id())) {
- order.push_front(node); // we want topological order to contstruct the
- // network layer by layer
- }
- }
- // topological order is needed to build TRT network
- static int static_id = 0;
- string subgraph_name_scope;
- if (!order.empty()) {
- subgraph_name_scope = order.front()->name();
- }
- for (const tensorflow::Node* node : order) {
- subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name());
- }
- // TODO(sami,ben,jie): proper naming!
- string calib_op_name =
- StrCat(subgraph_name_scope, "my_trt_calib_op_", static_id);
- string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id);
- static_id++;
- auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance();
- auto op_rmgr = trt_rmgr->getManager("TRTCalibOps");
- auto op_res = new tensorflow::tensorrt::TRTCalibrationResource();
- TF_CHECK_OK(op_rmgr->Create(calib_op_name, calib_op_name, op_res));
- op_res->logger_ = new tensorflow::tensorrt::Logger();
- op_res->builder_ = nvinfer1::createInferBuilder(*(op_res->logger_));
-
- if (!op_res->builder_) {
- return tensorflow::errors::Internal(
- "failed to create TensorRT builder object");
- }
-
- op_res->network_ = op_res->builder_->createNetwork();
- if (!op_res->network_) {
- return tensorflow::errors::Internal(
- "failed to create TensorRT network object");
- }
-
- // Build the network
- auto weight_rmgr = trt_rmgr->getManager("WeightStore");
- auto ws = new tensorflow::tensorrt::TRTWeightStore();
- TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws));
- Converter converter(op_res->network_, ws, s.precision_mode == FP16MODE);
- std::vector<string> input_names;
- std::vector<tensorflow::DataType> input_dtypes;
- for (const std::pair<int, int>& input : s.input_inds) {
- VLOG(2) << "parsing input. Node id= " << input.first;
- int node_id = input.first;
- int output_idx = input.second;
- tensorflow::Node* node = s.graph.FindNodeId(node_id);
- auto node_name = node->name();
- input_names.push_back(node_name); // insert original node name without port
- // TODO(jie): alternative :)
- if (!s.graph_properties.HasOutputProperties(node_name))
- return tensorflow::errors::Internal("failed to find input node: " +
- node_name);
-
- auto op_info_vec = s.graph_properties.GetOutputProperties(node_name);
- if (static_cast<int>(op_info_vec.size()) < output_idx)
- return tensorflow::errors::Internal(
- "accessing output index of: ", output_idx, ", at node: ", node_name,
- "with output entry from shape_map: ", op_info_vec.size());
-
- auto op_info = op_info_vec.at(output_idx);
-
- tensorflow::DataType tf_dtype = op_info.dtype();
- input_dtypes.push_back(tf_dtype);
-
- nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT);
- TF_CHECK_OK(ConvertDType(tf_dtype, &dtype));
-
- VLOG(2) << "accessing output index of: " << output_idx
- << ", at node: " << node_name
- << "with output entry from shape_map: " << op_info_vec.size();
-
- // TODO(ben,jie): update TRT input format/dimension
- nvinfer1::DimsCHW input_dim_psuedo_chw;
- for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1;
-
- for (int i = 1; i < op_info.shape().dim_size(); i++) {
- VLOG(2) << "dimension: " << i
- << " , size: " << op_info.shape().dim(i).size();
- input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size();
- }
-
- // TODO(ben,jie): proper way to restore input tensor name?
- auto input_tensor_name = node_name;
- if (output_idx != 0) input_tensor_name = StrCat(node_name, ":", output_idx);
-
- nvinfer1::ITensor* input_tensor = converter.network()->addInput(
- input_tensor_name.c_str(), dtype, input_dim_psuedo_chw);
-
- if (!input_tensor)
- return tensorflow::errors::InvalidArgument(
- "Failed to create Input layer");
- VLOG(2) << "input tensor name :" << input_tensor_name;
-
- if (!converter.insert_input_tensor(input_tensor_name, input_tensor))
- return tensorflow::errors::AlreadyExists(
- "output tensor already exists for op: " + input_tensor_name);
- }
-
- VLOG(2) << "finished sorting";
-
- for (const tensorflow::Node* node : order) {
- const tensorflow::NodeDef& node_def = node->def();
- VLOG(2) << "converting node: " << node_def.name() << " , " << node_def.op();
- TF_RETURN_IF_ERROR(converter.convert_node(node_def));
- }
-
- VLOG(2) << "finished conversion";
-
- // Gather output metadata
- std::vector<string> output_names;
- std::vector<tensorflow::DataType> output_dtypes;
- int trt_engine_op_output_idx = 0;
- for (const std::pair<int, int>& output : s.output_inds) {
- int node_id = output.first;
- int output_idx = output.second;
- tensorflow::Node* node = s.graph.FindNodeId(node_id);
- string op_name = node->name();
- string tensor_name = op_name;
-
- s.output_edge_map->insert(
- {trt_engine_op_output_idx == 0
- ? engine_name
- : StrCat(engine_name, ":", trt_engine_op_output_idx),
- {output_idx, tensor_name}});
- trt_engine_op_output_idx++;
- if (output_idx != 0) {
- tensor_name = StrCat(tensor_name, ":", output_idx);
- }
- VLOG(1) << "output tensor name: " << tensor_name;
- output_names.push_back(tensor_name);
- auto tensor_or_weights = converter.get_tensor(tensor_name);
- if (!tensor_or_weights.is_tensor()) {
- return tensorflow::errors::InvalidArgument(
- "Output node is weights not tensor");
- }
- nvinfer1::ITensor* tensor = tensor_or_weights.tensor();
- if (!tensor) {
- return tensorflow::errors::NotFound("Output tensor not found: " +
- tensor_name);
- }
- converter.network()->markOutput(*tensor);
- tensorflow::DataType tf_dtype = node->output_type(output_idx);
- output_dtypes.push_back(tf_dtype);
- nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT;
- TF_RETURN_IF_ERROR(ConvertDType(tf_dtype, &trt_dtype));
- tensor->setType(trt_dtype);
- }
-
- VLOG(2) << "finished output";
-
- // Build the engine
- op_res->builder_->setMaxBatchSize(s.max_batch_size);
- op_res->builder_->setMaxWorkspaceSize(s.max_workspace_size_bytes);
-
- // Build the TRT op
- // TODO(sami,ben,jie): proper naming!
- tensorflow::NodeDefBuilder op_builder(calib_op_name, "TRTCalibOp");
- std::vector<tensorflow::NodeDefBuilder::NodeOut> income_edges;
- for (size_t i = 0; i < input_names.size(); ++i) {
- int output_idx = s.input_inds.at(i).second;
- // we wired up the input here already, it is redundant to do it again in
- // ConvertSubGraphToTensorRT(convert_graph.cc)
- auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut(
- input_names.at(i), output_idx, input_dtypes.at(i));
- VLOG(1) << calib_op_name << " input " << i << " = " << input_names.at(i)
- << ":" << output_idx
- << " dType= " << tensorflow::DataTypeString(input_dtypes.at(i));
- income_edges.push_back(incoming_edge);
- }
- tensorflow::gtl::ArraySlice<tensorflow::NodeDefBuilder::NodeOut> input_list(
- income_edges);
- op_builder.Input(input_list);
- std::vector<string> segment_names;
- segment_names.reserve(s.subgraph_node_ids.size());
- for (int i : s.subgraph_node_ids) {
- auto node = s.graph.FindNodeId(i);
- segment_names.push_back(node->name());
- }
- LOG(INFO) << "finished op preparation";
-
- auto status = op_builder.Attr("segment_nodes", segment_names)
- .Attr("input_names", input_names)
- .Attr("segment_output_names", output_names)
- .Attr("resource_name", calib_op_name)
- .Finalize(s.trt_node);
-
- LOG(INFO) << status.ToString();
- LOG(INFO) << "finished op building";
-
- return tensorflow::Status::OK();
-}
tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
- tensorrt::convert::SubGraphParams& s) {
+ const tensorflow::Graph& graph, const std::set<int>& subgraph_node_ids,
+ const std::vector<std::pair<int, int>>& input_inds,
+ const std::vector<std::pair<int, int>>& output_inds, size_t max_batch_size,
+ size_t max_workspace_size_bytes,
+ const tensorflow::grappler::GraphProperties& graph_properties,
+ tensorflow::NodeDef* trt_node) {
// Visit nodes in reverse topological order and construct the TRT network.
// Toposort
std::vector<tensorflow::Node*> order_vec;
- tensorflow::GetPostOrder(s.graph, &order_vec);
+ tensorflow::GetPostOrder(graph, &order_vec);
// Select just the subgraph
std::list<tensorflow::Node*> order;
for (tensorflow::Node* node : order_vec) {
- if (s.subgraph_node_ids.count(node->id())) {
+ if (subgraph_node_ids.count(node->id())) {
// We want topological order to construct the
// network layer by layer
order.push_front(node);
@@ -2438,86 +1434,46 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
"Failed to create TensorRT network object");
}
- string subgraph_name_scope;
- if (!order.empty()) {
- subgraph_name_scope = order.front()->name();
- }
- for (const tensorflow::Node* node : order) {
- subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name());
- }
- static int static_id = 0;
- // TODO(sami,ben,jie): proper naming!
- string engine_name = StrCat(subgraph_name_scope, "my_trt_op");
- engine_name = StrCat(engine_name, static_id++);
- auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance();
- auto weight_rmgr = trt_rmgr->getManager("WeightStore");
- auto ws = new tensorflow::tensorrt::TRTWeightStore();
- TF_CHECK_OK(weight_rmgr->Create(engine_name, engine_name, ws));
-
// Build the network
- Converter converter(trt_network.get(), ws, s.precision_mode == FP16MODE);
+ Converter converter(trt_network.get());
std::vector<string> input_names;
std::vector<tensorflow::DataType> input_dtypes;
- for (const std::pair<int, int>& input : s.input_inds) {
- VLOG(2) << "parsing input!!!!!";
+ for (std::pair<int, int> const& input : input_inds) {
int node_id = input.first;
int output_idx = input.second;
- tensorflow::Node* node = s.graph.FindNodeId(node_id);
+ tensorflow::Node* node = graph.FindNodeId(node_id);
auto node_name = node->name();
- // input_names should use the node name in the graph
- // here it should be the input tensor name -> matching the binding
- // insert original node name without port
- auto tensor_name = node_name;
- if (output_idx != 0) {
- tensor_name = StrCat(tensor_name, ":", output_idx);
- }
-
- VLOG(2) << "input name: " << node_name << " tensor_name: " << tensor_name
- << " idx: " << output_idx;
-
- auto shape_inference_node_name = node_name;
- auto shape_inference_output_idx = output_idx;
- // rewire the shape inference to original node in the graph
- if (s.output_edge_map->count(tensor_name)) {
- shape_inference_node_name = s.output_edge_map->at(tensor_name).second;
- shape_inference_output_idx = s.output_edge_map->at(tensor_name).first;
- }
- if (shape_inference_output_idx < 0) continue;
- VLOG(2) << "shapeinference name: " << shape_inference_node_name
- << " idx: " << shape_inference_output_idx;
-
- if (!s.graph_properties.HasOutputProperties(shape_inference_node_name))
- return tensorflow::errors::Internal("failed to find input node: " +
- shape_inference_node_name);
+ input_names.push_back(node_name); // Insert original node name without port
+ // TODO(jie): alternative :)
+ if (!graph_properties.HasOutputProperties(node_name))
+ return tensorflow::errors::Internal("Failed to find input node: " +
+ node_name);
- auto op_info_vec =
- s.graph_properties.GetOutputProperties(shape_inference_node_name);
- if (static_cast<int>(op_info_vec.size()) <= shape_inference_output_idx)
+ auto op_info_vec = graph_properties.GetOutputProperties(node_name);
+ if (static_cast<int>(op_info_vec.size()) <= output_idx)
return tensorflow::errors::Internal(
- "accessing output index of: ", shape_inference_output_idx,
- ", at node: ", shape_inference_node_name,
- " with output entry from shape_map: ", op_info_vec.size());
+ "Accessing output index of: " + std::to_string(output_idx) +
+ ", at node: " + node_name + " with output entry from shape_map: " +
+ std::to_string(op_info_vec.size()));
+
+ auto op_info = op_info_vec.at(output_idx);
- auto op_info = op_info_vec.at(shape_inference_output_idx);
tensorflow::DataType tf_dtype = op_info.dtype();
input_dtypes.push_back(tf_dtype);
nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT);
TF_CHECK_OK(ConvertDType(tf_dtype, &dtype));
- VLOG(2) << "Accessing output index of: " << output_idx
+ VLOG(2) << "Accessing output index of: " << std::to_string(output_idx)
<< ", at node: " << node_name
- << " with output entry from shape_map: " << op_info_vec.size();
+ << " with output entry from shape_map: "
+ << std::to_string(op_info_vec.size());
+
// TODO(ben,jie): update TRT input format/dimension
nvinfer1::DimsCHW input_dim_psuedo_chw;
for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1;
- // TODO(jie): TRT 3.x only support 4 dimensional input tensor.
- // update the code once TRT 4.0 comes out.
- if (op_info.shape().dim_size() != 4)
- return tensorflow::errors::Unimplemented("require 4 dimensional input");
-
for (int i = 1; i < op_info.shape().dim_size(); i++) {
VLOG(2) << "dimension: " << i
<< " , size: " << op_info.shape().dim(i).size();
@@ -2526,11 +1482,9 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
// TODO(ben,jie): proper way to restore input tensor name?
auto input_tensor_name = node_name;
- if (output_idx != 0) {
- input_tensor_name = StrCat(node_name, ":", output_idx);
- }
+ if (output_idx != 0)
+ input_tensor_name = node_name + ":" + std::to_string(output_idx);
- input_names.push_back(input_tensor_name);
nvinfer1::ITensor* input_tensor = converter.network()->addInput(
input_tensor_name.c_str(), dtype, input_dim_psuedo_chw);
@@ -2557,22 +1511,14 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
// Gather output metadata
std::vector<string> output_names;
std::vector<tensorflow::DataType> output_dtypes;
- int trt_engine_op_output_idx = 0;
- for (const std::pair<int, int>& output : s.output_inds) {
+ for (std::pair<int, int> const& output : output_inds) {
int node_id = output.first;
int output_idx = output.second;
- tensorflow::Node* node = s.graph.FindNodeId(node_id);
+ tensorflow::Node* node = graph.FindNodeId(node_id);
string op_name = node->name();
string tensor_name = op_name;
-
- s.output_edge_map->insert(
- {trt_engine_op_output_idx == 0
- ? engine_name
- : StrCat(engine_name, ":", trt_engine_op_output_idx),
- {output_idx, tensor_name}});
- trt_engine_op_output_idx++;
if (output_idx != 0)
- tensorflow::strings::StrAppend(&tensor_name, ":", output_idx);
+ tensor_name = tensor_name + ":" + std::to_string(output_idx);
VLOG(2) << "Output tensor name: " << tensor_name;
output_names.push_back(tensor_name);
auto tensor_or_weights = converter.get_tensor(tensor_name);
@@ -2594,25 +1540,19 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
}
VLOG(2) << "Finished output";
+ // TODO(jie): static_id is not thread safe.
+ static int static_id = 0;
// Build the engine
- trt_builder->setMaxBatchSize(s.max_batch_size);
- trt_builder->setMaxWorkspaceSize(s.max_workspace_size_bytes);
- VLOG(0) << "Max batch size= " << s.max_batch_size
- << " max workspace size= " << s.max_workspace_size_bytes;
- if (s.precision_mode == FP16MODE) {
- trt_builder->setHalf2Mode(true);
- VLOG(0) << "Using FP16 precision mode";
- }
- LOG(INFO) << "starting build engine";
+ trt_builder->setMaxBatchSize(max_batch_size);
+ trt_builder->setMaxWorkspaceSize(max_workspace_size_bytes);
+ VLOG(0) << "Starting build engine " << static_id;
+ // TODO(ben,jie): half2 and int8 mode support
string engine_plan_string;
{
auto trt_engine =
infer_object(trt_builder->buildCudaEngine(*converter.network()));
VLOG(0) << "Built network";
- if (trt_engine.get() == nullptr) {
- return tensorflow::errors::Internal("Engine building failure");
- }
auto engine_plan = infer_object(trt_engine->serialize());
VLOG(0) << "Serialized engine";
const char* engine_plan_data =
@@ -2620,19 +1560,18 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
engine_plan_string =
string(engine_plan_data, engine_plan_data + engine_plan->size());
}
- TF_RETURN_IF_ERROR(weight_rmgr->Delete<tensorflow::tensorrt::TRTWeightStore>(
- engine_name, engine_name));
- LOG(INFO) << "finished engine " << engine_name;
+
+ VLOG(0) << "Finished engine";
// Build the TRT op
- tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp");
+ // TODO(sami,ben,jie): proper naming!
+ tensorflow::NodeDefBuilder op_builder(
+ tensorflow::strings::StrCat("my_trt_op", static_id++), "TRTEngineOp");
std::vector<tensorflow::NodeDefBuilder::NodeOut> income_edges;
- VLOG(2) << "input edge size: " << input_names.size();
for (size_t i = 0; i < input_names.size(); ++i) {
- VLOG(2) << "input edges: " << i << " " << input_names.at(i);
- int output_idx = s.input_inds.at(i).second;
- // we wired up the input here already, it is redundant to do it again in
- // ConvertSubGraphToTensorRT(convert_graph.cc)
+ int output_idx = input_inds.at(i).second;
+ // We already wired up the input here, so it is redundant to do it again
+ // in ConvertSubGraphToTensorRT (convert_graph.cc).
auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut(
input_names.at(i), output_idx, input_dtypes.at(i));
income_edges.push_back(incoming_edge);
@@ -2647,7 +1586,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
.Attr("input_nodes", input_names)
.Attr("output_nodes", output_names)
.Attr("OutT", output_dtypes)
- .Finalize(s.trt_node);
+ .Finalize(trt_node);
VLOG(0) << status.ToString() << " finished op building";
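For reference, the NodeDef assembled by the builder above looks roughly like the sketch below. It uses the proto API directly; the attr names mirror the .Attr(...) calls in the hunk, while the engine bytes and the single input/output names are placeholders.

from tensorflow.core.framework import node_def_pb2, types_pb2

def make_trt_engine_node(engine_plan, input_names, output_names, out_dtypes, index=0):
    node = node_def_pb2.NodeDef(name="my_trt_op%d" % index, op="TRTEngineOp")
    node.input.extend(input_names)  # incoming edges, "name" or "name:idx"
    node.attr["serialized_engine"].s = engine_plan
    node.attr["input_nodes"].list.s.extend(n.encode() for n in input_names)
    node.attr["output_nodes"].list.s.extend(n.encode() for n in output_names)
    node.attr["OutT"].list.type.extend(out_dtypes)
    return node

node = make_trt_engine_node(b"<serialized engine plan>", ["input"], ["output"],
                            [types_pb2.DT_FLOAT])
print(node.name, node.op)  # my_trt_op0 TRTEngineOp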
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
index 954a1e72f8..2e7fd19566 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
@@ -17,8 +17,6 @@ limitations under the License.
#define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_NODES_H_
#include <set>
-#include <string>
-#include <unordered_map>
#include <utility>
#include <vector>
@@ -34,49 +32,16 @@ namespace tensorflow {
namespace tensorrt {
namespace convert {
-const int FP32MODE = 0;
-const int FP16MODE = 1;
-const int INT8MODE = 2;
+tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
+ const tensorflow::Graph& graph, const std::set<int>& subgraph_node_ids,
+ const std::vector<std::pair<int, int>>&
+ input_inds, // {node_id, output_idx}
+ const std::vector<std::pair<int, int>>&
+ output_inds, // {node_id, output_idx}
+ size_t max_batch_size, size_t max_workspace_size_bytes,
+ const tensorflow::grappler::GraphProperties& graph_prop,
+ tensorflow::NodeDef* trt_node);
-struct SubGraphParams {
- SubGraphParams(
- tensorflow::Graph& inp_graph,
- const std::set<int>& subgraph_node_id_numbers,
- const std::vector<std::pair<int, int>>& input_indices,
- const std::vector<std::pair<int, int>>& output_indices,
- size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes,
- const tensorflow::grappler::GraphProperties& current_graph_properties,
- std::unordered_map<string, std::pair<int, string>>* output_edges,
- tensorflow::NodeDef* constructed_trt_node,
- int engine_precision_mode = FP32MODE)
- : graph(inp_graph),
- subgraph_node_ids(subgraph_node_id_numbers),
- input_inds(input_indices),
- output_inds(output_indices),
- max_batch_size(max_supported_batch_size),
- max_workspace_size_bytes(max_consumed_workspace_size_bytes),
- graph_properties(current_graph_properties),
- output_edge_map(output_edges),
- trt_node(constructed_trt_node),
- precision_mode(engine_precision_mode) {}
-
- tensorflow::Graph& graph;
- const std::set<int>& subgraph_node_ids;
- const std::vector<std::pair<int, int>>& input_inds; // {node_id, output_idx}
- const std::vector<std::pair<int, int>>& output_inds; // {node_id, output_idx}
- size_t max_batch_size;
- size_t max_workspace_size_bytes;
- const tensorflow::grappler::GraphProperties& graph_properties;
- std::unordered_map<string, std::pair<int, string>>* output_edge_map;
- tensorflow::NodeDef* trt_node;
- const int precision_mode;
-};
-
-// TODO(sami): Replace references with const reference or pointers
-tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams& params);
-tensorflow::Status InjectCalibrationNode(SubGraphParams& params);
-tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph& graph,
- tensorflow::Node* c_node);
} // namespace convert
} // namespace tensorrt
} // namespace tensorflow
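Both input_inds and output_inds are {node_id, output_idx} pairs, and the conversion code turns them into tensor names with the usual TensorFlow convention: output 0 is addressed by the bare node name, any other output by name:idx. A tiny illustrative helper (the example names are hypothetical):

def tensor_name(node_name, output_idx):
    # Mirrors the naming used when wiring inputs and outputs above.
    return node_name if output_idx == 0 else "%s:%d" % (node_name, output_idx)

assert tensor_name("conv1", 0) == "conv1"
assert tensor_name("split", 2) == "split:2"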
diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc
index aea44fd8a2..1dcb87e768 100644
--- a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc
+++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc
@@ -21,11 +21,10 @@ limitations under the License.
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/platform/stream_executor.h"
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
-#include "cuda/include/cuda_runtime_api.h"
+#include "cuda_runtime_api.h"
#include "tensorrt/include/NvInfer.h"
namespace tensorflow {
@@ -114,13 +113,7 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) {
ctx->set_output(i, t);
}
VLOG(2) << "Filled map for sending";
- // copied from cuda_kernel_helper since it seems only valid in *.cu.cc files
- const cudaStream_t* stream = CHECK_NOTNULL(
- reinterpret_cast<const cudaStream_t*>(ctx->op_device_context()
- ->stream()
- ->implementation()
- ->CudaStreamMemberHack()));
- calib_res->calibrator_->setBatch(input_data, *stream);
+ calib_res->calibrator_->setBatch(input_data);
VLOG(2) << "Passed calibration data";
// TODO(aaroey): make sure we wait for the completion of calibration on the
// last batch in future PR.
diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc
index b32371b642..8efdf63ebe 100644
--- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc
+++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc
@@ -24,12 +24,8 @@ limitations under the License.
#include "cuda/include/cuda_runtime_api.h"
namespace tensorflow {
-static ::tensorflow::tensorrt::Logger logger;
-namespace gpu = ::perftools::gputools;
-using IRuntime = nvinfer1::IRuntime;
-using Dims = nvinfer1::Dims;
-
namespace tensorrt {
+static ::tensorflow::tensorrt::Logger logger;
TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) {
// read serialized_engine
@@ -44,21 +40,10 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) {
// TODO(samikama) runtime should be taken from a resourcemanager as well.
// Only engine should be in the op and context and runtime should be taken
// from resourcemanager
- // TODO(jie): cudaSetDevice make sure trt engine is allocated on the same
- // gpu where the input/output is also located.
- int gpu_id = context->device()->tensorflow_gpu_device_info()->gpu_id;
- cudaSetDevice(gpu_id);
- int device;
- cudaGetDevice(&device);
- if (gpu_id != device) LOG(FATAL) << "set device failed!";
-
- // TODO(samikama) runtime should be taken from a resourcemanager as well.
- // Only engine should be in the op and context and runtime should be taken
- // from resourcemanager
-
- IRuntime* infer = nvinfer1::createInferRuntime(logger);
+ nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(logger);
trt_engine_ptr_.reset(infer->deserializeCudaEngine(
serialized_engine.c_str(), serialized_engine.size(), nullptr));
+
trt_execution_context_ptr_.reset(trt_engine_ptr_->createExecutionContext());
// Runtime is safe to delete after engine creation
infer->destroy();
@@ -70,6 +55,7 @@ void TRTEngineOp::Compute(OpKernelContext* context) {
size_t binding_index;
int num_batch = 0;
+ bool valid = true;
for (int i = 0; i < context->num_inputs(); i++) {
// Grab the input tensor
binding_index = trt_engine_ptr_->getBindingIndex(input_nodes_[i].c_str());
@@ -78,12 +64,8 @@ void TRTEngineOp::Compute(OpKernelContext* context) {
const TensorShape& input_shape = input_tensor.shape();
if (i == 0) {
num_batch = input_shape.dim_size(0);
- if (num_batch > trt_engine_ptr_->getMaxBatchSize()) {
- LOG(FATAL) << "input tensor batch larger than max_batch_size: "
- << trt_engine_ptr_->getMaxBatchSize();
- }
} else if (num_batch != input_shape.dim_size(0)) {
- LOG(FATAL) << "input data inconsistent batch size";
+ valid = false;
break;
}
switch (trt_engine_ptr_->getBindingDataType(binding_index)) {
@@ -99,6 +81,9 @@ void TRTEngineOp::Compute(OpKernelContext* context) {
}
}
+ // Might want a different way to inform the user of batch size inconsistency
+ if (!valid) LOG(WARNING) << "input data inconsistent batch size";
+
for (int i = 0; i < static_cast<int>(output_nodes_.size()); i++) {
// This is bad that we have to reallocate output buffer every run.
// Create an output tensor
@@ -141,11 +126,9 @@ void TRTEngineOp::Compute(OpKernelContext* context) {
->implementation()
->CudaStreamMemberHack()));
- // TODO(jie): trt enqueue does not return error
- auto ret = trt_execution_context_ptr_->enqueue(num_batch, &buffers[0],
- *stream, nullptr);
- VLOG(2) << "enqueue returns: " << ret;
- // sync should be done by TF.
+ // Execution is handled by TF since we are getting the stream from TF.
+ // It is safe for the CPU pointer array (buffers) to go out of scope after
+ // enqueue.
+ trt_execution_context_ptr_->enqueue(num_batch, &buffers[0], *stream, nullptr);
}
REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp);
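The Compute change above replaces the LOG(FATAL) on mismatched input batch sizes with a flag plus a warning: the first input fixes num_batch and every later input must share the same leading dimension. A small sketch of that check, with hypothetical shape tuples standing in for the input tensors:

import logging

def check_batch_sizes(input_shapes):
    # input_shapes: list of shape tuples, e.g. [(8, 24, 24, 2), (8, 10)]
    if not input_shapes:
        return True
    num_batch = input_shapes[0][0]
    for shape in input_shapes[1:]:
        if shape[0] != num_batch:
            logging.warning("input data inconsistent batch size")
            return False
    return True

assert check_batch_sizes([(8, 24, 24, 2), (8, 10)])
assert not check_batch_sizes([(8, 24, 24, 2), (4, 10)])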
diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.cc b/tensorflow/contrib/tensorrt/log/trt_logger.cc
index dda0dc9e71..7add8cb8b3 100644
--- a/tensorflow/contrib/tensorrt/log/trt_logger.cc
+++ b/tensorflow/contrib/tensorrt/log/trt_logger.cc
@@ -27,19 +27,19 @@ void Logger::log(Severity severity, const char* msg) {
// Suppress info-level messages
switch (severity) {
case Severity::kINFO: { // Mark TRT info messages as debug!
- VLOG(2) << name_ << " " << msg;
+ VLOG(2) << msg;
break;
}
case Severity::kWARNING: {
- LOG(WARNING) << name_ << " " << msg;
+ LOG(WARNING) << msg;
break;
}
case Severity::kERROR: {
- LOG(ERROR) << name_ << " " << msg;
+ LOG(ERROR) << msg;
break;
}
case Severity::kINTERNAL_ERROR: {
- LOG(FATAL) << name_ << " " << msg;
+ LOG(FATAL) << msg;
break;
}
// This is useless for now. But would catch it in future if enum changes. It
diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.h b/tensorflow/contrib/tensorrt/log/trt_logger.h
index 7f3544f8cf..d71f66b933 100644
--- a/tensorflow/contrib/tensorrt/log/trt_logger.h
+++ b/tensorflow/contrib/tensorrt/log/trt_logger.h
@@ -27,11 +27,9 @@ namespace tensorrt {
// Logger for GIE info/warning/errors
class Logger : public nvinfer1::ILogger {
- public:
- Logger(string name = "DefaultLogger") : name_(name){};
+ private:
void log(nvinfer1::ILogger::Severity severity, const char* msg) override;
- private:
string name_;
};
diff --git a/tensorflow/contrib/tensorrt/python/__init__.py b/tensorflow/contrib/tensorrt/python/__init__.py
index 0b2321b5fc..7e050a768c 100644
--- a/tensorflow/contrib/tensorrt/python/__init__.py
+++ b/tensorflow/contrib/tensorrt/python/__init__.py
@@ -20,6 +20,5 @@ from __future__ import print_function
# pylint: disable=unused-import,line-too-long
from tensorflow.contrib.tensorrt.python.ops import trt_engine_op
-from tensorflow.contrib.tensorrt.python.trt_convert import calib_graph_to_infer_graph
from tensorflow.contrib.tensorrt.python.trt_convert import create_inference_graph
# pylint: enable=unused-import,line-too-long
diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py
index 666220d78c..9454862f85 100644
--- a/tensorflow/contrib/tensorrt/python/trt_convert.py
+++ b/tensorflow/contrib/tensorrt/python/trt_convert.py
@@ -20,17 +20,11 @@ from __future__ import print_function
# pylint: disable=unused-import,line-too-long
import six as _six
-from tensorflow.contrib.tensorrt.wrap_conversion import calib_convert
from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert
from tensorflow.core.framework import graph_pb2
-from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.framework import errors
from tensorflow.python.framework import errors_impl as _impl
-from tensorflow.python.framework import meta_graph
from tensorflow.python.framework import ops
-from tensorflow.python.grappler import tf_optimizer
-from tensorflow.python.util import compat
-# pylint: enable=unused-import,line-too-long
# TODO(skama): get outputs from session when implemented as c++
@@ -38,33 +32,22 @@ from tensorflow.python.util import compat
def create_inference_graph(input_graph_def,
outputs,
max_batch_size=1,
- max_workspace_size_bytes=2 << 20,
- precision_mode="FP32",
- minimum_segment_size=3):
+ max_workspace_size_bytes=2 << 20):
"""Python wrapper for the TRT transformation.
+
Args:
input_graph_def: GraphDef object containing a model to be transformed.
- outputs: list of tensors or node names for the model outputs.
+ outputs: List of tensors or node names for the model outputs.
max_batch_size: max size for the input batch
max_workspace_size_bytes: parameter to control memory allocation (in bytes)
- precision_mode: one of 'FP32', 'FP16' and 'INT8'
- minimum_segment_size: the minimum number of nodes required for a subgraph to
- be replaced by TRTEngineOp.
Returns:
New GraphDef with TRTEngineOps placed in graph replacing subgraphs.
Raises:
- ValueError: if the provided precision mode is invalid.
RuntimeError: if the returned status message is malformed.
"""
- supported_precision_modes = {"FP32": 0, "FP16": 1, "INT8": 2}
- if precision_mode.upper() not in supported_precision_modes:
- raise ValueError(("precision mode '{}' is not supported."
- "It should be one of {}").format(
- precision_mode, "{'FP32', 'FP16', 'INT8'}"))
- mode = supported_precision_modes[precision_mode.upper()]
def py2bytes(inp):
return inp
@@ -100,7 +83,7 @@ def create_inference_graph(input_graph_def,
# pair or strings where first one is encoded status and the second
# one is the transformed graphs protobuf string.
out = trt_convert(input_graph_def_str, out_names, max_batch_size,
- max_workspace_size_bytes, mode, minimum_segment_size)
+ max_workspace_size_bytes)
status = to_string(out[0])
output_graph_def_string = out[1]
del input_graph_def_str # Save some memory
@@ -118,46 +101,3 @@ def create_inference_graph(input_graph_def,
output_graph_def.ParseFromString(output_graph_def_string)
del output_graph_def_string # Save some memory
return output_graph_def
-
-
-def calib_graph_to_infer_graph(calibration_graph_def):
- """Convert an existing calibration graph to inference graph.
-
- Args:
- calibration_graph_def: the calibration GraphDef object with calibration data
- Returns:
- New GraphDef with TRTEngineOps placed in graph replacing calibration nodes.
- Raises:
- RuntimeError: if the returned status message is malformed.
- """
-
- def py2string(inp):
- return inp
-
- def py3string(inp):
- return inp.decode("utf-8")
-
- if _six.PY2:
- to_string = py2string
- else:
- to_string = py3string
-
- graph_str = calibration_graph_def.SerializeToString()
- out = calib_convert(graph_str)
- status = to_string(out[0])
- output_graph_def_string = out[1]
- del graph_str # Save some memory
- if len(status) < 2:
- raise _impl.UnknownError(None, None, status)
- if status[:2] != "OK":
- msg = status.split(";")
- if len(msg) == 1:
- raise RuntimeError("Status message is malformed {}".format(status))
- # pylint: disable=protected-access
- raise _impl._make_specific_exception(None, None, ";".join(msg[1:]),
- int(msg[0]))
- # pylint: enable=protected-access
- output_graph_def = graph_pb2.GraphDef()
- output_graph_def.ParseFromString(output_graph_def_string)
- del output_graph_def_string # Save some memory
- return output_graph_def
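With the precision_mode and minimum_segment_size arguments gone, the wrapper is now called with just the graph, the output names, and the batch/workspace limits. A usage sketch against a toy, constant-only graph (the graph itself is purely illustrative, and actually running the conversion requires a TensorRT-enabled build):

import numpy as np
import tensorflow as tf
import tensorflow.contrib.tensorrt as trt

# Build a tiny variable-free graph with a node literally named "output".
g = tf.Graph()
with g.as_default():
    x = tf.placeholder(tf.float32, shape=(None, 24, 24, 2), name="input")
    w = tf.constant(np.random.random_sample((5, 5, 2, 1)).astype(np.float32))
    y = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding="SAME")
    tf.identity(y, name="output")

trt_gdef = trt.create_inference_graph(
    input_graph_def=g.as_graph_def(),
    outputs=["output"],
    max_batch_size=16,
    max_workspace_size_bytes=1 << 25)
# trt_gdef can then be imported with tf.import_graph_def like any GraphDef.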
diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc
index 74df75902e..3d5cc76c42 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc
@@ -23,7 +23,7 @@ limitations under the License.
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
-#include "cuda/include/cuda_runtime_api.h"
+#include "cuda_runtime_api.h"
namespace tensorflow {
namespace tensorrt {
@@ -38,18 +38,22 @@ TRTInt8Calibrator::TRTInt8Calibrator(
done_(false),
dev_buffers_(dev_buffers),
calib_running_(false),
- batch_is_set_(false),
engine_name_(engine_name) {}
-bool TRTInt8Calibrator::setBatch(const std::unordered_map<string, void*>& data,
- const cudaStream_t stream) {
- tensorflow::mutex_lock lock(cond_mtx_);
- while ((calib_running_ || batch_is_set_) &&
- !done_) { // wait while calibration is running
- cond_.wait(lock);
- }
+bool TRTInt8Calibrator::setBatch(
+ const std::unordered_map<string, void*>& data) {
+ // TODO(aaroey): make sure that in future PR:
+ // 1. the mutex_lock is outside of the loop
+ // 2. wait() is used instead of wait_for()
+ // 3. done_ is to be protected by the mutex
+ // 4. the first batch is not missed
if (done_) return false;
- CHECK(!calib_running_ && !batch_is_set_);
+ while (calib_running_.load(
+ std::memory_order_acquire)) { // wait while calibration is running
+ tensorflow::mutex_lock l(cond_mtx_);
+ cond_.wait_for(l, std::chrono::milliseconds(50));
+ if (done_) return false;
+ }
VLOG(1) << "Set Batch Waiting finished";
for (const auto it : data) {
auto devptr = dev_buffers_.find(it.first);
@@ -61,32 +65,27 @@ bool TRTInt8Calibrator::setBatch(const std::unordered_map<string, void*>& data,
// TODO(aaroey): we should not use sync copy on default stream. Make sure
// stream->ThenMemcpy() is used in future PRs.
- // TODO(sami,aaroey): Need to figureout a way to ensure synchronization
- // between stream, perhaps using a tensor?
- auto status = cudaMemcpyAsync(d.first, it.second, d.second,
- cudaMemcpyDeviceToDevice, stream);
+ auto status =
+ cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice);
if (status != cudaSuccess) {
LOG(FATAL) << "cudaMemcpy " << engine_name_ << " for '" << it.first
<< "' failed with " << status;
}
}
-
- // TODO(Sami, aaorey): Find an alternative way!
- cudaStreamSynchronize(
- stream); // we have to wait for the stream before returning!
- batch_is_set_ = true;
+ calib_running_.store(true, std::memory_order_release); // release builder
cond_.notify_all();
return true;
}
bool TRTInt8Calibrator::getBatch(void** bindings, const char** names,
int num_bindings) {
- tensorflow::mutex_lock lock(cond_mtx_);
- calib_running_ = false;
+ calib_running_.store(false, std::memory_order_release); // wait for new batch
cond_.notify_all();
- while ((!batch_is_set_ && !done_)) { // wait until new batch arrives
- cond_.wait(lock);
-
+ while (!calib_running_.load(
+ std::memory_order_acquire)) { // wait until new batch arrives
+ tensorflow::mutex_lock l(cond_mtx_);
+ cond_.wait_for(l, std::chrono::milliseconds(50));
+ if (done_) return false;
}
if (done_) {
return false;
@@ -101,8 +100,6 @@ bool TRTInt8Calibrator::getBatch(void** bindings, const char** names,
bindings[i] = it->second.first;
}
- batch_is_set_ = false;
- calib_running_ = true;
return true;
}
@@ -110,12 +107,6 @@ const void* TRTInt8Calibrator::readCalibrationCache(std::size_t& length) {
return nullptr;
}
-void TRTInt8Calibrator::setDone() {
- tensorflow::mutex_lock lock(cond_mtx_);
- done_ = true;
- cond_.notify_all();
-}
-
void TRTInt8Calibrator::writeCalibrationCache(const void* ptr,
std::size_t length) {}
TRTInt8Calibrator::~TRTInt8Calibrator() {
@@ -124,6 +115,5 @@ TRTInt8Calibrator::~TRTInt8Calibrator() {
} // namespace tensorrt
} // namespace tensorflow
-
#endif
#endif
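The rewritten setBatch/getBatch pair behaves like a one-element queue: the TF side publishes a batch and flips calib_running_, the TensorRT builder thread consumes it and clears the flag, and both sides poll with a 50 ms timed wait so that done_ can break them out. A rough Python analogue of that handshake (class and method names here are invented for the sketch):

import threading

class OneSlotBatchQueue(object):
    def __init__(self):
        self._cond = threading.Condition()
        self._batch = None
        self._batch_ready = False  # plays the role of calib_running_
        self.done = False

    def set_batch(self, batch):
        # Producer side: the op feeding calibration data.
        with self._cond:
            while self._batch_ready and not self.done:
                self._cond.wait(0.05)  # ~50 ms, like wait_for above
            if self.done:
                return False
            self._batch = batch
            self._batch_ready = True
            self._cond.notify_all()
            return True

    def get_batch(self):
        # Consumer side: the TensorRT builder asking for the next batch.
        with self._cond:
            while not self._batch_ready and not self.done:
                self._cond.wait(0.05)
            if self.done:
                return None
            self._batch_ready = False
            self._cond.notify_all()
            return self._batch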
diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h
index d77aa2c5ab..8830f7efe7 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h
+++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h
@@ -24,10 +24,7 @@ limitations under the License.
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
-
-#include "cuda/include/cuda_runtime_api.h"
#include "tensorrt/include/NvInfer.h"
-
namespace tensorflow {
namespace tensorrt {
// This class provides a 1 element queue to match TFs push model to
@@ -42,9 +39,8 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator {
int getBatchSize() const override;
bool getBatch(void* bindings[], const char* names[],
int num_bindings) override;
- bool setBatch(const std::unordered_map<string, void*>& data,
- const cudaStream_t stream);
- void setDone();
+ bool setBatch(const std::unordered_map<string, void*>& data);
+ void setDone() { done_ = true; }
const void* readCalibrationCache(std::size_t& length) override;
void writeCalibrationCache(const void* ptr, std::size_t length) override;
~TRTInt8Calibrator();
@@ -59,14 +55,11 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator {
const std::unordered_map<string, std::pair<void*, size_t>>
dev_buffers_; // map to keep tensorrt input buffers and sizes keyed with
// buffer names
- bool calib_running_;
- bool batch_is_set_;
+ std::atomic_bool calib_running_;
string engine_name_;
};
-
} // namespace tensorrt
} // namespace tensorflow
-
+#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_
#endif
#endif
-#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_
diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py
index 0b661bd536..c78f6f2224 100644
--- a/tensorflow/contrib/tensorrt/test/test_tftrt.py
+++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py
@@ -60,7 +60,6 @@ def get_simple_graph_def():
def run_graph(gdef, dumm_inp):
- """Run given graphdef once."""
gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
ops.reset_default_graph()
g = ops.Graph()
@@ -75,65 +74,15 @@ def run_graph(gdef, dumm_inp):
return val
-# Use real data that is representatitive of the inference dataset
-# for calibration. For this test script it is random data.
-def run_calibration(gdef, dumm_inp):
- """Run given calibration graph multiple times."""
- gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
- ops.reset_default_graph()
- g = ops.Graph()
- with g.as_default():
- inp, out = importer.import_graph_def(
- graph_def=gdef, return_elements=["input", "output"])
- inp = inp.outputs[0]
- out = out.outputs[0]
- with csess.Session(
- config=cpb2.ConfigProto(gpu_options=gpu_options), graph=g) as sess:
- # run over real calibration data here, we are mimicking a calibration set of
- # 30 different batches. Use as much calibration data as you want
- for _ in range(30):
- val = sess.run(out, {inp: dumm_inp})
- return val
-
-
if "__main__" in __name__:
inp_dims = (100, 24, 24, 2)
dummy_input = np.random.random_sample(inp_dims)
- orig_graph = get_simple_graph_def() # use a frozen graph for inference
+ gdef = get_simple_graph_def()
# Get optimized graph
- trt_graph = trt.create_inference_graph(
- input_graph_def=orig_graph,
- outputs=["output"],
- max_batch_size=inp_dims[0],
- max_workspace_size_bytes=1 << 25,
- precision_mode="FP32", # TRT Engine precision "FP32","FP16" or "INT8"
- minimum_segment_size=2 # minimum number of nodes in an engine
- )
- o1 = run_graph(orig_graph, dummy_input)
+ trt_graph = trt.create_inference_graph(gdef, ["output"], inp_dims[0])
+ o1 = run_graph(gdef, dummy_input)
o2 = run_graph(trt_graph, dummy_input)
o3 = run_graph(trt_graph, dummy_input)
assert np.array_equal(o1, o2)
assert np.array_equal(o3, o2) # sanity check
- fp16_graph = trt.create_inference_graph(
- input_graph_def=orig_graph,
- outputs=["output"],
- max_batch_size=inp_dims[0],
- max_workspace_size_bytes=1 << 25,
- precision_mode="FP16", # TRT Engine precision "FP32","FP16" or "INT8"
- minimum_segment_size=2 # minimum number of nodes in an engine
- )
- int8_calib_gdef = trt.create_inference_graph(
- input_graph_def=orig_graph,
- outputs=["output"],
- max_batch_size=inp_dims[0],
- max_workspace_size_bytes=1 << 25,
- precision_mode="INT8", # TRT Engine precision "FP32","FP16" or "INT8"
- minimum_segment_size=2 # minimum number of nodes in an engine
- )
- o4 = run_graph(fp16_graph, dummy_input)
- _ = run_calibration(int8_calib_gdef, dummy_input)
- int8_graph = trt.calib_graph_to_infer_graph(int8_calib_gdef)
- o5 = run_graph(int8_graph, dummy_input)
- assert np.allclose(o1, o4)
- assert np.allclose(o1, o5)
print("Pass")
diff --git a/tensorflow/contrib/tensorrt/trt_conversion.i b/tensorflow/contrib/tensorrt/trt_conversion.i
index 46480e99a1..d679945d56 100644
--- a/tensorflow/contrib/tensorrt/trt_conversion.i
+++ b/tensorflow/contrib/tensorrt/trt_conversion.i
@@ -64,17 +64,13 @@ PyObject* pair_helper(std::pair<string, string>* in) {
%ignoreall
%unignore tensorflow;
%unignore trt_convert;
-%unignore calib_convert;
%{
-
std::pair<string, string> trt_convert(
string graph_def_string, // The serialized GraphDef string.
std::vector<string> output_names,
size_t max_batch_size,
- size_t max_workspace_size_bytes,
- int precision_mode,
- int minimum_segment_size
+ size_t max_workspace_size_bytes
// Unfortunately we can't use TF_Status here since it
// is in c/c_api and brings in a lot of other libraries
// which in turn declare ops. These ops are included
@@ -94,64 +90,16 @@ std::pair<string, string> trt_convert(
return std::pair<string, string>{out_status, ""};
}
- if(precision_mode < 0 || precision_mode > 2){
- out_status = "InvalidArgument;Invalid precision_mode";
- return std::pair<string, string>{out_status, ""};
- }
if (!output_names.size()) {
out_status = "InvalidArgument;Size of the output_names vector is 0";
return std::pair<string, string>{out_status, ""};
+ // return "";
}
tensorflow::GraphDef outGraph;
tensorflow::Status conversion_status =
tensorflow::tensorrt::convert::ConvertGraphDefToTensorRT(
graph_def, output_names, max_batch_size, max_workspace_size_bytes,
- &outGraph, precision_mode, minimum_segment_size);
- if (!conversion_status.ok()) {
- auto retCode = (int)conversion_status.code();
- char buff[2000];
- snprintf(buff, 2000, "%d;%s", retCode,
- conversion_status.error_message().c_str());
- out_status = buff;
- return std::pair<string, string>{out_status, ""};
- }
- string result;
- if (!outGraph.SerializeToString(&result)) {
- out_status = "InvalidArgument;Couldn't serialize output as a GraphDef";
- return std::pair<string, string>{out_status, ""};
- }
- out_status = "OK;All good!";
- return std::pair<string, string>{out_status, result};
-#else
- // Returns FAILED_PRECONDITION.
- return std::pair<string, string>{"9;TensorRT is not enabled!", ""};
-#endif // GOOGLE_CUDA && GOOGLE_TENSORRT
-}
-
-std::pair<string, string> calib_convert(string graph_def_string // const tensorflow::GraphDef&
- // unfortunately we can't use TF_Status here since it
- // is in c/c_api and brings in a lot of other libraries
- // which in turn declare ops. These ops are included
- // statically in our library and cause an abort when
- // module is loaded due to double registration
- // until Tensorflow properly exposes these headers
- // we have to work around this by returning a string
- // and converting it to exception on python side.
- //,TF_Status* out_status) {
-) {
-#if GOOGLE_CUDA && GOOGLE_TENSORRT
- string out_status;
-
- tensorflow::GraphDef graph_def;
- if (!graph_def.ParseFromString(graph_def_string)) {
- out_status = "InvalidArgument;Couldn't interpret input as a GraphDef";
- return std::pair<string, string>{out_status, ""};
- }
-
- tensorflow::GraphDef outGraph;
- tensorflow::Status conversion_status =
- tensorflow::tensorrt::convert::ConvertCalibGraphToInferGraph(graph_def,
- &outGraph);
+ &outGraph);
if (!conversion_status.ok()) {
auto retCode = (int)conversion_status.code();
char buff[2000];
@@ -174,13 +122,10 @@ std::pair<string, string> calib_convert(string graph_def_string // const tenso
}
%}
-std::pair<string, string> calib_convert(string graph_def_string);
-
std::pair<string, string> trt_convert(string graph_def_string,
std::vector<string> output_names,
size_t max_batch_size,
- size_t max_workspace_size_bytes,
- int precision_mode, int minimum_segment_size);
+ size_t max_workspace_size_bytes);
%unignoreall
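The SWIG wrapper above still reports errors through the status-string convention: trt_convert returns a (status, graphdef_bytes) pair where the status is "OK;<message>" on success or "<numeric code>;<message>" on failure, and the Python caller turns the latter into an exception. The real wrapper maps the numeric code back to a specific TensorFlow error; a plain RuntimeError is used here to keep the sketch short.

def check_trt_status(status):
    # Raise if the status string coming back from trt_convert is not "OK;...".
    if len(status) < 2:
        raise RuntimeError("Status message is malformed {}".format(status))
    if status[:2] == "OK":
        return
    parts = status.split(";")
    if len(parts) == 1:
        raise RuntimeError("Status message is malformed {}".format(status))
    raise RuntimeError("TRT conversion failed (code %s): %s"
                       % (parts[0], ";".join(parts[1:])))

check_trt_status("OK;All good!")  # returns silently
try:
    check_trt_status("9;TensorRT is not enabled!")
except RuntimeError as err:
    print(err)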
diff --git a/tensorflow/contrib/timeseries/examples/BUILD b/tensorflow/contrib/timeseries/examples/BUILD
index 70bf67c779..bb86ecb220 100644
--- a/tensorflow/contrib/timeseries/examples/BUILD
+++ b/tensorflow/contrib/timeseries/examples/BUILD
@@ -25,10 +25,7 @@ py_test(
srcs = ["predict_test.py"],
data = ["data/period_trend.csv"],
srcs_version = "PY2AND3",
- tags = [
- "no_windows", # TODO: needs investigation on Windows
- "notsan", # b/67513579
- ],
+ tags = ["notsan"], # b/67513579
deps = [
":predict",
"//tensorflow/python:client_testlib",
diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD
index 64f5cd8357..ed3ed4c0e1 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/BUILD
+++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD
@@ -156,7 +156,9 @@ py_test(
"head_test.py",
],
srcs_version = "PY2AND3",
- tags = ["no_pip_gpu"], # b/63391119
+ tags = [
+ "no_pip_gpu", # b/63391119
+ ],
deps = [
":feature_keys",
":head",
@@ -425,7 +427,6 @@ py_test(
srcs_version = "PY2AND3",
tags = [
"no_pip_gpu", # b/63391119
- "no_windows", # TODO: needs investigation on Windows
],
deps = [
":feature_keys",
diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD
index 07df7bc9a5..c86d06e923 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD
+++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD
@@ -40,7 +40,6 @@ py_test(
timeout = "long", # Moderate but for asan
srcs = ["state_space_model_test.py"],
srcs_version = "PY2AND3",
- tags = ["no_windows"], # TODO: needs investigation on Windows
deps = [
":state_space_model",
"//tensorflow/contrib/layers:layers_py",
diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index f9d433a45b..ed930e44e8 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -225,7 +225,6 @@ tf_py_test(
"//tensorflow/python:framework",
"//tensorflow/python:layers",
],
- tags = ["no_windows"], # TODO: needs investigation on Windows
)
tf_py_test(
diff --git a/tensorflow/contrib/util/loader.py b/tensorflow/contrib/util/loader.py
index dca01d26f4..f4283cd9ed 100644
--- a/tensorflow/contrib/util/loader.py
+++ b/tensorflow/contrib/util/loader.py
@@ -42,10 +42,9 @@ def load_op_library(path):
plugin.
"""
if os.name == 'nt':
- # To avoid making every user_ops aware of windows, re-write
- # the file extension from .so to .dll if .so file doesn't exist.
- if not os.path.exists(path):
- path = re.sub(r'\.so$', '.dll', path)
+ # To avoid making every user_ops aware of windows, re-write
+ # the file extension from .so to .dll.
+ path = re.sub(r'\.so$', '.dll', path)
# Currently we have only some user_ops as dlls on windows - don't try
# to load them if the dll is not found.
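The rewrite above is now unconditional on Windows: every .so path is mapped to its .dll counterpart before the load is attempted, instead of only when the .so file is missing. The effect, with made-up file names:

import re

def windows_library_path(path):
    return re.sub(r'\.so$', '.dll', path)

assert windows_library_path('_my_ops.so') == '_my_ops.dll'
assert windows_library_path('_my_ops.dll') == '_my_ops.dll'  # already a dll, unchanged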