aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--tensorflow/contrib/tensorrt/BUILD30
-rw-r--r--tensorflow/contrib/tensorrt/convert/convert_nodes.h1
-rw-r--r--tensorflow/contrib/tensorrt/resources/trt_allocator.cc49
-rw-r--r--tensorflow/contrib/tensorrt/resources/trt_allocator.h14
-rw-r--r--tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc79
5 files changed, 160 insertions, 13 deletions
diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD
index 70ce4a499c..a9378e9ad6 100644
--- a/tensorflow/contrib/tensorrt/BUILD
+++ b/tensorflow/contrib/tensorrt/BUILD
@@ -80,6 +80,7 @@ cc_library(
copts = tf_copts(),
visibility = ["//visibility:public"],
deps = [
+ ":trt_allocator",
":trt_logging",
":trt_plugins",
":trt_resources",
@@ -195,17 +196,16 @@ tf_py_wrap_cc(
tf_cuda_library(
name = "trt_resources",
srcs = [
- "resources/trt_allocator.cc",
"resources/trt_int8_calibrator.cc",
"resources/trt_resource_manager.cc",
],
hdrs = [
- "resources/trt_allocator.h",
"resources/trt_int8_calibrator.h",
"resources/trt_resource_manager.h",
"resources/trt_resources.h",
],
deps = [
+ ":trt_allocator",
":trt_logging",
":utils",
"//tensorflow/core:framework_headers_lib",
@@ -216,6 +216,31 @@ tf_cuda_library(
]),
)
+tf_cuda_library(
+ name = "trt_allocator",
+ srcs = ["resources/trt_allocator.cc"],
+ hdrs = ["resources/trt_allocator.h"],
+ deps = [
+ "//tensorflow/core:framework_headers_lib",
+ "//tensorflow/core:framework_lite",
+ "//tensorflow/core:lib_proto_parsing",
+ ] + if_tensorrt([
+ "@local_config_tensorrt//:nv_infer",
+ ]),
+)
+
+tf_cc_test(
+ name = "trt_allocator_test",
+ size = "small",
+ srcs = ["resources/trt_allocator_test.cc"],
+ tags = ["no_windows"],
+ deps = [
+ ":trt_allocator",
+ "//tensorflow/core:test",
+ "//tensorflow/core:test_main",
+ ],
+)
+
# Library for the node-level conversion portion of TensorRT operation creation
tf_cuda_library(
name = "trt_conversion",
@@ -231,6 +256,7 @@ tf_cuda_library(
],
deps = [
":segment",
+ ":trt_allocator",
":trt_plugins",
":trt_logging",
":trt_resources",
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
index 1a4c0e755d..81baf8e7c1 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
@@ -23,6 +23,7 @@ limitations under the License.
#include <vector>
#include "tensorflow/contrib/tensorrt/convert/utils.h"
+#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"
#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h"
#include "tensorflow/core/framework/graph.pb.h"
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
index 81d7330b49..d8f97bfbbc 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
@@ -19,12 +19,42 @@ limitations under the License.
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
+#include "cuda/include/cuda_runtime_api.h"
+#endif // GOOGLE_TENSORRT
+#endif // GOOGLE_CUDA
+
+namespace tensorflow {
+namespace tensorrt {
+
+// std::align is not supported, so this function mimics its behavior.
+void* Align(size_t alignment, size_t size, void*& ptr, size_t& space) {
+ QCHECK_GT(alignment, 0) << "alignment must be greater than 0.";
+ QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2.";
+ QCHECK_GT(size, 0) << "size must be greater than 0.";
+ QCHECK(ptr) << "ptr must not be nullptr.";
+ QCHECK_GT(space, 0) << "space must be greater than 0.";
+ const uintptr_t ptr_val = reinterpret_cast<uintptr_t>(ptr);
+ QCHECK_GE(ptr_val + space, ptr_val) << "Provided space overflows.";
+ if (size > space) return nullptr;
+ const uintptr_t aligned_ptr_val = ((ptr_val + alignment - 1) & -alignment);
+ if (aligned_ptr_val > ptr_val + space - size) return nullptr;
+ ptr = reinterpret_cast<void*>(aligned_ptr_val);
+ const uintptr_t diff = aligned_ptr_val - ptr_val;
+ space -= diff;
+ return ptr;
+}
+
+} // namespace tensorrt
+} // namespace tensorflow
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
#if NV_TENSORRT_MAJOR > 2
-#include "cuda/include/cuda_runtime_api.h"
namespace tensorflow {
namespace tensorrt {
+
void* TRTCudaAllocator::allocate(uint64_t size, uint64_t alignment,
uint32_t flags) {
assert((alignment & (alignment - 1)) == 0); // zero or a power of 2.
@@ -44,17 +74,16 @@ void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment,
assert((alignment & (alignment - 1)) == 0); // zero or a power of 2.
size_t total_size = size + alignment;
void* mem = allocator_->AllocateRaw(alignment, total_size);
- if (!mem) {
- return nullptr;
- }
+ if (!mem) return nullptr;
void* alloc_mem = mem;
- CHECK(std::align(alignment, size, mem, total_size));
+ QCHECK(Align(alignment, size, mem, total_size));
if (mem != alloc_mem) {
- CHECK(mem_map_.insert({mem, alloc_mem}).second);
+ QCHECK(mem_map_.insert({mem, alloc_mem}).second);
}
- VLOG(2) << "Allocated " << size << " bytes with alignment " << alignment
- << " @ " << mem;
+ VLOG(2) << "Allocated " << total_size << " bytes memory @" << alloc_mem
+ << "; aligned to " << size << " bytes @" << mem << " with alignment "
+ << alignment;
return mem;
}
@@ -80,5 +109,5 @@ void TRTDeviceAllocator::free(void* memory) {
} // namespace tensorflow
#endif
-#endif
-#endif
+#endif // GOOGLE_TENSORRT
+#endif // GOOGLE_CUDA
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.h b/tensorflow/contrib/tensorrt/resources/trt_allocator.h
index b8825b108d..6f94492083 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator.h
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.h
@@ -16,13 +16,25 @@ limitations under the License.
#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_
#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_
-#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
+#include <unordered_map>
+
#include "tensorflow/core/framework/allocator.h"
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
#include "tensorrt/include/NvInfer.h"
+#endif // GOOGLE_TENSORRT
+#endif // GOOGLE_CUDA
+
+namespace tensorflow {
+namespace tensorrt {
+// std::align is not supported, so this function mimics its behavior.
+void* Align(size_t alignment, size_t size, void*& ptr, size_t& space);
+} // namespace tensorrt
+} // namespace tensorflow
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
#if NV_TENSORRT_MAJOR == 3
// Define interface here temporarily until TRT 4.0 is released
namespace nvinfer1 {
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
new file mode 100644
index 0000000000..f515ed03f2
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
@@ -0,0 +1,79 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"
+
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace tensorrt {
+
+bool RunTest(const size_t alignment, const size_t size,
+ const intptr_t orig_ptr_val, const size_t orig_space) {
+ void* const orig_ptr = reinterpret_cast<void*>(orig_ptr_val);
+ void* ptr = orig_ptr;
+ size_t space = orig_space;
+ void* result = Align(alignment, size, ptr, space);
+ if (result == nullptr) {
+ EXPECT_EQ(orig_ptr, ptr);
+ EXPECT_EQ(orig_space, space);
+ return false;
+ } else {
+ EXPECT_EQ(result, ptr);
+ const intptr_t ptr_val = reinterpret_cast<intptr_t>(ptr);
+ EXPECT_EQ(0, ptr_val % alignment);
+ EXPECT_GE(ptr_val, orig_ptr_val);
+ EXPECT_GE(space, size);
+ EXPECT_LE(space, orig_space);
+ EXPECT_EQ(ptr_val + space, orig_ptr_val + orig_space);
+ return true;
+ }
+}
+
+TEST(TRTAllocatorTest, Align) {
+ for (const size_t space :
+ {1, 2, 3, 4, 7, 8, 9, 10, 16, 32, 511, 512, 513, 700, 12345}) {
+ for (size_t alignment = 1; alignment <= space * 4; alignment *= 2) {
+ for (const intptr_t ptr_val :
+ {1ul, alignment == 1 ? 1ul : alignment - 1, alignment, alignment + 1,
+ alignment + (alignment / 2)}) {
+ if (ptr_val % alignment == 0) {
+ for (const size_t size :
+ {1ul, space == 1 ? 1ul : space - 1, space, space + 1}) {
+ EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space));
+ }
+ } else {
+ EXPECT_FALSE(RunTest(alignment, space, ptr_val, space));
+ const size_t diff = alignment - ptr_val % alignment;
+ if (space > diff) {
+ EXPECT_TRUE(
+ RunTest(alignment, space - diff, ptr_val + diff, space - diff));
+ for (const size_t size :
+ {1ul, space - diff > 1 ? space - diff - 1 : 1ul, space - diff,
+ space - diff + 1, space - 1}) {
+ EXPECT_EQ(space - diff >= size,
+ RunTest(alignment, size, ptr_val, space));
+ }
+ } else {
+ EXPECT_FALSE(RunTest(alignment, 1, ptr_val, space));
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace tensorrt
+} // namespace tensorflow