diff options
-rw-r--r-- | tensorflow/contrib/tensorrt/BUILD | 30 | ||||
-rw-r--r-- | tensorflow/contrib/tensorrt/convert/convert_nodes.h | 1 | ||||
-rw-r--r-- | tensorflow/contrib/tensorrt/resources/trt_allocator.cc | 49 | ||||
-rw-r--r-- | tensorflow/contrib/tensorrt/resources/trt_allocator.h | 14 | ||||
-rw-r--r-- | tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc | 79 |
5 files changed, 160 insertions, 13 deletions
diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 70ce4a499c..a9378e9ad6 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -80,6 +80,7 @@ cc_library( copts = tf_copts(), visibility = ["//visibility:public"], deps = [ + ":trt_allocator", ":trt_logging", ":trt_plugins", ":trt_resources", @@ -195,17 +196,16 @@ tf_py_wrap_cc( tf_cuda_library( name = "trt_resources", srcs = [ - "resources/trt_allocator.cc", "resources/trt_int8_calibrator.cc", "resources/trt_resource_manager.cc", ], hdrs = [ - "resources/trt_allocator.h", "resources/trt_int8_calibrator.h", "resources/trt_resource_manager.h", "resources/trt_resources.h", ], deps = [ + ":trt_allocator", ":trt_logging", ":utils", "//tensorflow/core:framework_headers_lib", @@ -216,6 +216,31 @@ tf_cuda_library( ]), ) +tf_cuda_library( + name = "trt_allocator", + srcs = ["resources/trt_allocator.cc"], + hdrs = ["resources/trt_allocator.h"], + deps = [ + "//tensorflow/core:framework_headers_lib", + "//tensorflow/core:framework_lite", + "//tensorflow/core:lib_proto_parsing", + ] + if_tensorrt([ + "@local_config_tensorrt//:nv_infer", + ]), +) + +tf_cc_test( + name = "trt_allocator_test", + size = "small", + srcs = ["resources/trt_allocator_test.cc"], + tags = ["no_windows"], + deps = [ + ":trt_allocator", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + # Library for the node-level conversion portion of TensorRT operation creation tf_cuda_library( name = "trt_conversion", @@ -231,6 +256,7 @@ tf_cuda_library( ], deps = [ ":segment", + ":trt_allocator", ":trt_plugins", ":trt_logging", ":trt_resources", diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 1a4c0e755d..81baf8e7c1 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -23,6 +23,7 @@ limitations under the License. #include <vector> #include "tensorflow/contrib/tensorrt/convert/utils.h" +#include "tensorflow/contrib/tensorrt/log/trt_logger.h" #include "tensorflow/contrib/tensorrt/resources/trt_allocator.h" #include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" #include "tensorflow/core/framework/graph.pb.h" diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc index 81d7330b49..d8f97bfbbc 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc @@ -19,12 +19,42 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT +#include "cuda/include/cuda_runtime_api.h" +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA + +namespace tensorflow { +namespace tensorrt { + +// std::align is not supported, so this method mimic its behavior. +void* Align(size_t alignment, size_t size, void*& ptr, size_t& space) { + QCHECK_GT(alignment, 0) << "alignment must be greater than 0."; + QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2."; + QCHECK_GT(size, 0) << "size must be greater than 0."; + QCHECK(ptr) << "ptr must not be nullptr."; + QCHECK_GT(space, 0) << "space must be greater than 0."; + const uintptr_t ptr_val = reinterpret_cast<uintptr_t>(ptr); + QCHECK_GE(ptr_val + space, ptr_val) << "Provided space overflows."; + if (size > space) return nullptr; + const uintptr_t aligned_ptr_val = ((ptr_val + alignment - 1) & -alignment); + if (aligned_ptr_val > ptr_val + space - size) return nullptr; + ptr = reinterpret_cast<void*>(aligned_ptr_val); + const uintptr_t diff = aligned_ptr_val - ptr_val; + space -= diff; + return ptr; +} + +} // namespace tensorrt +} // namespace tensorflow + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT #if NV_TENSORRT_MAJOR > 2 -#include "cuda/include/cuda_runtime_api.h" namespace tensorflow { namespace tensorrt { + void* TRTCudaAllocator::allocate(uint64_t size, uint64_t alignment, uint32_t flags) { assert((alignment & (alignment - 1)) == 0); // zero or a power of 2. @@ -44,17 +74,16 @@ void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment, assert((alignment & (alignment - 1)) == 0); // zero or a power of 2. size_t total_size = size + alignment; void* mem = allocator_->AllocateRaw(alignment, total_size); - if (!mem) { - return nullptr; - } + if (!mem) return nullptr; void* alloc_mem = mem; - CHECK(std::align(alignment, size, mem, total_size)); + QCHECK(Align(alignment, size, mem, total_size)); if (mem != alloc_mem) { - CHECK(mem_map_.insert({mem, alloc_mem}).second); + QCHECK(mem_map_.insert({mem, alloc_mem}).second); } - VLOG(2) << "Allocated " << size << " bytes with alignment " << alignment - << " @ " << mem; + VLOG(2) << "Allocated " << total_size << " bytes memory @" << alloc_mem + << "; aligned to " << size << " bytes @" << mem << " with alignment " + << alignment; return mem; } @@ -80,5 +109,5 @@ void TRTDeviceAllocator::free(void* memory) { } // namespace tensorflow #endif -#endif -#endif +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.h b/tensorflow/contrib/tensorrt/resources/trt_allocator.h index b8825b108d..6f94492083 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_allocator.h +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.h @@ -16,13 +16,25 @@ limitations under the License. #ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_ #define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_ -#include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include <unordered_map> + #include "tensorflow/core/framework/allocator.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT #include "tensorrt/include/NvInfer.h" +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA + +namespace tensorflow { +namespace tensorrt { +// std::align is not supported, so this function mimic its behavior. +void* Align(size_t alignment, size_t size, void*& ptr, size_t& space); +} // namespace tensorrt +} // namespace tensorflow +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT #if NV_TENSORRT_MAJOR == 3 // Define interface here temporarily until TRT 4.0 is released namespace nvinfer1 { diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc new file mode 100644 index 0000000000..f515ed03f2 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc @@ -0,0 +1,79 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h" + +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace tensorrt { + +bool RunTest(const size_t alignment, const size_t size, + const intptr_t orig_ptr_val, const size_t orig_space) { + void* const orig_ptr = reinterpret_cast<void*>(orig_ptr_val); + void* ptr = orig_ptr; + size_t space = orig_space; + void* result = Align(alignment, size, ptr, space); + if (result == nullptr) { + EXPECT_EQ(orig_ptr, ptr); + EXPECT_EQ(orig_space, space); + return false; + } else { + EXPECT_EQ(result, ptr); + const intptr_t ptr_val = reinterpret_cast<intptr_t>(ptr); + EXPECT_EQ(0, ptr_val % alignment); + EXPECT_GE(ptr_val, orig_ptr_val); + EXPECT_GE(space, size); + EXPECT_LE(space, orig_space); + EXPECT_EQ(ptr_val + space, orig_ptr_val + orig_space); + return true; + } +} + +TEST(TRTAllocatorTest, Align) { + for (const size_t space : + {1, 2, 3, 4, 7, 8, 9, 10, 16, 32, 511, 512, 513, 700, 12345}) { + for (size_t alignment = 1; alignment <= space * 4; alignment *= 2) { + for (const intptr_t ptr_val : + {1ul, alignment == 1 ? 1ul : alignment - 1, alignment, alignment + 1, + alignment + (alignment / 2)}) { + if (ptr_val % alignment == 0) { + for (const size_t size : + {1ul, space == 1 ? 1ul : space - 1, space, space + 1}) { + EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space)); + } + } else { + EXPECT_FALSE(RunTest(alignment, space, ptr_val, space)); + const size_t diff = alignment - ptr_val % alignment; + if (space > diff) { + EXPECT_TRUE( + RunTest(alignment, space - diff, ptr_val + diff, space - diff)); + for (const size_t size : + {1ul, space - diff > 1 ? space - diff - 1 : 1ul, space - diff, + space - diff + 1, space - 1}) { + EXPECT_EQ(space - diff >= size, + RunTest(alignment, size, ptr_val, space)); + } + } else { + EXPECT_FALSE(RunTest(alignment, 1, ptr_val, space)); + } + } + } + } + } +} + +} // namespace tensorrt +} // namespace tensorflow |